Mercurial > libavcodec.hg
comparison i386/dsputil_mmx.c @ 2207:22b768f1261a libavcodec
10000l fix and use more mmx2/3dnow code for mpeg4 qpel which has been written and committed a long time ago but apparently never used; qpel motion compensation is 5% faster
now
author | michael |
---|---|
date | Mon, 06 Sep 2004 03:17:31 +0000 |
parents | f37b6ffc81ed |
children | c4a476971abc |
comparison
equal
deleted
inserted
replaced
2206:713ad427a3c7 | 2207:22b768f1261a |
---|---|
1332 } | 1332 } |
1333 | 1333 |
1334 static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){ | 1334 static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){ |
1335 int i=0; | 1335 int i=0; |
1336 uint8_t l, lt; | 1336 uint8_t l, lt; |
1337 | 1337 |
1338 asm volatile( | 1338 asm volatile( |
1339 "1: \n\t" | 1339 "1: \n\t" |
1340 "movq -1(%1, %0), %%mm0 \n\t" // LT | 1340 "movq -1(%1, %0), %%mm0 \n\t" // LT |
1341 "movq (%1, %0), %%mm1 \n\t" // T | 1341 "movq (%1, %0), %%mm1 \n\t" // T |
1342 "movq -1(%2, %0), %%mm2 \n\t" // L | 1342 "movq -1(%2, %0), %%mm2 \n\t" // L |
2044 \ | 2044 \ |
2045 static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2045 static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2046 uint64_t temp[8];\ | 2046 uint64_t temp[8];\ |
2047 uint8_t * const half= (uint8_t*)temp;\ | 2047 uint8_t * const half= (uint8_t*)temp;\ |
2048 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\ | 2048 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\ |
2049 OPNAME ## pixels8_l2_mmx(dst, src, half, stride, stride, 8);\ | 2049 OPNAME ## pixels8_l2_ ## MMX(dst, src, half, stride, stride, 8);\ |
2050 }\ | 2050 }\ |
2051 \ | 2051 \ |
2052 static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2052 static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2053 OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, stride, 8);\ | 2053 OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, stride, 8);\ |
2054 }\ | 2054 }\ |
2055 \ | 2055 \ |
2056 static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2056 static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2057 uint64_t temp[8];\ | 2057 uint64_t temp[8];\ |
2058 uint8_t * const half= (uint8_t*)temp;\ | 2058 uint8_t * const half= (uint8_t*)temp;\ |
2059 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\ | 2059 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\ |
2060 OPNAME ## pixels8_l2_mmx(dst, src+1, half, stride, stride, 8);\ | 2060 OPNAME ## pixels8_l2_ ## MMX(dst, src+1, half, stride, stride, 8);\ |
2061 }\ | 2061 }\ |
2062 \ | 2062 \ |
2063 static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2063 static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2064 uint64_t temp[8];\ | 2064 uint64_t temp[8];\ |
2065 uint8_t * const half= (uint8_t*)temp;\ | 2065 uint8_t * const half= (uint8_t*)temp;\ |
2066 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\ | 2066 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\ |
2067 OPNAME ## pixels8_l2_mmx(dst, src, half, stride, stride, 8);\ | 2067 OPNAME ## pixels8_l2_ ## MMX(dst, src, half, stride, stride, 8);\ |
2068 }\ | 2068 }\ |
2069 \ | 2069 \ |
2070 static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2070 static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2071 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, stride, stride);\ | 2071 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, stride, stride);\ |
2072 }\ | 2072 }\ |
2073 \ | 2073 \ |
2074 static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2074 static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2075 uint64_t temp[8];\ | 2075 uint64_t temp[8];\ |
2076 uint8_t * const half= (uint8_t*)temp;\ | 2076 uint8_t * const half= (uint8_t*)temp;\ |
2077 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\ | 2077 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\ |
2078 OPNAME ## pixels8_l2_mmx(dst, src+stride, half, stride, stride, 8);\ | 2078 OPNAME ## pixels8_l2_ ## MMX(dst, src+stride, half, stride, stride, 8);\ |
2079 }\ | 2079 }\ |
2080 static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2080 static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2081 uint64_t half[8 + 9];\ | 2081 uint64_t half[8 + 9];\ |
2082 uint8_t * const halfH= ((uint8_t*)half) + 64;\ | 2082 uint8_t * const halfH= ((uint8_t*)half) + 64;\ |
2083 uint8_t * const halfHV= ((uint8_t*)half);\ | 2083 uint8_t * const halfHV= ((uint8_t*)half);\ |
2084 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ | 2084 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ |
2085 put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\ | 2085 put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, stride, 9);\ |
2086 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ | 2086 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ |
2087 OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\ | 2087 OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, stride, 8, 8);\ |
2088 }\ | 2088 }\ |
2089 static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2089 static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2090 uint64_t half[8 + 9];\ | 2090 uint64_t half[8 + 9];\ |
2091 uint8_t * const halfH= ((uint8_t*)half) + 64;\ | 2091 uint8_t * const halfH= ((uint8_t*)half) + 64;\ |
2092 uint8_t * const halfHV= ((uint8_t*)half);\ | 2092 uint8_t * const halfHV= ((uint8_t*)half);\ |
2093 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ | 2093 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ |
2094 put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\ | 2094 put ## RND ## pixels8_l2_ ## MMX(halfH, src+1, halfH, 8, stride, 9);\ |
2095 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ | 2095 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ |
2096 OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\ | 2096 OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, stride, 8, 8);\ |
2097 }\ | 2097 }\ |
2098 static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2098 static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2099 uint64_t half[8 + 9];\ | 2099 uint64_t half[8 + 9];\ |
2100 uint8_t * const halfH= ((uint8_t*)half) + 64;\ | 2100 uint8_t * const halfH= ((uint8_t*)half) + 64;\ |
2101 uint8_t * const halfHV= ((uint8_t*)half);\ | 2101 uint8_t * const halfHV= ((uint8_t*)half);\ |
2102 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ | 2102 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ |
2103 put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\ | 2103 put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, stride, 9);\ |
2104 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ | 2104 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ |
2105 OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\ | 2105 OPNAME ## pixels8_l2_ ## MMX(dst, halfH+8, halfHV, stride, 8, 8);\ |
2106 }\ | 2106 }\ |
2107 static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2107 static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2108 uint64_t half[8 + 9];\ | 2108 uint64_t half[8 + 9];\ |
2109 uint8_t * const halfH= ((uint8_t*)half) + 64;\ | 2109 uint8_t * const halfH= ((uint8_t*)half) + 64;\ |
2110 uint8_t * const halfHV= ((uint8_t*)half);\ | 2110 uint8_t * const halfHV= ((uint8_t*)half);\ |
2111 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ | 2111 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ |
2112 put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\ | 2112 put ## RND ## pixels8_l2_ ## MMX(halfH, src+1, halfH, 8, stride, 9);\ |
2113 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ | 2113 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ |
2114 OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\ | 2114 OPNAME ## pixels8_l2_ ## MMX(dst, halfH+8, halfHV, stride, 8, 8);\ |
2115 }\ | 2115 }\ |
2116 static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2116 static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2117 uint64_t half[8 + 9];\ | 2117 uint64_t half[8 + 9];\ |
2118 uint8_t * const halfH= ((uint8_t*)half) + 64;\ | 2118 uint8_t * const halfH= ((uint8_t*)half) + 64;\ |
2119 uint8_t * const halfHV= ((uint8_t*)half);\ | 2119 uint8_t * const halfHV= ((uint8_t*)half);\ |
2120 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ | 2120 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ |
2121 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ | 2121 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ |
2122 OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\ | 2122 OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, stride, 8, 8);\ |
2123 }\ | 2123 }\ |
2124 static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2124 static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2125 uint64_t half[8 + 9];\ | 2125 uint64_t half[8 + 9];\ |
2126 uint8_t * const halfH= ((uint8_t*)half) + 64;\ | 2126 uint8_t * const halfH= ((uint8_t*)half) + 64;\ |
2127 uint8_t * const halfHV= ((uint8_t*)half);\ | 2127 uint8_t * const halfHV= ((uint8_t*)half);\ |
2128 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ | 2128 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ |
2129 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ | 2129 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ |
2130 OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\ | 2130 OPNAME ## pixels8_l2_ ## MMX(dst, halfH+8, halfHV, stride, 8, 8);\ |
2131 }\ | 2131 }\ |
2132 static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2132 static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2133 uint64_t half[8 + 9];\ | 2133 uint64_t half[8 + 9];\ |
2134 uint8_t * const halfH= ((uint8_t*)half);\ | 2134 uint8_t * const halfH= ((uint8_t*)half);\ |
2135 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ | 2135 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ |
2136 put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\ | 2136 put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, stride, 9);\ |
2137 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\ | 2137 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\ |
2138 }\ | 2138 }\ |
2139 static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2139 static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2140 uint64_t half[8 + 9];\ | 2140 uint64_t half[8 + 9];\ |
2141 uint8_t * const halfH= ((uint8_t*)half);\ | 2141 uint8_t * const halfH= ((uint8_t*)half);\ |
2142 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ | 2142 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ |
2143 put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\ | 2143 put ## RND ## pixels8_l2_ ## MMX(halfH, src+1, halfH, 8, stride, 9);\ |
2144 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\ | 2144 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\ |
2145 }\ | 2145 }\ |
2146 static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2146 static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2147 uint64_t half[9];\ | 2147 uint64_t half[9];\ |
2148 uint8_t * const halfH= ((uint8_t*)half);\ | 2148 uint8_t * const halfH= ((uint8_t*)half);\ |
2155 \ | 2155 \ |
2156 static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2156 static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2157 uint64_t temp[32];\ | 2157 uint64_t temp[32];\ |
2158 uint8_t * const half= (uint8_t*)temp;\ | 2158 uint8_t * const half= (uint8_t*)temp;\ |
2159 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, stride, 16);\ | 2159 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, stride, 16);\ |
2160 OPNAME ## pixels16_l2_mmx(dst, src, half, stride, stride, 16);\ | 2160 OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, stride, 16);\ |
2161 }\ | 2161 }\ |
2162 \ | 2162 \ |
2163 static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2163 static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2164 OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, stride, stride, 16);\ | 2164 OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, stride, stride, 16);\ |
2165 }\ | 2165 }\ |
2166 \ | 2166 \ |
2167 static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2167 static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2168 uint64_t temp[32];\ | 2168 uint64_t temp[32];\ |
2169 uint8_t * const half= (uint8_t*)temp;\ | 2169 uint8_t * const half= (uint8_t*)temp;\ |
2170 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, stride, 16);\ | 2170 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, stride, 16);\ |
2171 OPNAME ## pixels16_l2_mmx(dst, src+1, half, stride, stride, 16);\ | 2171 OPNAME ## pixels16_l2_ ## MMX(dst, src+1, half, stride, stride, 16);\ |
2172 }\ | 2172 }\ |
2173 \ | 2173 \ |
2174 static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2174 static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2175 uint64_t temp[32];\ | 2175 uint64_t temp[32];\ |
2176 uint8_t * const half= (uint8_t*)temp;\ | 2176 uint8_t * const half= (uint8_t*)temp;\ |
2177 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\ | 2177 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\ |
2178 OPNAME ## pixels16_l2_mmx(dst, src, half, stride, stride, 16);\ | 2178 OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, stride, 16);\ |
2179 }\ | 2179 }\ |
2180 \ | 2180 \ |
2181 static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2181 static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2182 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, stride, stride);\ | 2182 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, stride, stride);\ |
2183 }\ | 2183 }\ |
2184 \ | 2184 \ |
2185 static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2185 static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2186 uint64_t temp[32];\ | 2186 uint64_t temp[32];\ |
2187 uint8_t * const half= (uint8_t*)temp;\ | 2187 uint8_t * const half= (uint8_t*)temp;\ |
2188 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\ | 2188 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\ |
2189 OPNAME ## pixels16_l2_mmx(dst, src+stride, half, stride, stride, 16);\ | 2189 OPNAME ## pixels16_l2_ ## MMX(dst, src+stride, half, stride, stride, 16);\ |
2190 }\ | 2190 }\ |
2191 static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2191 static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2192 uint64_t half[16*2 + 17*2];\ | 2192 uint64_t half[16*2 + 17*2];\ |
2193 uint8_t * const halfH= ((uint8_t*)half) + 256;\ | 2193 uint8_t * const halfH= ((uint8_t*)half) + 256;\ |
2194 uint8_t * const halfHV= ((uint8_t*)half);\ | 2194 uint8_t * const halfHV= ((uint8_t*)half);\ |
2195 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ | 2195 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ |
2196 put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\ | 2196 put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, stride, 17);\ |
2197 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ | 2197 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ |
2198 OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\ | 2198 OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, stride, 16, 16);\ |
2199 }\ | 2199 }\ |
2200 static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2200 static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2201 uint64_t half[16*2 + 17*2];\ | 2201 uint64_t half[16*2 + 17*2];\ |
2202 uint8_t * const halfH= ((uint8_t*)half) + 256;\ | 2202 uint8_t * const halfH= ((uint8_t*)half) + 256;\ |
2203 uint8_t * const halfHV= ((uint8_t*)half);\ | 2203 uint8_t * const halfHV= ((uint8_t*)half);\ |
2204 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ | 2204 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ |
2205 put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\ | 2205 put ## RND ## pixels16_l2_ ## MMX(halfH, src+1, halfH, 16, stride, 17);\ |
2206 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ | 2206 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ |
2207 OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\ | 2207 OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, stride, 16, 16);\ |
2208 }\ | 2208 }\ |
2209 static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2209 static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2210 uint64_t half[16*2 + 17*2];\ | 2210 uint64_t half[16*2 + 17*2];\ |
2211 uint8_t * const halfH= ((uint8_t*)half) + 256;\ | 2211 uint8_t * const halfH= ((uint8_t*)half) + 256;\ |
2212 uint8_t * const halfHV= ((uint8_t*)half);\ | 2212 uint8_t * const halfHV= ((uint8_t*)half);\ |
2213 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ | 2213 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ |
2214 put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\ | 2214 put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, stride, 17);\ |
2215 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ | 2215 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ |
2216 OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\ | 2216 OPNAME ## pixels16_l2_ ## MMX(dst, halfH+16, halfHV, stride, 16, 16);\ |
2217 }\ | 2217 }\ |
2218 static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2218 static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2219 uint64_t half[16*2 + 17*2];\ | 2219 uint64_t half[16*2 + 17*2];\ |
2220 uint8_t * const halfH= ((uint8_t*)half) + 256;\ | 2220 uint8_t * const halfH= ((uint8_t*)half) + 256;\ |
2221 uint8_t * const halfHV= ((uint8_t*)half);\ | 2221 uint8_t * const halfHV= ((uint8_t*)half);\ |
2222 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ | 2222 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ |
2223 put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\ | 2223 put ## RND ## pixels16_l2_ ## MMX(halfH, src+1, halfH, 16, stride, 17);\ |
2224 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ | 2224 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ |
2225 OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\ | 2225 OPNAME ## pixels16_l2_ ## MMX(dst, halfH+16, halfHV, stride, 16, 16);\ |
2226 }\ | 2226 }\ |
2227 static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2227 static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2228 uint64_t half[16*2 + 17*2];\ | 2228 uint64_t half[16*2 + 17*2];\ |
2229 uint8_t * const halfH= ((uint8_t*)half) + 256;\ | 2229 uint8_t * const halfH= ((uint8_t*)half) + 256;\ |
2230 uint8_t * const halfHV= ((uint8_t*)half);\ | 2230 uint8_t * const halfHV= ((uint8_t*)half);\ |
2231 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ | 2231 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ |
2232 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ | 2232 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ |
2233 OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\ | 2233 OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, stride, 16, 16);\ |
2234 }\ | 2234 }\ |
2235 static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2235 static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2236 uint64_t half[16*2 + 17*2];\ | 2236 uint64_t half[16*2 + 17*2];\ |
2237 uint8_t * const halfH= ((uint8_t*)half) + 256;\ | 2237 uint8_t * const halfH= ((uint8_t*)half) + 256;\ |
2238 uint8_t * const halfHV= ((uint8_t*)half);\ | 2238 uint8_t * const halfHV= ((uint8_t*)half);\ |
2239 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ | 2239 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ |
2240 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ | 2240 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ |
2241 OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\ | 2241 OPNAME ## pixels16_l2_ ## MMX(dst, halfH+16, halfHV, stride, 16, 16);\ |
2242 }\ | 2242 }\ |
2243 static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2243 static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2244 uint64_t half[17*2];\ | 2244 uint64_t half[17*2];\ |
2245 uint8_t * const halfH= ((uint8_t*)half);\ | 2245 uint8_t * const halfH= ((uint8_t*)half);\ |
2246 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ | 2246 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ |
2247 put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\ | 2247 put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, stride, 17);\ |
2248 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\ | 2248 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\ |
2249 }\ | 2249 }\ |
2250 static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2250 static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2251 uint64_t half[17*2];\ | 2251 uint64_t half[17*2];\ |
2252 uint8_t * const halfH= ((uint8_t*)half);\ | 2252 uint8_t * const halfH= ((uint8_t*)half);\ |
2253 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ | 2253 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ |
2254 put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\ | 2254 put ## RND ## pixels16_l2_ ## MMX(halfH, src+1, halfH, 16, stride, 17);\ |
2255 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\ | 2255 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\ |
2256 }\ | 2256 }\ |
2257 static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | 2257 static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
2258 uint64_t half[17*2];\ | 2258 uint64_t half[17*2];\ |
2259 uint8_t * const halfH= ((uint8_t*)half);\ | 2259 uint8_t * const halfH= ((uint8_t*)half);\ |