comparison i386/dsputil_mmx.c @ 2207:22b768f1261a libavcodec

10000l fix and use more mmx2/3dnow code for mpeg4 qpel which was written and committed a long time ago but apparently never used; qpel motion compensation is 5% faster now
author michael
date Mon, 06 Sep 2004 03:17:31 +0000
parents f37b6ffc81ed
children c4a476971abc
comparison
equal deleted inserted replaced
2206:713ad427a3c7 2207:22b768f1261a
1332 } 1332 }
1333 1333
1334 static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){ 1334 static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
1335 int i=0; 1335 int i=0;
1336 uint8_t l, lt; 1336 uint8_t l, lt;
1337 1337
1338 asm volatile( 1338 asm volatile(
1339 "1: \n\t" 1339 "1: \n\t"
1340 "movq -1(%1, %0), %%mm0 \n\t" // LT 1340 "movq -1(%1, %0), %%mm0 \n\t" // LT
1341 "movq (%1, %0), %%mm1 \n\t" // T 1341 "movq (%1, %0), %%mm1 \n\t" // T
1342 "movq -1(%2, %0), %%mm2 \n\t" // L 1342 "movq -1(%2, %0), %%mm2 \n\t" // L
2044 \ 2044 \
2045 static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ 2045 static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
2046 uint64_t temp[8];\ 2046 uint64_t temp[8];\
2047 uint8_t * const half= (uint8_t*)temp;\ 2047 uint8_t * const half= (uint8_t*)temp;\
2048 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\ 2048 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\
2049 OPNAME ## pixels8_l2_mmx(dst, src, half, stride, stride, 8);\ 2049 OPNAME ## pixels8_l2_ ## MMX(dst, src, half, stride, stride, 8);\
2050 }\ 2050 }\
2051 \ 2051 \
2052 static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ 2052 static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
2053 OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, stride, 8);\ 2053 OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, stride, 8);\
2054 }\ 2054 }\
2055 \ 2055 \
2056 static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ 2056 static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
2057 uint64_t temp[8];\ 2057 uint64_t temp[8];\
2058 uint8_t * const half= (uint8_t*)temp;\ 2058 uint8_t * const half= (uint8_t*)temp;\
2059 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\ 2059 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\
2060 OPNAME ## pixels8_l2_mmx(dst, src+1, half, stride, stride, 8);\ 2060 OPNAME ## pixels8_l2_ ## MMX(dst, src+1, half, stride, stride, 8);\
2061 }\ 2061 }\
2062 \ 2062 \
2063 static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ 2063 static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
2064 uint64_t temp[8];\ 2064 uint64_t temp[8];\
2065 uint8_t * const half= (uint8_t*)temp;\ 2065 uint8_t * const half= (uint8_t*)temp;\
2066 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\ 2066 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\
2067 OPNAME ## pixels8_l2_mmx(dst, src, half, stride, stride, 8);\ 2067 OPNAME ## pixels8_l2_ ## MMX(dst, src, half, stride, stride, 8);\
2068 }\ 2068 }\
2069 \ 2069 \
2070 static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ 2070 static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
2071 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, stride, stride);\ 2071 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, stride, stride);\
2072 }\ 2072 }\
2073 \ 2073 \
2074 static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ 2074 static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
2075 uint64_t temp[8];\ 2075 uint64_t temp[8];\
2076 uint8_t * const half= (uint8_t*)temp;\ 2076 uint8_t * const half= (uint8_t*)temp;\
2077 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\ 2077 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\
2078 OPNAME ## pixels8_l2_mmx(dst, src+stride, half, stride, stride, 8);\ 2078 OPNAME ## pixels8_l2_ ## MMX(dst, src+stride, half, stride, stride, 8);\
2079 }\ 2079 }\
2080 static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ 2080 static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
2081 uint64_t half[8 + 9];\ 2081 uint64_t half[8 + 9];\
2082 uint8_t * const halfH= ((uint8_t*)half) + 64;\ 2082 uint8_t * const halfH= ((uint8_t*)half) + 64;\
2083 uint8_t * const halfHV= ((uint8_t*)half);\ 2083 uint8_t * const halfHV= ((uint8_t*)half);\
2084 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ 2084 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
2085 put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\ 2085 put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, stride, 9);\
2086 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 2086 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
2087 OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\ 2087 OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, stride, 8, 8);\
2088 }\ 2088 }\
2089 static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ 2089 static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
2090 uint64_t half[8 + 9];\ 2090 uint64_t half[8 + 9];\
2091 uint8_t * const halfH= ((uint8_t*)half) + 64;\ 2091 uint8_t * const halfH= ((uint8_t*)half) + 64;\
2092 uint8_t * const halfHV= ((uint8_t*)half);\ 2092 uint8_t * const halfHV= ((uint8_t*)half);\
2093 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ 2093 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
2094 put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\ 2094 put ## RND ## pixels8_l2_ ## MMX(halfH, src+1, halfH, 8, stride, 9);\
2095 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 2095 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
2096 OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\ 2096 OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, stride, 8, 8);\
2097 }\ 2097 }\
2098 static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ 2098 static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
2099 uint64_t half[8 + 9];\ 2099 uint64_t half[8 + 9];\
2100 uint8_t * const halfH= ((uint8_t*)half) + 64;\ 2100 uint8_t * const halfH= ((uint8_t*)half) + 64;\
2101 uint8_t * const halfHV= ((uint8_t*)half);\ 2101 uint8_t * const halfHV= ((uint8_t*)half);\
2102 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ 2102 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
2103 put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\ 2103 put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, stride, 9);\
2104 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 2104 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
2105 OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\ 2105 OPNAME ## pixels8_l2_ ## MMX(dst, halfH+8, halfHV, stride, 8, 8);\
2106 }\ 2106 }\
2107 static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ 2107 static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
2108 uint64_t half[8 + 9];\ 2108 uint64_t half[8 + 9];\
2109 uint8_t * const halfH= ((uint8_t*)half) + 64;\ 2109 uint8_t * const halfH= ((uint8_t*)half) + 64;\
2110 uint8_t * const halfHV= ((uint8_t*)half);\ 2110 uint8_t * const halfHV= ((uint8_t*)half);\
2111 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ 2111 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
2112 put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\ 2112 put ## RND ## pixels8_l2_ ## MMX(halfH, src+1, halfH, 8, stride, 9);\
2113 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 2113 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
2114 OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\ 2114 OPNAME ## pixels8_l2_ ## MMX(dst, halfH+8, halfHV, stride, 8, 8);\
2115 }\ 2115 }\
2116 static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ 2116 static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
2117 uint64_t half[8 + 9];\ 2117 uint64_t half[8 + 9];\
2118 uint8_t * const halfH= ((uint8_t*)half) + 64;\ 2118 uint8_t * const halfH= ((uint8_t*)half) + 64;\
2119 uint8_t * const halfHV= ((uint8_t*)half);\ 2119 uint8_t * const halfHV= ((uint8_t*)half);\
2120 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ 2120 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
2121 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 2121 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
2122 OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\ 2122 OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, stride, 8, 8);\
2123 }\ 2123 }\
2124 static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ 2124 static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
2125 uint64_t half[8 + 9];\ 2125 uint64_t half[8 + 9];\
2126 uint8_t * const halfH= ((uint8_t*)half) + 64;\ 2126 uint8_t * const halfH= ((uint8_t*)half) + 64;\
2127 uint8_t * const halfHV= ((uint8_t*)half);\ 2127 uint8_t * const halfHV= ((uint8_t*)half);\
2128 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ 2128 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
2129 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 2129 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
2130 OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\ 2130 OPNAME ## pixels8_l2_ ## MMX(dst, halfH+8, halfHV, stride, 8, 8);\
2131 }\ 2131 }\
2132 static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ 2132 static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
2133 uint64_t half[8 + 9];\ 2133 uint64_t half[8 + 9];\
2134 uint8_t * const halfH= ((uint8_t*)half);\ 2134 uint8_t * const halfH= ((uint8_t*)half);\
2135 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ 2135 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
2136 put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\ 2136 put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, stride, 9);\
2137 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\ 2137 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\
2138 }\ 2138 }\
2139 static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ 2139 static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
2140 uint64_t half[8 + 9];\ 2140 uint64_t half[8 + 9];\
2141 uint8_t * const halfH= ((uint8_t*)half);\ 2141 uint8_t * const halfH= ((uint8_t*)half);\
2142 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ 2142 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
2143 put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\ 2143 put ## RND ## pixels8_l2_ ## MMX(halfH, src+1, halfH, 8, stride, 9);\
2144 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\ 2144 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\
2145 }\ 2145 }\
2146 static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ 2146 static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
2147 uint64_t half[9];\ 2147 uint64_t half[9];\
2148 uint8_t * const halfH= ((uint8_t*)half);\ 2148 uint8_t * const halfH= ((uint8_t*)half);\
2155 \ 2155 \
2156 static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ 2156 static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
2157 uint64_t temp[32];\ 2157 uint64_t temp[32];\
2158 uint8_t * const half= (uint8_t*)temp;\ 2158 uint8_t * const half= (uint8_t*)temp;\
2159 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, stride, 16);\ 2159 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, stride, 16);\
2160 OPNAME ## pixels16_l2_mmx(dst, src, half, stride, stride, 16);\ 2160 OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, stride, 16);\
2161 }\ 2161 }\
2162 \ 2162 \
2163 static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ 2163 static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
2164 OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, stride, stride, 16);\ 2164 OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, stride, stride, 16);\
2165 }\ 2165 }\
2166 \ 2166 \
2167 static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ 2167 static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
2168 uint64_t temp[32];\ 2168 uint64_t temp[32];\
2169 uint8_t * const half= (uint8_t*)temp;\ 2169 uint8_t * const half= (uint8_t*)temp;\
2170 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, stride, 16);\ 2170 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, stride, 16);\
2171 OPNAME ## pixels16_l2_mmx(dst, src+1, half, stride, stride, 16);\ 2171 OPNAME ## pixels16_l2_ ## MMX(dst, src+1, half, stride, stride, 16);\
2172 }\ 2172 }\
2173 \ 2173 \
2174 static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ 2174 static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
2175 uint64_t temp[32];\ 2175 uint64_t temp[32];\
2176 uint8_t * const half= (uint8_t*)temp;\ 2176 uint8_t * const half= (uint8_t*)temp;\
2177 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\ 2177 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\
2178 OPNAME ## pixels16_l2_mmx(dst, src, half, stride, stride, 16);\ 2178 OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, stride, 16);\
2179 }\ 2179 }\
2180 \ 2180 \
2181 static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ 2181 static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
2182 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, stride, stride);\ 2182 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, stride, stride);\
2183 }\ 2183 }\
2184 \ 2184 \
2185 static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ 2185 static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
2186 uint64_t temp[32];\ 2186 uint64_t temp[32];\
2187 uint8_t * const half= (uint8_t*)temp;\ 2187 uint8_t * const half= (uint8_t*)temp;\
2188 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\ 2188 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\
2189 OPNAME ## pixels16_l2_mmx(dst, src+stride, half, stride, stride, 16);\ 2189 OPNAME ## pixels16_l2_ ## MMX(dst, src+stride, half, stride, stride, 16);\
2190 }\ 2190 }\
2191 static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ 2191 static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
2192 uint64_t half[16*2 + 17*2];\ 2192 uint64_t half[16*2 + 17*2];\
2193 uint8_t * const halfH= ((uint8_t*)half) + 256;\ 2193 uint8_t * const halfH= ((uint8_t*)half) + 256;\
2194 uint8_t * const halfHV= ((uint8_t*)half);\ 2194 uint8_t * const halfHV= ((uint8_t*)half);\
2195 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ 2195 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
2196 put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\ 2196 put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, stride, 17);\
2197 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ 2197 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
2198 OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\ 2198 OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, stride, 16, 16);\
2199 }\ 2199 }\
2200 static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ 2200 static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
2201 uint64_t half[16*2 + 17*2];\ 2201 uint64_t half[16*2 + 17*2];\
2202 uint8_t * const halfH= ((uint8_t*)half) + 256;\ 2202 uint8_t * const halfH= ((uint8_t*)half) + 256;\
2203 uint8_t * const halfHV= ((uint8_t*)half);\ 2203 uint8_t * const halfHV= ((uint8_t*)half);\
2204 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ 2204 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
2205 put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\ 2205 put ## RND ## pixels16_l2_ ## MMX(halfH, src+1, halfH, 16, stride, 17);\
2206 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ 2206 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
2207 OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\ 2207 OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, stride, 16, 16);\
2208 }\ 2208 }\
2209 static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ 2209 static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
2210 uint64_t half[16*2 + 17*2];\ 2210 uint64_t half[16*2 + 17*2];\
2211 uint8_t * const halfH= ((uint8_t*)half) + 256;\ 2211 uint8_t * const halfH= ((uint8_t*)half) + 256;\
2212 uint8_t * const halfHV= ((uint8_t*)half);\ 2212 uint8_t * const halfHV= ((uint8_t*)half);\
2213 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ 2213 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
2214 put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\ 2214 put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, stride, 17);\
2215 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ 2215 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
2216 OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\ 2216 OPNAME ## pixels16_l2_ ## MMX(dst, halfH+16, halfHV, stride, 16, 16);\
2217 }\ 2217 }\
2218 static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ 2218 static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
2219 uint64_t half[16*2 + 17*2];\ 2219 uint64_t half[16*2 + 17*2];\
2220 uint8_t * const halfH= ((uint8_t*)half) + 256;\ 2220 uint8_t * const halfH= ((uint8_t*)half) + 256;\
2221 uint8_t * const halfHV= ((uint8_t*)half);\ 2221 uint8_t * const halfHV= ((uint8_t*)half);\
2222 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ 2222 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
2223 put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\ 2223 put ## RND ## pixels16_l2_ ## MMX(halfH, src+1, halfH, 16, stride, 17);\
2224 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ 2224 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
2225 OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\ 2225 OPNAME ## pixels16_l2_ ## MMX(dst, halfH+16, halfHV, stride, 16, 16);\
2226 }\ 2226 }\
2227 static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ 2227 static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
2228 uint64_t half[16*2 + 17*2];\ 2228 uint64_t half[16*2 + 17*2];\
2229 uint8_t * const halfH= ((uint8_t*)half) + 256;\ 2229 uint8_t * const halfH= ((uint8_t*)half) + 256;\
2230 uint8_t * const halfHV= ((uint8_t*)half);\ 2230 uint8_t * const halfHV= ((uint8_t*)half);\
2231 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ 2231 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
2232 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ 2232 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
2233 OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\ 2233 OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, stride, 16, 16);\
2234 }\ 2234 }\
2235 static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ 2235 static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
2236 uint64_t half[16*2 + 17*2];\ 2236 uint64_t half[16*2 + 17*2];\
2237 uint8_t * const halfH= ((uint8_t*)half) + 256;\ 2237 uint8_t * const halfH= ((uint8_t*)half) + 256;\
2238 uint8_t * const halfHV= ((uint8_t*)half);\ 2238 uint8_t * const halfHV= ((uint8_t*)half);\
2239 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ 2239 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
2240 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ 2240 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
2241 OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\ 2241 OPNAME ## pixels16_l2_ ## MMX(dst, halfH+16, halfHV, stride, 16, 16);\
2242 }\ 2242 }\
2243 static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ 2243 static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
2244 uint64_t half[17*2];\ 2244 uint64_t half[17*2];\
2245 uint8_t * const halfH= ((uint8_t*)half);\ 2245 uint8_t * const halfH= ((uint8_t*)half);\
2246 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ 2246 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
2247 put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\ 2247 put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, stride, 17);\
2248 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\ 2248 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\
2249 }\ 2249 }\
2250 static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ 2250 static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
2251 uint64_t half[17*2];\ 2251 uint64_t half[17*2];\
2252 uint8_t * const halfH= ((uint8_t*)half);\ 2252 uint8_t * const halfH= ((uint8_t*)half);\
2253 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ 2253 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
2254 put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\ 2254 put ## RND ## pixels16_l2_ ## MMX(halfH, src+1, halfH, 16, stride, 17);\
2255 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\ 2255 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\
2256 }\ 2256 }\
2257 static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ 2257 static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
2258 uint64_t half[17*2];\ 2258 uint64_t half[17*2];\
2259 uint8_t * const halfH= ((uint8_t*)half);\ 2259 uint8_t * const halfH= ((uint8_t*)half);\