comparison i386/dsputil_mmx.c @ 1064:b32afefe7d33 libavcodec

* UINTX -> uintx_t INTX -> intx_t
author kabi
date Tue, 11 Feb 2003 16:35:48 +0000
parents bb5de8a59da8
children 92fb44eae6b6
comparison
equal deleted inserted replaced
1063:fdeac9642346 1064:b32afefe7d33
170 #undef PAVGB 170 #undef PAVGB
171 171
172 /***********************************/ 172 /***********************************/
173 /* standard MMX */ 173 /* standard MMX */
174 174
175 static void get_pixels_mmx(DCTELEM *block, const UINT8 *pixels, int line_size) 175 static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size)
176 { 176 {
177 asm volatile( 177 asm volatile(
178 "movl $-128, %%eax \n\t" 178 "movl $-128, %%eax \n\t"
179 "pxor %%mm7, %%mm7 \n\t" 179 "pxor %%mm7, %%mm7 \n\t"
180 ".balign 16 \n\t" 180 ".balign 16 \n\t"
198 : "r" (block+64), "r" (line_size), "r" (line_size*2) 198 : "r" (block+64), "r" (line_size), "r" (line_size*2)
199 : "%eax" 199 : "%eax"
200 ); 200 );
201 } 201 }
202 202
203 static inline void diff_pixels_mmx(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride) 203 static inline void diff_pixels_mmx(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride)
204 { 204 {
205 asm volatile( 205 asm volatile(
206 "pxor %%mm7, %%mm7 \n\t" 206 "pxor %%mm7, %%mm7 \n\t"
207 "movl $-128, %%eax \n\t" 207 "movl $-128, %%eax \n\t"
208 ".balign 16 \n\t" 208 ".balign 16 \n\t"
227 : "r" (block+64), "r" (stride) 227 : "r" (block+64), "r" (stride)
228 : "%eax" 228 : "%eax"
229 ); 229 );
230 } 230 }
231 231
232 void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) 232 void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
233 { 233 {
234 const DCTELEM *p; 234 const DCTELEM *p;
235 UINT8 *pix; 235 uint8_t *pix;
236 236
237 /* read the pixels */ 237 /* read the pixels */
238 p = block; 238 p = block;
239 pix = pixels; 239 pix = pixels;
240 /* unrolled loop */ 240 /* unrolled loop */
282 "movq %%mm6, (%0, %2)\n\t" 282 "movq %%mm6, (%0, %2)\n\t"
283 ::"r" (pix), "r" (line_size), "r" (line_size*3), "r"(p) 283 ::"r" (pix), "r" (line_size), "r" (line_size*3), "r"(p)
284 :"memory"); 284 :"memory");
285 } 285 }
286 286
287 void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) 287 void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
288 { 288 {
289 const DCTELEM *p; 289 const DCTELEM *p;
290 UINT8 *pix; 290 uint8_t *pix;
291 int i; 291 int i;
292 292
293 /* read the pixels */ 293 /* read the pixels */
294 p = block; 294 p = block;
295 pix = pixels; 295 pix = pixels;
323 pix += line_size*2; 323 pix += line_size*2;
324 p += 16; 324 p += 16;
325 } while (--i); 325 } while (--i);
326 } 326 }
327 327
328 static void put_pixels8_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) 328 static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h)
329 { 329 {
330 __asm __volatile( 330 __asm __volatile(
331 "lea (%3, %3), %%eax \n\t" 331 "lea (%3, %3), %%eax \n\t"
332 ".balign 8 \n\t" 332 ".balign 8 \n\t"
333 "1: \n\t" 333 "1: \n\t"
349 : "r"(line_size) 349 : "r"(line_size)
350 : "%eax", "memory" 350 : "%eax", "memory"
351 ); 351 );
352 } 352 }
353 353
354 static void put_pixels16_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) 354 static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h)
355 { 355 {
356 __asm __volatile( 356 __asm __volatile(
357 "lea (%3, %3), %%eax \n\t" 357 "lea (%3, %3), %%eax \n\t"
358 ".balign 8 \n\t" 358 ".balign 8 \n\t"
359 "1: \n\t" 359 "1: \n\t"
400 : : "r" (((int)blocks)+128*6) 400 : : "r" (((int)blocks)+128*6)
401 : "%eax" 401 : "%eax"
402 ); 402 );
403 } 403 }
404 404
405 static int pix_sum16_mmx(UINT8 * pix, int line_size){ 405 static int pix_sum16_mmx(uint8_t * pix, int line_size){
406 const int h=16; 406 const int h=16;
407 int sum; 407 int sum;
408 int index= -line_size*h; 408 int index= -line_size*h;
409 409
410 __asm __volatile( 410 __asm __volatile(
503 "movd %%mm1,%1\n" 503 "movd %%mm1,%1\n"
504 : "+r" (pix), "=r"(tmp) : "r" (line_size) : "%ecx" ); 504 : "+r" (pix), "=r"(tmp) : "r" (line_size) : "%ecx" );
505 return tmp; 505 return tmp;
506 } 506 }
507 507
508 static int sse16_mmx(void *v, UINT8 * pix1, UINT8 * pix2, int line_size) { 508 static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size) {
509 int tmp; 509 int tmp;
510 asm volatile ( 510 asm volatile (
511 "movl $16,%%ecx\n" 511 "movl $16,%%ecx\n"
512 "pxor %%mm0,%%mm0\n" /* mm0 = 0 */ 512 "pxor %%mm0,%%mm0\n" /* mm0 = 0 */
513 "pxor %%mm7,%%mm7\n" /* mm7 holds the sum */ 513 "pxor %%mm7,%%mm7\n" /* mm7 holds the sum */
1156 : "r"(dstStride), "r"(2*dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-6*dstStride)\ 1156 : "r"(dstStride), "r"(2*dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-6*dstStride)\
1157 : "memory"\ 1157 : "memory"\
1158 );\ 1158 );\
1159 }\ 1159 }\
1160 \ 1160 \
1161 static void OPNAME ## qpel8_mc00_ ## MMX (UINT8 *dst, UINT8 *src, int stride){\ 1161 static void OPNAME ## qpel8_mc00_ ## MMX (uint8_t *dst, uint8_t *src, int stride){\
1162 OPNAME ## pixels8_mmx(dst, src, stride, 8);\ 1162 OPNAME ## pixels8_mmx(dst, src, stride, 8);\
1163 }\ 1163 }\
1164 \ 1164 \
1165 static void OPNAME ## qpel8_mc10_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1165 static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
1166 uint64_t temp[8];\ 1166 uint64_t temp[8];\
1167 uint8_t * const half= (uint8_t*)temp;\ 1167 uint8_t * const half= (uint8_t*)temp;\
1168 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\ 1168 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\
1169 OPNAME ## pixels8_l2_mmx(dst, src, half, stride, stride, 8);\ 1169 OPNAME ## pixels8_l2_mmx(dst, src, half, stride, stride, 8);\
1170 }\ 1170 }\
1171 \ 1171 \
1172 static void OPNAME ## qpel8_mc20_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1172 static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
1173 OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, stride, 8);\ 1173 OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, stride, 8);\
1174 }\ 1174 }\
1175 \ 1175 \
1176 static void OPNAME ## qpel8_mc30_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1176 static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
1177 uint64_t temp[8];\ 1177 uint64_t temp[8];\
1178 uint8_t * const half= (uint8_t*)temp;\ 1178 uint8_t * const half= (uint8_t*)temp;\
1179 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\ 1179 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\
1180 OPNAME ## pixels8_l2_mmx(dst, src+1, half, stride, stride, 8);\ 1180 OPNAME ## pixels8_l2_mmx(dst, src+1, half, stride, stride, 8);\
1181 }\ 1181 }\
1182 \ 1182 \
1183 static void OPNAME ## qpel8_mc01_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1183 static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
1184 uint64_t temp[8];\ 1184 uint64_t temp[8];\
1185 uint8_t * const half= (uint8_t*)temp;\ 1185 uint8_t * const half= (uint8_t*)temp;\
1186 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\ 1186 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\
1187 OPNAME ## pixels8_l2_mmx(dst, src, half, stride, stride, 8);\ 1187 OPNAME ## pixels8_l2_mmx(dst, src, half, stride, stride, 8);\
1188 }\ 1188 }\
1189 \ 1189 \
1190 static void OPNAME ## qpel8_mc02_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1190 static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
1191 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, stride, stride);\ 1191 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, stride, stride);\
1192 }\ 1192 }\
1193 \ 1193 \
1194 static void OPNAME ## qpel8_mc03_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1194 static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
1195 uint64_t temp[8];\ 1195 uint64_t temp[8];\
1196 uint8_t * const half= (uint8_t*)temp;\ 1196 uint8_t * const half= (uint8_t*)temp;\
1197 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\ 1197 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\
1198 OPNAME ## pixels8_l2_mmx(dst, src+stride, half, stride, stride, 8);\ 1198 OPNAME ## pixels8_l2_mmx(dst, src+stride, half, stride, stride, 8);\
1199 }\ 1199 }\
1200 static void OPNAME ## qpel8_mc11_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1200 static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
1201 uint64_t half[8 + 9];\ 1201 uint64_t half[8 + 9];\
1202 uint8_t * const halfH= ((uint8_t*)half) + 64;\ 1202 uint8_t * const halfH= ((uint8_t*)half) + 64;\
1203 uint8_t * const halfHV= ((uint8_t*)half);\ 1203 uint8_t * const halfHV= ((uint8_t*)half);\
1204 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ 1204 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
1205 put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\ 1205 put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\
1206 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 1206 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
1207 OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\ 1207 OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\
1208 }\ 1208 }\
1209 static void OPNAME ## qpel8_mc31_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1209 static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
1210 uint64_t half[8 + 9];\ 1210 uint64_t half[8 + 9];\
1211 uint8_t * const halfH= ((uint8_t*)half) + 64;\ 1211 uint8_t * const halfH= ((uint8_t*)half) + 64;\
1212 uint8_t * const halfHV= ((uint8_t*)half);\ 1212 uint8_t * const halfHV= ((uint8_t*)half);\
1213 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ 1213 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
1214 put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\ 1214 put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\
1215 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 1215 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
1216 OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\ 1216 OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\
1217 }\ 1217 }\
1218 static void OPNAME ## qpel8_mc13_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1218 static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
1219 uint64_t half[8 + 9];\ 1219 uint64_t half[8 + 9];\
1220 uint8_t * const halfH= ((uint8_t*)half) + 64;\ 1220 uint8_t * const halfH= ((uint8_t*)half) + 64;\
1221 uint8_t * const halfHV= ((uint8_t*)half);\ 1221 uint8_t * const halfHV= ((uint8_t*)half);\
1222 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ 1222 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
1223 put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\ 1223 put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\
1224 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 1224 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
1225 OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\ 1225 OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\
1226 }\ 1226 }\
1227 static void OPNAME ## qpel8_mc33_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1227 static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
1228 uint64_t half[8 + 9];\ 1228 uint64_t half[8 + 9];\
1229 uint8_t * const halfH= ((uint8_t*)half) + 64;\ 1229 uint8_t * const halfH= ((uint8_t*)half) + 64;\
1230 uint8_t * const halfHV= ((uint8_t*)half);\ 1230 uint8_t * const halfHV= ((uint8_t*)half);\
1231 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ 1231 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
1232 put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\ 1232 put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\
1233 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 1233 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
1234 OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\ 1234 OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\
1235 }\ 1235 }\
1236 static void OPNAME ## qpel8_mc21_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1236 static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
1237 uint64_t half[8 + 9];\ 1237 uint64_t half[8 + 9];\
1238 uint8_t * const halfH= ((uint8_t*)half) + 64;\ 1238 uint8_t * const halfH= ((uint8_t*)half) + 64;\
1239 uint8_t * const halfHV= ((uint8_t*)half);\ 1239 uint8_t * const halfHV= ((uint8_t*)half);\
1240 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ 1240 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
1241 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 1241 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
1242 OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\ 1242 OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\
1243 }\ 1243 }\
1244 static void OPNAME ## qpel8_mc23_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1244 static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
1245 uint64_t half[8 + 9];\ 1245 uint64_t half[8 + 9];\
1246 uint8_t * const halfH= ((uint8_t*)half) + 64;\ 1246 uint8_t * const halfH= ((uint8_t*)half) + 64;\
1247 uint8_t * const halfHV= ((uint8_t*)half);\ 1247 uint8_t * const halfHV= ((uint8_t*)half);\
1248 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ 1248 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
1249 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 1249 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
1250 OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\ 1250 OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\
1251 }\ 1251 }\
1252 static void OPNAME ## qpel8_mc12_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1252 static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
1253 uint64_t half[8 + 9];\ 1253 uint64_t half[8 + 9];\
1254 uint8_t * const halfH= ((uint8_t*)half);\ 1254 uint8_t * const halfH= ((uint8_t*)half);\
1255 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ 1255 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
1256 put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\ 1256 put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\
1257 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\ 1257 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\
1258 }\ 1258 }\
1259 static void OPNAME ## qpel8_mc32_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1259 static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
1260 uint64_t half[8 + 9];\ 1260 uint64_t half[8 + 9];\
1261 uint8_t * const halfH= ((uint8_t*)half);\ 1261 uint8_t * const halfH= ((uint8_t*)half);\
1262 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ 1262 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
1263 put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\ 1263 put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\
1264 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\ 1264 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\
1265 }\ 1265 }\
1266 static void OPNAME ## qpel8_mc22_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1266 static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
1267 uint64_t half[9];\ 1267 uint64_t half[9];\
1268 uint8_t * const halfH= ((uint8_t*)half);\ 1268 uint8_t * const halfH= ((uint8_t*)half);\
1269 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ 1269 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
1270 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\ 1270 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\
1271 }\ 1271 }\
1272 static void OPNAME ## qpel16_mc00_ ## MMX (UINT8 *dst, UINT8 *src, int stride){\ 1272 static void OPNAME ## qpel16_mc00_ ## MMX (uint8_t *dst, uint8_t *src, int stride){\
1273 OPNAME ## pixels16_mmx(dst, src, stride, 16);\ 1273 OPNAME ## pixels16_mmx(dst, src, stride, 16);\
1274 }\ 1274 }\
1275 \ 1275 \
1276 static void OPNAME ## qpel16_mc10_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1276 static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
1277 uint64_t temp[32];\ 1277 uint64_t temp[32];\
1278 uint8_t * const half= (uint8_t*)temp;\ 1278 uint8_t * const half= (uint8_t*)temp;\
1279 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, stride, 16);\ 1279 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, stride, 16);\
1280 OPNAME ## pixels16_l2_mmx(dst, src, half, stride, stride, 16);\ 1280 OPNAME ## pixels16_l2_mmx(dst, src, half, stride, stride, 16);\
1281 }\ 1281 }\
1282 \ 1282 \
1283 static void OPNAME ## qpel16_mc20_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1283 static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
1284 OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, stride, stride, 16);\ 1284 OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, stride, stride, 16);\
1285 }\ 1285 }\
1286 \ 1286 \
1287 static void OPNAME ## qpel16_mc30_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1287 static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
1288 uint64_t temp[32];\ 1288 uint64_t temp[32];\
1289 uint8_t * const half= (uint8_t*)temp;\ 1289 uint8_t * const half= (uint8_t*)temp;\
1290 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, stride, 16);\ 1290 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, stride, 16);\
1291 OPNAME ## pixels16_l2_mmx(dst, src+1, half, stride, stride, 16);\ 1291 OPNAME ## pixels16_l2_mmx(dst, src+1, half, stride, stride, 16);\
1292 }\ 1292 }\
1293 \ 1293 \
1294 static void OPNAME ## qpel16_mc01_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1294 static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
1295 uint64_t temp[32];\ 1295 uint64_t temp[32];\
1296 uint8_t * const half= (uint8_t*)temp;\ 1296 uint8_t * const half= (uint8_t*)temp;\
1297 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\ 1297 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\
1298 OPNAME ## pixels16_l2_mmx(dst, src, half, stride, stride, 16);\ 1298 OPNAME ## pixels16_l2_mmx(dst, src, half, stride, stride, 16);\
1299 }\ 1299 }\
1300 \ 1300 \
1301 static void OPNAME ## qpel16_mc02_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1301 static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
1302 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, stride, stride);\ 1302 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, stride, stride);\
1303 }\ 1303 }\
1304 \ 1304 \
1305 static void OPNAME ## qpel16_mc03_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1305 static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
1306 uint64_t temp[32];\ 1306 uint64_t temp[32];\
1307 uint8_t * const half= (uint8_t*)temp;\ 1307 uint8_t * const half= (uint8_t*)temp;\
1308 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\ 1308 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\
1309 OPNAME ## pixels16_l2_mmx(dst, src+stride, half, stride, stride, 16);\ 1309 OPNAME ## pixels16_l2_mmx(dst, src+stride, half, stride, stride, 16);\
1310 }\ 1310 }\
1311 static void OPNAME ## qpel16_mc11_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1311 static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
1312 uint64_t half[16*2 + 17*2];\ 1312 uint64_t half[16*2 + 17*2];\
1313 uint8_t * const halfH= ((uint8_t*)half) + 256;\ 1313 uint8_t * const halfH= ((uint8_t*)half) + 256;\
1314 uint8_t * const halfHV= ((uint8_t*)half);\ 1314 uint8_t * const halfHV= ((uint8_t*)half);\
1315 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ 1315 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
1316 put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\ 1316 put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\
1317 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ 1317 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
1318 OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\ 1318 OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\
1319 }\ 1319 }\
1320 static void OPNAME ## qpel16_mc31_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1320 static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
1321 uint64_t half[16*2 + 17*2];\ 1321 uint64_t half[16*2 + 17*2];\
1322 uint8_t * const halfH= ((uint8_t*)half) + 256;\ 1322 uint8_t * const halfH= ((uint8_t*)half) + 256;\
1323 uint8_t * const halfHV= ((uint8_t*)half);\ 1323 uint8_t * const halfHV= ((uint8_t*)half);\
1324 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ 1324 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
1325 put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\ 1325 put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\
1326 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ 1326 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
1327 OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\ 1327 OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\
1328 }\ 1328 }\
1329 static void OPNAME ## qpel16_mc13_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1329 static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
1330 uint64_t half[16*2 + 17*2];\ 1330 uint64_t half[16*2 + 17*2];\
1331 uint8_t * const halfH= ((uint8_t*)half) + 256;\ 1331 uint8_t * const halfH= ((uint8_t*)half) + 256;\
1332 uint8_t * const halfHV= ((uint8_t*)half);\ 1332 uint8_t * const halfHV= ((uint8_t*)half);\
1333 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ 1333 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
1334 put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\ 1334 put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\
1335 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ 1335 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
1336 OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\ 1336 OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\
1337 }\ 1337 }\
1338 static void OPNAME ## qpel16_mc33_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1338 static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
1339 uint64_t half[16*2 + 17*2];\ 1339 uint64_t half[16*2 + 17*2];\
1340 uint8_t * const halfH= ((uint8_t*)half) + 256;\ 1340 uint8_t * const halfH= ((uint8_t*)half) + 256;\
1341 uint8_t * const halfHV= ((uint8_t*)half);\ 1341 uint8_t * const halfHV= ((uint8_t*)half);\
1342 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ 1342 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
1343 put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\ 1343 put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\
1344 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ 1344 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
1345 OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\ 1345 OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\
1346 }\ 1346 }\
1347 static void OPNAME ## qpel16_mc21_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1347 static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
1348 uint64_t half[16*2 + 17*2];\ 1348 uint64_t half[16*2 + 17*2];\
1349 uint8_t * const halfH= ((uint8_t*)half) + 256;\ 1349 uint8_t * const halfH= ((uint8_t*)half) + 256;\
1350 uint8_t * const halfHV= ((uint8_t*)half);\ 1350 uint8_t * const halfHV= ((uint8_t*)half);\
1351 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ 1351 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
1352 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ 1352 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
1353 OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\ 1353 OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\
1354 }\ 1354 }\
1355 static void OPNAME ## qpel16_mc23_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1355 static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
1356 uint64_t half[16*2 + 17*2];\ 1356 uint64_t half[16*2 + 17*2];\
1357 uint8_t * const halfH= ((uint8_t*)half) + 256;\ 1357 uint8_t * const halfH= ((uint8_t*)half) + 256;\
1358 uint8_t * const halfHV= ((uint8_t*)half);\ 1358 uint8_t * const halfHV= ((uint8_t*)half);\
1359 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ 1359 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
1360 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ 1360 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
1361 OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\ 1361 OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\
1362 }\ 1362 }\
1363 static void OPNAME ## qpel16_mc12_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1363 static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
1364 uint64_t half[17*2];\ 1364 uint64_t half[17*2];\
1365 uint8_t * const halfH= ((uint8_t*)half);\ 1365 uint8_t * const halfH= ((uint8_t*)half);\
1366 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ 1366 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
1367 put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\ 1367 put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\
1368 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\ 1368 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\
1369 }\ 1369 }\
1370 static void OPNAME ## qpel16_mc32_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1370 static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
1371 uint64_t half[17*2];\ 1371 uint64_t half[17*2];\
1372 uint8_t * const halfH= ((uint8_t*)half);\ 1372 uint8_t * const halfH= ((uint8_t*)half);\
1373 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ 1373 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
1374 put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\ 1374 put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\
1375 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\ 1375 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\
1376 }\ 1376 }\
1377 static void OPNAME ## qpel16_mc22_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1377 static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
1378 uint64_t half[17*2];\ 1378 uint64_t half[17*2];\
1379 uint8_t * const halfH= ((uint8_t*)half);\ 1379 uint8_t * const halfH= ((uint8_t*)half);\
1380 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ 1380 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
1381 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\ 1381 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\
1382 } 1382 }