Mercurial > libavcodec.hg
comparison i386/dsputil_mmx.c @ 1064:b32afefe7d33 libavcodec
* UINTX -> uintx_t INTX -> intx_t
author | kabi |
---|---|
date | Tue, 11 Feb 2003 16:35:48 +0000 |
parents | bb5de8a59da8 |
children | 92fb44eae6b6 |
comparison
equal
deleted
inserted
replaced
1063:fdeac9642346 | 1064:b32afefe7d33 |
---|---|
170 #undef PAVGB | 170 #undef PAVGB |
171 | 171 |
172 /***********************************/ | 172 /***********************************/ |
173 /* standard MMX */ | 173 /* standard MMX */ |
174 | 174 |
175 static void get_pixels_mmx(DCTELEM *block, const UINT8 *pixels, int line_size) | 175 static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size) |
176 { | 176 { |
177 asm volatile( | 177 asm volatile( |
178 "movl $-128, %%eax \n\t" | 178 "movl $-128, %%eax \n\t" |
179 "pxor %%mm7, %%mm7 \n\t" | 179 "pxor %%mm7, %%mm7 \n\t" |
180 ".balign 16 \n\t" | 180 ".balign 16 \n\t" |
198 : "r" (block+64), "r" (line_size), "r" (line_size*2) | 198 : "r" (block+64), "r" (line_size), "r" (line_size*2) |
199 : "%eax" | 199 : "%eax" |
200 ); | 200 ); |
201 } | 201 } |
202 | 202 |
203 static inline void diff_pixels_mmx(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride) | 203 static inline void diff_pixels_mmx(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride) |
204 { | 204 { |
205 asm volatile( | 205 asm volatile( |
206 "pxor %%mm7, %%mm7 \n\t" | 206 "pxor %%mm7, %%mm7 \n\t" |
207 "movl $-128, %%eax \n\t" | 207 "movl $-128, %%eax \n\t" |
208 ".balign 16 \n\t" | 208 ".balign 16 \n\t" |
227 : "r" (block+64), "r" (stride) | 227 : "r" (block+64), "r" (stride) |
228 : "%eax" | 228 : "%eax" |
229 ); | 229 ); |
230 } | 230 } |
231 | 231 |
232 void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) | 232 void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) |
233 { | 233 { |
234 const DCTELEM *p; | 234 const DCTELEM *p; |
235 UINT8 *pix; | 235 uint8_t *pix; |
236 | 236 |
237 /* read the pixels */ | 237 /* read the pixels */ |
238 p = block; | 238 p = block; |
239 pix = pixels; | 239 pix = pixels; |
240 /* unrolled loop */ | 240 /* unrolled loop */ |
282 "movq %%mm6, (%0, %2)\n\t" | 282 "movq %%mm6, (%0, %2)\n\t" |
283 ::"r" (pix), "r" (line_size), "r" (line_size*3), "r"(p) | 283 ::"r" (pix), "r" (line_size), "r" (line_size*3), "r"(p) |
284 :"memory"); | 284 :"memory"); |
285 } | 285 } |
286 | 286 |
287 void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) | 287 void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) |
288 { | 288 { |
289 const DCTELEM *p; | 289 const DCTELEM *p; |
290 UINT8 *pix; | 290 uint8_t *pix; |
291 int i; | 291 int i; |
292 | 292 |
293 /* read the pixels */ | 293 /* read the pixels */ |
294 p = block; | 294 p = block; |
295 pix = pixels; | 295 pix = pixels; |
323 pix += line_size*2; | 323 pix += line_size*2; |
324 p += 16; | 324 p += 16; |
325 } while (--i); | 325 } while (--i); |
326 } | 326 } |
327 | 327 |
328 static void put_pixels8_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) | 328 static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
329 { | 329 { |
330 __asm __volatile( | 330 __asm __volatile( |
331 "lea (%3, %3), %%eax \n\t" | 331 "lea (%3, %3), %%eax \n\t" |
332 ".balign 8 \n\t" | 332 ".balign 8 \n\t" |
333 "1: \n\t" | 333 "1: \n\t" |
349 : "r"(line_size) | 349 : "r"(line_size) |
350 : "%eax", "memory" | 350 : "%eax", "memory" |
351 ); | 351 ); |
352 } | 352 } |
353 | 353 |
354 static void put_pixels16_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) | 354 static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
355 { | 355 { |
356 __asm __volatile( | 356 __asm __volatile( |
357 "lea (%3, %3), %%eax \n\t" | 357 "lea (%3, %3), %%eax \n\t" |
358 ".balign 8 \n\t" | 358 ".balign 8 \n\t" |
359 "1: \n\t" | 359 "1: \n\t" |
400 : : "r" (((int)blocks)+128*6) | 400 : : "r" (((int)blocks)+128*6) |
401 : "%eax" | 401 : "%eax" |
402 ); | 402 ); |
403 } | 403 } |
404 | 404 |
405 static int pix_sum16_mmx(UINT8 * pix, int line_size){ | 405 static int pix_sum16_mmx(uint8_t * pix, int line_size){ |
406 const int h=16; | 406 const int h=16; |
407 int sum; | 407 int sum; |
408 int index= -line_size*h; | 408 int index= -line_size*h; |
409 | 409 |
410 __asm __volatile( | 410 __asm __volatile( |
503 "movd %%mm1,%1\n" | 503 "movd %%mm1,%1\n" |
504 : "+r" (pix), "=r"(tmp) : "r" (line_size) : "%ecx" ); | 504 : "+r" (pix), "=r"(tmp) : "r" (line_size) : "%ecx" ); |
505 return tmp; | 505 return tmp; |
506 } | 506 } |
507 | 507 |
508 static int sse16_mmx(void *v, UINT8 * pix1, UINT8 * pix2, int line_size) { | 508 static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size) { |
509 int tmp; | 509 int tmp; |
510 asm volatile ( | 510 asm volatile ( |
511 "movl $16,%%ecx\n" | 511 "movl $16,%%ecx\n" |
512 "pxor %%mm0,%%mm0\n" /* mm0 = 0 */ | 512 "pxor %%mm0,%%mm0\n" /* mm0 = 0 */ |
513 "pxor %%mm7,%%mm7\n" /* mm7 holds the sum */ | 513 "pxor %%mm7,%%mm7\n" /* mm7 holds the sum */ |
1156 : "r"(dstStride), "r"(2*dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-6*dstStride)\ | 1156 : "r"(dstStride), "r"(2*dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-6*dstStride)\ |
1157 : "memory"\ | 1157 : "memory"\ |
1158 );\ | 1158 );\ |
1159 }\ | 1159 }\ |
1160 \ | 1160 \ |
1161 static void OPNAME ## qpel8_mc00_ ## MMX (UINT8 *dst, UINT8 *src, int stride){\ | 1161 static void OPNAME ## qpel8_mc00_ ## MMX (uint8_t *dst, uint8_t *src, int stride){\ |
1162 OPNAME ## pixels8_mmx(dst, src, stride, 8);\ | 1162 OPNAME ## pixels8_mmx(dst, src, stride, 8);\ |
1163 }\ | 1163 }\ |
1164 \ | 1164 \ |
1165 static void OPNAME ## qpel8_mc10_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1165 static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1166 uint64_t temp[8];\ | 1166 uint64_t temp[8];\ |
1167 uint8_t * const half= (uint8_t*)temp;\ | 1167 uint8_t * const half= (uint8_t*)temp;\ |
1168 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\ | 1168 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\ |
1169 OPNAME ## pixels8_l2_mmx(dst, src, half, stride, stride, 8);\ | 1169 OPNAME ## pixels8_l2_mmx(dst, src, half, stride, stride, 8);\ |
1170 }\ | 1170 }\ |
1171 \ | 1171 \ |
1172 static void OPNAME ## qpel8_mc20_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1172 static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1173 OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, stride, 8);\ | 1173 OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, stride, 8);\ |
1174 }\ | 1174 }\ |
1175 \ | 1175 \ |
1176 static void OPNAME ## qpel8_mc30_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1176 static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1177 uint64_t temp[8];\ | 1177 uint64_t temp[8];\ |
1178 uint8_t * const half= (uint8_t*)temp;\ | 1178 uint8_t * const half= (uint8_t*)temp;\ |
1179 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\ | 1179 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\ |
1180 OPNAME ## pixels8_l2_mmx(dst, src+1, half, stride, stride, 8);\ | 1180 OPNAME ## pixels8_l2_mmx(dst, src+1, half, stride, stride, 8);\ |
1181 }\ | 1181 }\ |
1182 \ | 1182 \ |
1183 static void OPNAME ## qpel8_mc01_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1183 static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1184 uint64_t temp[8];\ | 1184 uint64_t temp[8];\ |
1185 uint8_t * const half= (uint8_t*)temp;\ | 1185 uint8_t * const half= (uint8_t*)temp;\ |
1186 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\ | 1186 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\ |
1187 OPNAME ## pixels8_l2_mmx(dst, src, half, stride, stride, 8);\ | 1187 OPNAME ## pixels8_l2_mmx(dst, src, half, stride, stride, 8);\ |
1188 }\ | 1188 }\ |
1189 \ | 1189 \ |
1190 static void OPNAME ## qpel8_mc02_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1190 static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1191 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, stride, stride);\ | 1191 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, stride, stride);\ |
1192 }\ | 1192 }\ |
1193 \ | 1193 \ |
1194 static void OPNAME ## qpel8_mc03_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1194 static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1195 uint64_t temp[8];\ | 1195 uint64_t temp[8];\ |
1196 uint8_t * const half= (uint8_t*)temp;\ | 1196 uint8_t * const half= (uint8_t*)temp;\ |
1197 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\ | 1197 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\ |
1198 OPNAME ## pixels8_l2_mmx(dst, src+stride, half, stride, stride, 8);\ | 1198 OPNAME ## pixels8_l2_mmx(dst, src+stride, half, stride, stride, 8);\ |
1199 }\ | 1199 }\ |
1200 static void OPNAME ## qpel8_mc11_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1200 static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1201 uint64_t half[8 + 9];\ | 1201 uint64_t half[8 + 9];\ |
1202 uint8_t * const halfH= ((uint8_t*)half) + 64;\ | 1202 uint8_t * const halfH= ((uint8_t*)half) + 64;\ |
1203 uint8_t * const halfHV= ((uint8_t*)half);\ | 1203 uint8_t * const halfHV= ((uint8_t*)half);\ |
1204 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ | 1204 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ |
1205 put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\ | 1205 put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\ |
1206 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ | 1206 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ |
1207 OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\ | 1207 OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\ |
1208 }\ | 1208 }\ |
1209 static void OPNAME ## qpel8_mc31_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1209 static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1210 uint64_t half[8 + 9];\ | 1210 uint64_t half[8 + 9];\ |
1211 uint8_t * const halfH= ((uint8_t*)half) + 64;\ | 1211 uint8_t * const halfH= ((uint8_t*)half) + 64;\ |
1212 uint8_t * const halfHV= ((uint8_t*)half);\ | 1212 uint8_t * const halfHV= ((uint8_t*)half);\ |
1213 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ | 1213 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ |
1214 put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\ | 1214 put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\ |
1215 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ | 1215 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ |
1216 OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\ | 1216 OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\ |
1217 }\ | 1217 }\ |
1218 static void OPNAME ## qpel8_mc13_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1218 static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1219 uint64_t half[8 + 9];\ | 1219 uint64_t half[8 + 9];\ |
1220 uint8_t * const halfH= ((uint8_t*)half) + 64;\ | 1220 uint8_t * const halfH= ((uint8_t*)half) + 64;\ |
1221 uint8_t * const halfHV= ((uint8_t*)half);\ | 1221 uint8_t * const halfHV= ((uint8_t*)half);\ |
1222 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ | 1222 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ |
1223 put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\ | 1223 put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\ |
1224 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ | 1224 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ |
1225 OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\ | 1225 OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\ |
1226 }\ | 1226 }\ |
1227 static void OPNAME ## qpel8_mc33_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1227 static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1228 uint64_t half[8 + 9];\ | 1228 uint64_t half[8 + 9];\ |
1229 uint8_t * const halfH= ((uint8_t*)half) + 64;\ | 1229 uint8_t * const halfH= ((uint8_t*)half) + 64;\ |
1230 uint8_t * const halfHV= ((uint8_t*)half);\ | 1230 uint8_t * const halfHV= ((uint8_t*)half);\ |
1231 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ | 1231 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ |
1232 put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\ | 1232 put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\ |
1233 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ | 1233 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ |
1234 OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\ | 1234 OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\ |
1235 }\ | 1235 }\ |
1236 static void OPNAME ## qpel8_mc21_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1236 static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1237 uint64_t half[8 + 9];\ | 1237 uint64_t half[8 + 9];\ |
1238 uint8_t * const halfH= ((uint8_t*)half) + 64;\ | 1238 uint8_t * const halfH= ((uint8_t*)half) + 64;\ |
1239 uint8_t * const halfHV= ((uint8_t*)half);\ | 1239 uint8_t * const halfHV= ((uint8_t*)half);\ |
1240 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ | 1240 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ |
1241 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ | 1241 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ |
1242 OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\ | 1242 OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\ |
1243 }\ | 1243 }\ |
1244 static void OPNAME ## qpel8_mc23_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1244 static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1245 uint64_t half[8 + 9];\ | 1245 uint64_t half[8 + 9];\ |
1246 uint8_t * const halfH= ((uint8_t*)half) + 64;\ | 1246 uint8_t * const halfH= ((uint8_t*)half) + 64;\ |
1247 uint8_t * const halfHV= ((uint8_t*)half);\ | 1247 uint8_t * const halfHV= ((uint8_t*)half);\ |
1248 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ | 1248 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ |
1249 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ | 1249 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ |
1250 OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\ | 1250 OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\ |
1251 }\ | 1251 }\ |
1252 static void OPNAME ## qpel8_mc12_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1252 static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1253 uint64_t half[8 + 9];\ | 1253 uint64_t half[8 + 9];\ |
1254 uint8_t * const halfH= ((uint8_t*)half);\ | 1254 uint8_t * const halfH= ((uint8_t*)half);\ |
1255 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ | 1255 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ |
1256 put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\ | 1256 put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\ |
1257 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\ | 1257 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\ |
1258 }\ | 1258 }\ |
1259 static void OPNAME ## qpel8_mc32_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1259 static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1260 uint64_t half[8 + 9];\ | 1260 uint64_t half[8 + 9];\ |
1261 uint8_t * const halfH= ((uint8_t*)half);\ | 1261 uint8_t * const halfH= ((uint8_t*)half);\ |
1262 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ | 1262 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ |
1263 put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\ | 1263 put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\ |
1264 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\ | 1264 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\ |
1265 }\ | 1265 }\ |
1266 static void OPNAME ## qpel8_mc22_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1266 static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1267 uint64_t half[9];\ | 1267 uint64_t half[9];\ |
1268 uint8_t * const halfH= ((uint8_t*)half);\ | 1268 uint8_t * const halfH= ((uint8_t*)half);\ |
1269 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ | 1269 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ |
1270 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\ | 1270 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\ |
1271 }\ | 1271 }\ |
1272 static void OPNAME ## qpel16_mc00_ ## MMX (UINT8 *dst, UINT8 *src, int stride){\ | 1272 static void OPNAME ## qpel16_mc00_ ## MMX (uint8_t *dst, uint8_t *src, int stride){\ |
1273 OPNAME ## pixels16_mmx(dst, src, stride, 16);\ | 1273 OPNAME ## pixels16_mmx(dst, src, stride, 16);\ |
1274 }\ | 1274 }\ |
1275 \ | 1275 \ |
1276 static void OPNAME ## qpel16_mc10_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1276 static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1277 uint64_t temp[32];\ | 1277 uint64_t temp[32];\ |
1278 uint8_t * const half= (uint8_t*)temp;\ | 1278 uint8_t * const half= (uint8_t*)temp;\ |
1279 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, stride, 16);\ | 1279 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, stride, 16);\ |
1280 OPNAME ## pixels16_l2_mmx(dst, src, half, stride, stride, 16);\ | 1280 OPNAME ## pixels16_l2_mmx(dst, src, half, stride, stride, 16);\ |
1281 }\ | 1281 }\ |
1282 \ | 1282 \ |
1283 static void OPNAME ## qpel16_mc20_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1283 static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1284 OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, stride, stride, 16);\ | 1284 OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, stride, stride, 16);\ |
1285 }\ | 1285 }\ |
1286 \ | 1286 \ |
1287 static void OPNAME ## qpel16_mc30_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1287 static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1288 uint64_t temp[32];\ | 1288 uint64_t temp[32];\ |
1289 uint8_t * const half= (uint8_t*)temp;\ | 1289 uint8_t * const half= (uint8_t*)temp;\ |
1290 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, stride, 16);\ | 1290 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, stride, 16);\ |
1291 OPNAME ## pixels16_l2_mmx(dst, src+1, half, stride, stride, 16);\ | 1291 OPNAME ## pixels16_l2_mmx(dst, src+1, half, stride, stride, 16);\ |
1292 }\ | 1292 }\ |
1293 \ | 1293 \ |
1294 static void OPNAME ## qpel16_mc01_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1294 static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1295 uint64_t temp[32];\ | 1295 uint64_t temp[32];\ |
1296 uint8_t * const half= (uint8_t*)temp;\ | 1296 uint8_t * const half= (uint8_t*)temp;\ |
1297 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\ | 1297 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\ |
1298 OPNAME ## pixels16_l2_mmx(dst, src, half, stride, stride, 16);\ | 1298 OPNAME ## pixels16_l2_mmx(dst, src, half, stride, stride, 16);\ |
1299 }\ | 1299 }\ |
1300 \ | 1300 \ |
1301 static void OPNAME ## qpel16_mc02_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1301 static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1302 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, stride, stride);\ | 1302 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, stride, stride);\ |
1303 }\ | 1303 }\ |
1304 \ | 1304 \ |
1305 static void OPNAME ## qpel16_mc03_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1305 static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1306 uint64_t temp[32];\ | 1306 uint64_t temp[32];\ |
1307 uint8_t * const half= (uint8_t*)temp;\ | 1307 uint8_t * const half= (uint8_t*)temp;\ |
1308 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\ | 1308 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\ |
1309 OPNAME ## pixels16_l2_mmx(dst, src+stride, half, stride, stride, 16);\ | 1309 OPNAME ## pixels16_l2_mmx(dst, src+stride, half, stride, stride, 16);\ |
1310 }\ | 1310 }\ |
1311 static void OPNAME ## qpel16_mc11_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1311 static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1312 uint64_t half[16*2 + 17*2];\ | 1312 uint64_t half[16*2 + 17*2];\ |
1313 uint8_t * const halfH= ((uint8_t*)half) + 256;\ | 1313 uint8_t * const halfH= ((uint8_t*)half) + 256;\ |
1314 uint8_t * const halfHV= ((uint8_t*)half);\ | 1314 uint8_t * const halfHV= ((uint8_t*)half);\ |
1315 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ | 1315 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ |
1316 put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\ | 1316 put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\ |
1317 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ | 1317 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ |
1318 OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\ | 1318 OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\ |
1319 }\ | 1319 }\ |
1320 static void OPNAME ## qpel16_mc31_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1320 static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1321 uint64_t half[16*2 + 17*2];\ | 1321 uint64_t half[16*2 + 17*2];\ |
1322 uint8_t * const halfH= ((uint8_t*)half) + 256;\ | 1322 uint8_t * const halfH= ((uint8_t*)half) + 256;\ |
1323 uint8_t * const halfHV= ((uint8_t*)half);\ | 1323 uint8_t * const halfHV= ((uint8_t*)half);\ |
1324 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ | 1324 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ |
1325 put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\ | 1325 put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\ |
1326 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ | 1326 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ |
1327 OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\ | 1327 OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\ |
1328 }\ | 1328 }\ |
1329 static void OPNAME ## qpel16_mc13_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1329 static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1330 uint64_t half[16*2 + 17*2];\ | 1330 uint64_t half[16*2 + 17*2];\ |
1331 uint8_t * const halfH= ((uint8_t*)half) + 256;\ | 1331 uint8_t * const halfH= ((uint8_t*)half) + 256;\ |
1332 uint8_t * const halfHV= ((uint8_t*)half);\ | 1332 uint8_t * const halfHV= ((uint8_t*)half);\ |
1333 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ | 1333 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ |
1334 put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\ | 1334 put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\ |
1335 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ | 1335 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ |
1336 OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\ | 1336 OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\ |
1337 }\ | 1337 }\ |
1338 static void OPNAME ## qpel16_mc33_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1338 static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1339 uint64_t half[16*2 + 17*2];\ | 1339 uint64_t half[16*2 + 17*2];\ |
1340 uint8_t * const halfH= ((uint8_t*)half) + 256;\ | 1340 uint8_t * const halfH= ((uint8_t*)half) + 256;\ |
1341 uint8_t * const halfHV= ((uint8_t*)half);\ | 1341 uint8_t * const halfHV= ((uint8_t*)half);\ |
1342 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ | 1342 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ |
1343 put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\ | 1343 put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\ |
1344 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ | 1344 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ |
1345 OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\ | 1345 OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\ |
1346 }\ | 1346 }\ |
1347 static void OPNAME ## qpel16_mc21_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1347 static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1348 uint64_t half[16*2 + 17*2];\ | 1348 uint64_t half[16*2 + 17*2];\ |
1349 uint8_t * const halfH= ((uint8_t*)half) + 256;\ | 1349 uint8_t * const halfH= ((uint8_t*)half) + 256;\ |
1350 uint8_t * const halfHV= ((uint8_t*)half);\ | 1350 uint8_t * const halfHV= ((uint8_t*)half);\ |
1351 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ | 1351 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ |
1352 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ | 1352 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ |
1353 OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\ | 1353 OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\ |
1354 }\ | 1354 }\ |
1355 static void OPNAME ## qpel16_mc23_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1355 static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1356 uint64_t half[16*2 + 17*2];\ | 1356 uint64_t half[16*2 + 17*2];\ |
1357 uint8_t * const halfH= ((uint8_t*)half) + 256;\ | 1357 uint8_t * const halfH= ((uint8_t*)half) + 256;\ |
1358 uint8_t * const halfHV= ((uint8_t*)half);\ | 1358 uint8_t * const halfHV= ((uint8_t*)half);\ |
1359 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ | 1359 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ |
1360 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ | 1360 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ |
1361 OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\ | 1361 OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\ |
1362 }\ | 1362 }\ |
1363 static void OPNAME ## qpel16_mc12_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1363 static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1364 uint64_t half[17*2];\ | 1364 uint64_t half[17*2];\ |
1365 uint8_t * const halfH= ((uint8_t*)half);\ | 1365 uint8_t * const halfH= ((uint8_t*)half);\ |
1366 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ | 1366 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ |
1367 put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\ | 1367 put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\ |
1368 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\ | 1368 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\ |
1369 }\ | 1369 }\ |
1370 static void OPNAME ## qpel16_mc32_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1370 static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1371 uint64_t half[17*2];\ | 1371 uint64_t half[17*2];\ |
1372 uint8_t * const halfH= ((uint8_t*)half);\ | 1372 uint8_t * const halfH= ((uint8_t*)half);\ |
1373 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ | 1373 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ |
1374 put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\ | 1374 put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\ |
1375 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\ | 1375 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\ |
1376 }\ | 1376 }\ |
1377 static void OPNAME ## qpel16_mc22_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1377 static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
1378 uint64_t half[17*2];\ | 1378 uint64_t half[17*2];\ |
1379 uint8_t * const halfH= ((uint8_t*)half);\ | 1379 uint8_t * const halfH= ((uint8_t*)half);\ |
1380 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ | 1380 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ |
1381 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\ | 1381 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\ |
1382 } | 1382 } |