Mercurial > libavcodec.hg
comparison ppc/dsputil_altivec.c @ 1839:b370288f004d libavcodec
Metrowerks CodeWarrior patches by (John Dalgliesh <johnd at defyne dot org>)
author | michael |
---|---|
date | Sat, 28 Feb 2004 15:03:53 +0000 |
parents | dea5b2946999 |
children | 66215baae7b9 |
comparison legend: equal, deleted, inserted, replaced
1838:8cdbb74c2f4b | 1839:b370288f004d |
---|---|
47 | 47 |
48 int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | 48 int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
49 { | 49 { |
50 int i; | 50 int i; |
51 int s __attribute__((aligned(16))); | 51 int s __attribute__((aligned(16))); |
52 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); | 52 const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0); |
53 vector unsigned char *tv; | 53 vector unsigned char *tv; |
54 vector unsigned char pix1v, pix2v, pix2iv, avgv, t5; | 54 vector unsigned char pix1v, pix2v, pix2iv, avgv, t5; |
55 vector unsigned int sad; | 55 vector unsigned int sad; |
56 vector signed int sumdiffs; | 56 vector signed int sumdiffs; |
57 | 57 |
94 | 94 |
95 int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | 95 int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
96 { | 96 { |
97 int i; | 97 int i; |
98 int s __attribute__((aligned(16))); | 98 int s __attribute__((aligned(16))); |
99 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); | 99 const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0); |
100 vector unsigned char *tv; | 100 vector unsigned char *tv; |
101 vector unsigned char pix1v, pix2v, pix3v, avgv, t5; | 101 vector unsigned char pix1v, pix2v, pix3v, avgv, t5; |
102 vector unsigned int sad; | 102 vector unsigned int sad; |
103 vector signed int sumdiffs; | 103 vector signed int sumdiffs; |
104 uint8_t *pix3 = pix2 + line_size; | 104 uint8_t *pix3 = pix2 + line_size; |
155 int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | 155 int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
156 { | 156 { |
157 int i; | 157 int i; |
158 int s __attribute__((aligned(16))); | 158 int s __attribute__((aligned(16))); |
159 uint8_t *pix3 = pix2 + line_size; | 159 uint8_t *pix3 = pix2 + line_size; |
160 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); | 160 const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0); |
161 const vector unsigned short two = (const vector unsigned short)vec_splat_u16(2); | 161 const_vector unsigned short two = (const_vector unsigned short)vec_splat_u16(2); |
162 vector unsigned char *tv, avgv, t5; | 162 vector unsigned char *tv, avgv, t5; |
163 vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv; | 163 vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv; |
164 vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv; | 164 vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv; |
165 vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv; | 165 vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv; |
166 vector unsigned short avghv, avglv; | 166 vector unsigned short avghv, avglv; |
255 | 255 |
256 int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | 256 int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
257 { | 257 { |
258 int i; | 258 int i; |
259 int s __attribute__((aligned(16))); | 259 int s __attribute__((aligned(16))); |
260 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); | 260 const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0); |
261 vector unsigned char perm1, perm2, *pix1v, *pix2v; | 261 vector unsigned char perm1, perm2, *pix1v, *pix2v; |
262 vector unsigned char t1, t2, t3,t4, t5; | 262 vector unsigned char t1, t2, t3,t4, t5; |
263 vector unsigned int sad; | 263 vector unsigned int sad; |
264 vector signed int sumdiffs; | 264 vector signed int sumdiffs; |
265 | 265 |
297 | 297 |
298 int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | 298 int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
299 { | 299 { |
300 int i; | 300 int i; |
301 int s __attribute__((aligned(16))); | 301 int s __attribute__((aligned(16))); |
302 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); | 302 const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0); |
303 vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v; | 303 vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v; |
304 vector unsigned char t1, t2, t3,t4, t5; | 304 vector unsigned char t1, t2, t3,t4, t5; |
305 vector unsigned int sad; | 305 vector unsigned int sad; |
306 vector signed int sumdiffs; | 306 vector signed int sumdiffs; |
307 | 307 |
342 | 342 |
343 int pix_norm1_altivec(uint8_t *pix, int line_size) | 343 int pix_norm1_altivec(uint8_t *pix, int line_size) |
344 { | 344 { |
345 int i; | 345 int i; |
346 int s __attribute__((aligned(16))); | 346 int s __attribute__((aligned(16))); |
347 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); | 347 const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0); |
348 vector unsigned char *tv; | 348 vector unsigned char *tv; |
349 vector unsigned char pixv; | 349 vector unsigned char pixv; |
350 vector unsigned int sv; | 350 vector unsigned int sv; |
351 vector signed int sum; | 351 vector signed int sum; |
352 | 352 |
378 */ | 378 */ |
379 int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | 379 int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
380 { | 380 { |
381 int i; | 381 int i; |
382 int s __attribute__((aligned(16))); | 382 int s __attribute__((aligned(16))); |
383 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); | 383 const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0); |
384 vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v; | 384 vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v; |
385 vector unsigned char t1, t2, t3,t4, t5; | 385 vector unsigned char t1, t2, t3,t4, t5; |
386 vector unsigned int sum; | 386 vector unsigned int sum; |
387 vector signed int sumsqr; | 387 vector signed int sumsqr; |
388 | 388 |
434 */ | 434 */ |
435 int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | 435 int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
436 { | 436 { |
437 int i; | 437 int i; |
438 int s __attribute__((aligned(16))); | 438 int s __attribute__((aligned(16))); |
439 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); | 439 const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0); |
440 vector unsigned char perm1, perm2, *pix1v, *pix2v; | 440 vector unsigned char perm1, perm2, *pix1v, *pix2v; |
441 vector unsigned char t1, t2, t3,t4, t5; | 441 vector unsigned char t1, t2, t3,t4, t5; |
442 vector unsigned int sum; | 442 vector unsigned int sum; |
443 vector signed int sumsqr; | 443 vector signed int sumsqr; |
444 | 444 |
478 return s; | 478 return s; |
479 } | 479 } |
480 | 480 |
481 int pix_sum_altivec(uint8_t * pix, int line_size) | 481 int pix_sum_altivec(uint8_t * pix, int line_size) |
482 { | 482 { |
483 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); | 483 const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0); |
484 vector unsigned char perm, *pixv; | 484 vector unsigned char perm, *pixv; |
485 vector unsigned char t1; | 485 vector unsigned char t1; |
486 vector unsigned int sad; | 486 vector unsigned int sad; |
487 vector signed int sumdiffs; | 487 vector signed int sumdiffs; |
488 | 488 |
513 | 513 |
514 void get_pixels_altivec(DCTELEM *restrict block, const uint8_t *pixels, int line_size) | 514 void get_pixels_altivec(DCTELEM *restrict block, const uint8_t *pixels, int line_size) |
515 { | 515 { |
516 int i; | 516 int i; |
517 vector unsigned char perm, bytes, *pixv; | 517 vector unsigned char perm, bytes, *pixv; |
518 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); | 518 const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0); |
519 vector signed short shorts; | 519 vector signed short shorts; |
520 | 520 |
521 for(i=0;i<8;i++) | 521 for(i=0;i<8;i++) |
522 { | 522 { |
523 // Read potentially unaligned pixels. | 523 // Read potentially unaligned pixels. |
540 void diff_pixels_altivec(DCTELEM *restrict block, const uint8_t *s1, | 540 void diff_pixels_altivec(DCTELEM *restrict block, const uint8_t *s1, |
541 const uint8_t *s2, int stride) | 541 const uint8_t *s2, int stride) |
542 { | 542 { |
543 int i; | 543 int i; |
544 vector unsigned char perm, bytes, *pixv; | 544 vector unsigned char perm, bytes, *pixv; |
545 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); | 545 const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0); |
546 vector signed short shorts1, shorts2; | 546 vector signed short shorts1, shorts2; |
547 | 547 |
548 for(i=0;i<4;i++) | 548 for(i=0;i<4;i++) |
549 { | 549 { |
550 // Read potentially unaligned pixels | 550 // Read potentially unaligned pixels |
652 int i; | 652 int i; |
653 | 653 |
654 POWERPC_PERF_START_COUNT(altivec_put_pixels16_num, 1); | 654 POWERPC_PERF_START_COUNT(altivec_put_pixels16_num, 1); |
655 | 655 |
656 for(i=0; i<h; i++) { | 656 for(i=0; i<h; i++) { |
657 *((uint32_t*)(block )) = (((const struct unaligned_32 *) (pixels))->l); | 657 *((uint32_t*)(block)) = LD32(pixels); |
658 *((uint32_t*)(block+4)) = (((const struct unaligned_32 *) (pixels+4))->l); | 658 *((uint32_t*)(block+4)) = LD32(pixels+4); |
659 *((uint32_t*)(block+8)) = (((const struct unaligned_32 *) (pixels+8))->l); | 659 *((uint32_t*)(block+8)) = LD32(pixels+8); |
660 *((uint32_t*)(block+12)) = (((const struct unaligned_32 *) (pixels+12))->l); | 660 *((uint32_t*)(block+12)) = LD32(pixels+12); |
661 pixels+=line_size; | 661 pixels+=line_size; |
662 block +=line_size; | 662 block +=line_size; |
663 } | 663 } |
664 | 664 |
665 POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_num, 1); | 665 POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_num, 1); |
727 int i; | 727 int i; |
728 | 728 |
729 POWERPC_PERF_START_COUNT(altivec_avg_pixels16_num, 1); | 729 POWERPC_PERF_START_COUNT(altivec_avg_pixels16_num, 1); |
730 | 730 |
731 for(i=0; i<h; i++) { | 731 for(i=0; i<h; i++) { |
732 op_avg(*((uint32_t*)(block)),(((const struct unaligned_32 *)(pixels))->l)); | 732 op_avg(*((uint32_t*)(block)),LD32(pixels)); |
733 op_avg(*((uint32_t*)(block+4)),(((const struct unaligned_32 *)(pixels+4))->l)); | 733 op_avg(*((uint32_t*)(block+4)),LD32(pixels+4)); |
734 op_avg(*((uint32_t*)(block+8)),(((const struct unaligned_32 *)(pixels+8))->l)); | 734 op_avg(*((uint32_t*)(block+8)),LD32(pixels+8)); |
735 op_avg(*((uint32_t*)(block+12)),(((const struct unaligned_32 *)(pixels+12))->l)); | 735 op_avg(*((uint32_t*)(block+12)),LD32(pixels+12)); |
736 pixels+=line_size; | 736 pixels+=line_size; |
737 block +=line_size; | 737 block +=line_size; |
738 } | 738 } |
739 | 739 |
740 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1); | 740 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1); |
876 pixelsavg; | 876 pixelsavg; |
877 register vector unsigned char | 877 register vector unsigned char |
878 blockv, temp1, temp2; | 878 blockv, temp1, temp2; |
879 register vector unsigned short | 879 register vector unsigned short |
880 pixelssum1, pixelssum2, temp3; | 880 pixelssum1, pixelssum2, temp3; |
881 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); | 881 register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0); |
882 register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); | 882 register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2); |
883 | 883 |
884 temp1 = vec_ld(0, pixels); | 884 temp1 = vec_ld(0, pixels); |
885 temp2 = vec_ld(16, pixels); | 885 temp2 = vec_ld(16, pixels); |
886 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); | 886 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); |
887 if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) | 887 if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) |
991 pixelsavg; | 991 pixelsavg; |
992 register vector unsigned char | 992 register vector unsigned char |
993 blockv, temp1, temp2; | 993 blockv, temp1, temp2; |
994 register vector unsigned short | 994 register vector unsigned short |
995 pixelssum1, pixelssum2, temp3; | 995 pixelssum1, pixelssum2, temp3; |
996 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); | 996 register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0); |
997 register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1); | 997 register const_vector unsigned short vcone = (const_vector unsigned short)vec_splat_u16(1); |
998 register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); | 998 register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2); |
999 | 999 |
1000 temp1 = vec_ld(0, pixels); | 1000 temp1 = vec_ld(0, pixels); |
1001 temp2 = vec_ld(16, pixels); | 1001 temp2 = vec_ld(16, pixels); |
1002 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); | 1002 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); |
1003 if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) | 1003 if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) |
1107 register vector unsigned char | 1107 register vector unsigned char |
1108 blockv, temp1, temp2; | 1108 blockv, temp1, temp2; |
1109 register vector unsigned short | 1109 register vector unsigned short |
1110 pixelssum1, pixelssum2, temp3, | 1110 pixelssum1, pixelssum2, temp3, |
1111 pixelssum3, pixelssum4, temp4; | 1111 pixelssum3, pixelssum4, temp4; |
1112 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); | 1112 register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0); |
1113 register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); | 1113 register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2); |
1114 | 1114 |
1115 POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1); | 1115 POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1); |
1116 | 1116 |
1117 temp1 = vec_ld(0, pixels); | 1117 temp1 = vec_ld(0, pixels); |
1118 temp2 = vec_ld(16, pixels); | 1118 temp2 = vec_ld(16, pixels); |
1228 register vector unsigned char | 1228 register vector unsigned char |
1229 blockv, temp1, temp2; | 1229 blockv, temp1, temp2; |
1230 register vector unsigned short | 1230 register vector unsigned short |
1231 pixelssum1, pixelssum2, temp3, | 1231 pixelssum1, pixelssum2, temp3, |
1232 pixelssum3, pixelssum4, temp4; | 1232 pixelssum3, pixelssum4, temp4; |
1233 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); | 1233 register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0); |
1234 register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1); | 1234 register const_vector unsigned short vcone = (const_vector unsigned short)vec_splat_u16(1); |
1235 register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); | 1235 register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2); |
1236 | 1236 |
1237 POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); | 1237 POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); |
1238 | 1238 |
1239 temp1 = vec_ld(0, pixels); | 1239 temp1 = vec_ld(0, pixels); |
1240 temp2 = vec_ld(16, pixels); | 1240 temp2 = vec_ld(16, pixels); |