comparison ppc/dsputil_altivec.c @ 1839:b370288f004d libavcodec

Metrowerks CodeWarrior patches by (John Dalgliesh <johnd at defyne dot org>)
author michael
date Sat, 28 Feb 2004 15:03:53 +0000
parents dea5b2946999
children 66215baae7b9
comparison
equal deleted inserted replaced
1838:8cdbb74c2f4b 1839:b370288f004d
47 47
48 int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) 48 int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
49 { 49 {
50 int i; 50 int i;
51 int s __attribute__((aligned(16))); 51 int s __attribute__((aligned(16)));
52 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); 52 const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0);
53 vector unsigned char *tv; 53 vector unsigned char *tv;
54 vector unsigned char pix1v, pix2v, pix2iv, avgv, t5; 54 vector unsigned char pix1v, pix2v, pix2iv, avgv, t5;
55 vector unsigned int sad; 55 vector unsigned int sad;
56 vector signed int sumdiffs; 56 vector signed int sumdiffs;
57 57
94 94
95 int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) 95 int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
96 { 96 {
97 int i; 97 int i;
98 int s __attribute__((aligned(16))); 98 int s __attribute__((aligned(16)));
99 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); 99 const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0);
100 vector unsigned char *tv; 100 vector unsigned char *tv;
101 vector unsigned char pix1v, pix2v, pix3v, avgv, t5; 101 vector unsigned char pix1v, pix2v, pix3v, avgv, t5;
102 vector unsigned int sad; 102 vector unsigned int sad;
103 vector signed int sumdiffs; 103 vector signed int sumdiffs;
104 uint8_t *pix3 = pix2 + line_size; 104 uint8_t *pix3 = pix2 + line_size;
155 int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) 155 int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
156 { 156 {
157 int i; 157 int i;
158 int s __attribute__((aligned(16))); 158 int s __attribute__((aligned(16)));
159 uint8_t *pix3 = pix2 + line_size; 159 uint8_t *pix3 = pix2 + line_size;
160 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); 160 const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0);
161 const vector unsigned short two = (const vector unsigned short)vec_splat_u16(2); 161 const_vector unsigned short two = (const_vector unsigned short)vec_splat_u16(2);
162 vector unsigned char *tv, avgv, t5; 162 vector unsigned char *tv, avgv, t5;
163 vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv; 163 vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv;
164 vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv; 164 vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv;
165 vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv; 165 vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv;
166 vector unsigned short avghv, avglv; 166 vector unsigned short avghv, avglv;
255 255
256 int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) 256 int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
257 { 257 {
258 int i; 258 int i;
259 int s __attribute__((aligned(16))); 259 int s __attribute__((aligned(16)));
260 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); 260 const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0);
261 vector unsigned char perm1, perm2, *pix1v, *pix2v; 261 vector unsigned char perm1, perm2, *pix1v, *pix2v;
262 vector unsigned char t1, t2, t3,t4, t5; 262 vector unsigned char t1, t2, t3,t4, t5;
263 vector unsigned int sad; 263 vector unsigned int sad;
264 vector signed int sumdiffs; 264 vector signed int sumdiffs;
265 265
297 297
298 int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) 298 int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
299 { 299 {
300 int i; 300 int i;
301 int s __attribute__((aligned(16))); 301 int s __attribute__((aligned(16)));
302 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); 302 const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0);
303 vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v; 303 vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v;
304 vector unsigned char t1, t2, t3,t4, t5; 304 vector unsigned char t1, t2, t3,t4, t5;
305 vector unsigned int sad; 305 vector unsigned int sad;
306 vector signed int sumdiffs; 306 vector signed int sumdiffs;
307 307
342 342
343 int pix_norm1_altivec(uint8_t *pix, int line_size) 343 int pix_norm1_altivec(uint8_t *pix, int line_size)
344 { 344 {
345 int i; 345 int i;
346 int s __attribute__((aligned(16))); 346 int s __attribute__((aligned(16)));
347 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); 347 const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0);
348 vector unsigned char *tv; 348 vector unsigned char *tv;
349 vector unsigned char pixv; 349 vector unsigned char pixv;
350 vector unsigned int sv; 350 vector unsigned int sv;
351 vector signed int sum; 351 vector signed int sum;
352 352
378 */ 378 */
379 int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) 379 int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
380 { 380 {
381 int i; 381 int i;
382 int s __attribute__((aligned(16))); 382 int s __attribute__((aligned(16)));
383 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); 383 const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0);
384 vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v; 384 vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v;
385 vector unsigned char t1, t2, t3,t4, t5; 385 vector unsigned char t1, t2, t3,t4, t5;
386 vector unsigned int sum; 386 vector unsigned int sum;
387 vector signed int sumsqr; 387 vector signed int sumsqr;
388 388
434 */ 434 */
435 int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) 435 int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
436 { 436 {
437 int i; 437 int i;
438 int s __attribute__((aligned(16))); 438 int s __attribute__((aligned(16)));
439 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); 439 const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0);
440 vector unsigned char perm1, perm2, *pix1v, *pix2v; 440 vector unsigned char perm1, perm2, *pix1v, *pix2v;
441 vector unsigned char t1, t2, t3,t4, t5; 441 vector unsigned char t1, t2, t3,t4, t5;
442 vector unsigned int sum; 442 vector unsigned int sum;
443 vector signed int sumsqr; 443 vector signed int sumsqr;
444 444
478 return s; 478 return s;
479 } 479 }
480 480
481 int pix_sum_altivec(uint8_t * pix, int line_size) 481 int pix_sum_altivec(uint8_t * pix, int line_size)
482 { 482 {
483 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); 483 const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0);
484 vector unsigned char perm, *pixv; 484 vector unsigned char perm, *pixv;
485 vector unsigned char t1; 485 vector unsigned char t1;
486 vector unsigned int sad; 486 vector unsigned int sad;
487 vector signed int sumdiffs; 487 vector signed int sumdiffs;
488 488
513 513
514 void get_pixels_altivec(DCTELEM *restrict block, const uint8_t *pixels, int line_size) 514 void get_pixels_altivec(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
515 { 515 {
516 int i; 516 int i;
517 vector unsigned char perm, bytes, *pixv; 517 vector unsigned char perm, bytes, *pixv;
518 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); 518 const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0);
519 vector signed short shorts; 519 vector signed short shorts;
520 520
521 for(i=0;i<8;i++) 521 for(i=0;i<8;i++)
522 { 522 {
523 // Read potentially unaligned pixels. 523 // Read potentially unaligned pixels.
540 void diff_pixels_altivec(DCTELEM *restrict block, const uint8_t *s1, 540 void diff_pixels_altivec(DCTELEM *restrict block, const uint8_t *s1,
541 const uint8_t *s2, int stride) 541 const uint8_t *s2, int stride)
542 { 542 {
543 int i; 543 int i;
544 vector unsigned char perm, bytes, *pixv; 544 vector unsigned char perm, bytes, *pixv;
545 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); 545 const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0);
546 vector signed short shorts1, shorts2; 546 vector signed short shorts1, shorts2;
547 547
548 for(i=0;i<4;i++) 548 for(i=0;i<4;i++)
549 { 549 {
550 // Read potentially unaligned pixels 550 // Read potentially unaligned pixels
652 int i; 652 int i;
653 653
654 POWERPC_PERF_START_COUNT(altivec_put_pixels16_num, 1); 654 POWERPC_PERF_START_COUNT(altivec_put_pixels16_num, 1);
655 655
656 for(i=0; i<h; i++) { 656 for(i=0; i<h; i++) {
657 *((uint32_t*)(block )) = (((const struct unaligned_32 *) (pixels))->l); 657 *((uint32_t*)(block)) = LD32(pixels);
658 *((uint32_t*)(block+4)) = (((const struct unaligned_32 *) (pixels+4))->l); 658 *((uint32_t*)(block+4)) = LD32(pixels+4);
659 *((uint32_t*)(block+8)) = (((const struct unaligned_32 *) (pixels+8))->l); 659 *((uint32_t*)(block+8)) = LD32(pixels+8);
660 *((uint32_t*)(block+12)) = (((const struct unaligned_32 *) (pixels+12))->l); 660 *((uint32_t*)(block+12)) = LD32(pixels+12);
661 pixels+=line_size; 661 pixels+=line_size;
662 block +=line_size; 662 block +=line_size;
663 } 663 }
664 664
665 POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_num, 1); 665 POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_num, 1);
727 int i; 727 int i;
728 728
729 POWERPC_PERF_START_COUNT(altivec_avg_pixels16_num, 1); 729 POWERPC_PERF_START_COUNT(altivec_avg_pixels16_num, 1);
730 730
731 for(i=0; i<h; i++) { 731 for(i=0; i<h; i++) {
732 op_avg(*((uint32_t*)(block)),(((const struct unaligned_32 *)(pixels))->l)); 732 op_avg(*((uint32_t*)(block)),LD32(pixels));
733 op_avg(*((uint32_t*)(block+4)),(((const struct unaligned_32 *)(pixels+4))->l)); 733 op_avg(*((uint32_t*)(block+4)),LD32(pixels+4));
734 op_avg(*((uint32_t*)(block+8)),(((const struct unaligned_32 *)(pixels+8))->l)); 734 op_avg(*((uint32_t*)(block+8)),LD32(pixels+8));
735 op_avg(*((uint32_t*)(block+12)),(((const struct unaligned_32 *)(pixels+12))->l)); 735 op_avg(*((uint32_t*)(block+12)),LD32(pixels+12));
736 pixels+=line_size; 736 pixels+=line_size;
737 block +=line_size; 737 block +=line_size;
738 } 738 }
739 739
740 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1); 740 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1);
876 pixelsavg; 876 pixelsavg;
877 register vector unsigned char 877 register vector unsigned char
878 blockv, temp1, temp2; 878 blockv, temp1, temp2;
879 register vector unsigned short 879 register vector unsigned short
880 pixelssum1, pixelssum2, temp3; 880 pixelssum1, pixelssum2, temp3;
881 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); 881 register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0);
882 register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); 882 register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2);
883 883
884 temp1 = vec_ld(0, pixels); 884 temp1 = vec_ld(0, pixels);
885 temp2 = vec_ld(16, pixels); 885 temp2 = vec_ld(16, pixels);
886 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); 886 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
887 if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) 887 if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F)
991 pixelsavg; 991 pixelsavg;
992 register vector unsigned char 992 register vector unsigned char
993 blockv, temp1, temp2; 993 blockv, temp1, temp2;
994 register vector unsigned short 994 register vector unsigned short
995 pixelssum1, pixelssum2, temp3; 995 pixelssum1, pixelssum2, temp3;
996 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); 996 register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0);
997 register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1); 997 register const_vector unsigned short vcone = (const_vector unsigned short)vec_splat_u16(1);
998 register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); 998 register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2);
999 999
1000 temp1 = vec_ld(0, pixels); 1000 temp1 = vec_ld(0, pixels);
1001 temp2 = vec_ld(16, pixels); 1001 temp2 = vec_ld(16, pixels);
1002 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); 1002 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
1003 if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) 1003 if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F)
1107 register vector unsigned char 1107 register vector unsigned char
1108 blockv, temp1, temp2; 1108 blockv, temp1, temp2;
1109 register vector unsigned short 1109 register vector unsigned short
1110 pixelssum1, pixelssum2, temp3, 1110 pixelssum1, pixelssum2, temp3,
1111 pixelssum3, pixelssum4, temp4; 1111 pixelssum3, pixelssum4, temp4;
1112 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); 1112 register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0);
1113 register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); 1113 register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2);
1114 1114
1115 POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1); 1115 POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1);
1116 1116
1117 temp1 = vec_ld(0, pixels); 1117 temp1 = vec_ld(0, pixels);
1118 temp2 = vec_ld(16, pixels); 1118 temp2 = vec_ld(16, pixels);
1228 register vector unsigned char 1228 register vector unsigned char
1229 blockv, temp1, temp2; 1229 blockv, temp1, temp2;
1230 register vector unsigned short 1230 register vector unsigned short
1231 pixelssum1, pixelssum2, temp3, 1231 pixelssum1, pixelssum2, temp3,
1232 pixelssum3, pixelssum4, temp4; 1232 pixelssum3, pixelssum4, temp4;
1233 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); 1233 register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0);
1234 register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1); 1234 register const_vector unsigned short vcone = (const_vector unsigned short)vec_splat_u16(1);
1235 register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); 1235 register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2);
1236 1236
1237 POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); 1237 POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
1238 1238
1239 temp1 = vec_ld(0, pixels); 1239 temp1 = vec_ld(0, pixels);
1240 temp2 = vec_ld(16, pixels); 1240 temp2 = vec_ld(16, pixels);