Mercurial > libavcodec.hg
comparison ppc/dsputil_altivec.c @ 5746:55ed6dc5d476 libavcodec
Remove the const_vector macro indirection (replaced by plain "const vector"), which is
useless and obfuscating now that the Metrowerks workarounds are gone.
author | diego |
---|---|
date | Mon, 01 Oct 2007 14:23:36 +0000 |
parents | bfc4b9bf297b |
children | 784dcbdc910f |
comparison
equal
deleted
inserted
replaced
5745:61768139733b | 5746:55ed6dc5d476 |
---|---|
53 | 53 |
54 int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | 54 int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
55 { | 55 { |
56 int i; | 56 int i; |
57 DECLARE_ALIGNED_16(int, s); | 57 DECLARE_ALIGNED_16(int, s); |
58 const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0); | 58 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); |
59 vector unsigned char *tv; | 59 vector unsigned char *tv; |
60 vector unsigned char pix1v, pix2v, pix2iv, avgv, t5; | 60 vector unsigned char pix1v, pix2v, pix2iv, avgv, t5; |
61 vector unsigned int sad; | 61 vector unsigned int sad; |
62 vector signed int sumdiffs; | 62 vector signed int sumdiffs; |
63 | 63 |
100 | 100 |
101 int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | 101 int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
102 { | 102 { |
103 int i; | 103 int i; |
104 DECLARE_ALIGNED_16(int, s); | 104 DECLARE_ALIGNED_16(int, s); |
105 const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0); | 105 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); |
106 vector unsigned char *tv; | 106 vector unsigned char *tv; |
107 vector unsigned char pix1v, pix2v, pix3v, avgv, t5; | 107 vector unsigned char pix1v, pix2v, pix3v, avgv, t5; |
108 vector unsigned int sad; | 108 vector unsigned int sad; |
109 vector signed int sumdiffs; | 109 vector signed int sumdiffs; |
110 uint8_t *pix3 = pix2 + line_size; | 110 uint8_t *pix3 = pix2 + line_size; |
161 int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | 161 int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
162 { | 162 { |
163 int i; | 163 int i; |
164 DECLARE_ALIGNED_16(int, s); | 164 DECLARE_ALIGNED_16(int, s); |
165 uint8_t *pix3 = pix2 + line_size; | 165 uint8_t *pix3 = pix2 + line_size; |
166 const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0); | 166 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); |
167 const_vector unsigned short two = (const_vector unsigned short)vec_splat_u16(2); | 167 const vector unsigned short two = (const vector unsigned short)vec_splat_u16(2); |
168 vector unsigned char *tv, avgv, t5; | 168 vector unsigned char *tv, avgv, t5; |
169 vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv; | 169 vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv; |
170 vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv; | 170 vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv; |
171 vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv; | 171 vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv; |
172 vector unsigned short avghv, avglv; | 172 vector unsigned short avghv, avglv; |
261 | 261 |
262 int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | 262 int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
263 { | 263 { |
264 int i; | 264 int i; |
265 DECLARE_ALIGNED_16(int, s); | 265 DECLARE_ALIGNED_16(int, s); |
266 const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0); | 266 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); |
267 vector unsigned char perm1, perm2, *pix1v, *pix2v; | 267 vector unsigned char perm1, perm2, *pix1v, *pix2v; |
268 vector unsigned char t1, t2, t3,t4, t5; | 268 vector unsigned char t1, t2, t3,t4, t5; |
269 vector unsigned int sad; | 269 vector unsigned int sad; |
270 vector signed int sumdiffs; | 270 vector signed int sumdiffs; |
271 | 271 |
303 | 303 |
304 int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | 304 int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
305 { | 305 { |
306 int i; | 306 int i; |
307 DECLARE_ALIGNED_16(int, s); | 307 DECLARE_ALIGNED_16(int, s); |
308 const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0); | 308 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); |
309 vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v; | 309 vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v; |
310 vector unsigned char t1, t2, t3,t4, t5; | 310 vector unsigned char t1, t2, t3,t4, t5; |
311 vector unsigned int sad; | 311 vector unsigned int sad; |
312 vector signed int sumdiffs; | 312 vector signed int sumdiffs; |
313 | 313 |
348 | 348 |
349 int pix_norm1_altivec(uint8_t *pix, int line_size) | 349 int pix_norm1_altivec(uint8_t *pix, int line_size) |
350 { | 350 { |
351 int i; | 351 int i; |
352 DECLARE_ALIGNED_16(int, s); | 352 DECLARE_ALIGNED_16(int, s); |
353 const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0); | 353 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); |
354 vector unsigned char *tv; | 354 vector unsigned char *tv; |
355 vector unsigned char pixv; | 355 vector unsigned char pixv; |
356 vector unsigned int sv; | 356 vector unsigned int sv; |
357 vector signed int sum; | 357 vector signed int sum; |
358 | 358 |
384 */ | 384 */ |
385 int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | 385 int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
386 { | 386 { |
387 int i; | 387 int i; |
388 DECLARE_ALIGNED_16(int, s); | 388 DECLARE_ALIGNED_16(int, s); |
389 const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0); | 389 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); |
390 vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v; | 390 vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v; |
391 vector unsigned char t1, t2, t3,t4, t5; | 391 vector unsigned char t1, t2, t3,t4, t5; |
392 vector unsigned int sum; | 392 vector unsigned int sum; |
393 vector signed int sumsqr; | 393 vector signed int sumsqr; |
394 | 394 |
440 */ | 440 */ |
441 int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | 441 int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
442 { | 442 { |
443 int i; | 443 int i; |
444 DECLARE_ALIGNED_16(int, s); | 444 DECLARE_ALIGNED_16(int, s); |
445 const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0); | 445 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); |
446 vector unsigned char perm1, perm2, *pix1v, *pix2v; | 446 vector unsigned char perm1, perm2, *pix1v, *pix2v; |
447 vector unsigned char t1, t2, t3,t4, t5; | 447 vector unsigned char t1, t2, t3,t4, t5; |
448 vector unsigned int sum; | 448 vector unsigned int sum; |
449 vector signed int sumsqr; | 449 vector signed int sumsqr; |
450 | 450 |
484 return s; | 484 return s; |
485 } | 485 } |
486 | 486 |
487 int pix_sum_altivec(uint8_t * pix, int line_size) | 487 int pix_sum_altivec(uint8_t * pix, int line_size) |
488 { | 488 { |
489 const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0); | 489 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); |
490 vector unsigned char perm, *pixv; | 490 vector unsigned char perm, *pixv; |
491 vector unsigned char t1; | 491 vector unsigned char t1; |
492 vector unsigned int sad; | 492 vector unsigned int sad; |
493 vector signed int sumdiffs; | 493 vector signed int sumdiffs; |
494 | 494 |
519 | 519 |
520 void get_pixels_altivec(DCTELEM *restrict block, const uint8_t *pixels, int line_size) | 520 void get_pixels_altivec(DCTELEM *restrict block, const uint8_t *pixels, int line_size) |
521 { | 521 { |
522 int i; | 522 int i; |
523 vector unsigned char perm, bytes, *pixv; | 523 vector unsigned char perm, bytes, *pixv; |
524 const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0); | 524 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); |
525 vector signed short shorts; | 525 vector signed short shorts; |
526 | 526 |
527 for(i=0;i<8;i++) | 527 for(i=0;i<8;i++) |
528 { | 528 { |
529 // Read potentially unaligned pixels. | 529 // Read potentially unaligned pixels. |
546 void diff_pixels_altivec(DCTELEM *restrict block, const uint8_t *s1, | 546 void diff_pixels_altivec(DCTELEM *restrict block, const uint8_t *s1, |
547 const uint8_t *s2, int stride) | 547 const uint8_t *s2, int stride) |
548 { | 548 { |
549 int i; | 549 int i; |
550 vector unsigned char perm, bytes, *pixv; | 550 vector unsigned char perm, bytes, *pixv; |
551 const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0); | 551 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); |
552 vector signed short shorts1, shorts2; | 552 vector signed short shorts1, shorts2; |
553 | 553 |
554 for(i=0;i<4;i++) | 554 for(i=0;i<4;i++) |
555 { | 555 { |
556 // Read potentially unaligned pixels | 556 // Read potentially unaligned pixels |
765 pixelsavg; | 765 pixelsavg; |
766 register vector unsigned char | 766 register vector unsigned char |
767 blockv, temp1, temp2; | 767 blockv, temp1, temp2; |
768 register vector unsigned short | 768 register vector unsigned short |
769 pixelssum1, pixelssum2, temp3; | 769 pixelssum1, pixelssum2, temp3; |
770 register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0); | 770 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); |
771 register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2); | 771 register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); |
772 | 772 |
773 temp1 = vec_ld(0, pixels); | 773 temp1 = vec_ld(0, pixels); |
774 temp2 = vec_ld(16, pixels); | 774 temp2 = vec_ld(16, pixels); |
775 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); | 775 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); |
776 if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) | 776 if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) |
841 pixelsavg; | 841 pixelsavg; |
842 register vector unsigned char | 842 register vector unsigned char |
843 blockv, temp1, temp2; | 843 blockv, temp1, temp2; |
844 register vector unsigned short | 844 register vector unsigned short |
845 pixelssum1, pixelssum2, temp3; | 845 pixelssum1, pixelssum2, temp3; |
846 register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0); | 846 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); |
847 register const_vector unsigned short vcone = (const_vector unsigned short)vec_splat_u16(1); | 847 register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1); |
848 register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2); | 848 register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); |
849 | 849 |
850 temp1 = vec_ld(0, pixels); | 850 temp1 = vec_ld(0, pixels); |
851 temp2 = vec_ld(16, pixels); | 851 temp2 = vec_ld(16, pixels); |
852 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); | 852 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); |
853 if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) | 853 if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) |
918 register vector unsigned char | 918 register vector unsigned char |
919 blockv, temp1, temp2; | 919 blockv, temp1, temp2; |
920 register vector unsigned short | 920 register vector unsigned short |
921 pixelssum1, pixelssum2, temp3, | 921 pixelssum1, pixelssum2, temp3, |
922 pixelssum3, pixelssum4, temp4; | 922 pixelssum3, pixelssum4, temp4; |
923 register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0); | 923 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); |
924 register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2); | 924 register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); |
925 | 925 |
926 POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1); | 926 POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1); |
927 | 927 |
928 temp1 = vec_ld(0, pixels); | 928 temp1 = vec_ld(0, pixels); |
929 temp2 = vec_ld(16, pixels); | 929 temp2 = vec_ld(16, pixels); |
1000 register vector unsigned char | 1000 register vector unsigned char |
1001 blockv, temp1, temp2; | 1001 blockv, temp1, temp2; |
1002 register vector unsigned short | 1002 register vector unsigned short |
1003 pixelssum1, pixelssum2, temp3, | 1003 pixelssum1, pixelssum2, temp3, |
1004 pixelssum3, pixelssum4, temp4; | 1004 pixelssum3, pixelssum4, temp4; |
1005 register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0); | 1005 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); |
1006 register const_vector unsigned short vcone = (const_vector unsigned short)vec_splat_u16(1); | 1006 register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1); |
1007 register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2); | 1007 register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); |
1008 | 1008 |
1009 POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); | 1009 POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); |
1010 | 1010 |
1011 temp1 = vec_ld(0, pixels); | 1011 temp1 = vec_ld(0, pixels); |
1012 temp2 = vec_ld(16, pixels); | 1012 temp2 = vec_ld(16, pixels); |
1074 } | 1074 } |
1075 | 1075 |
1076 int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){ | 1076 int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){ |
1077 POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1); | 1077 POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1); |
1078 int sum; | 1078 int sum; |
1079 register const_vector unsigned char vzero = | 1079 register const vector unsigned char vzero = |
1080 (const_vector unsigned char)vec_splat_u8(0); | 1080 (const vector unsigned char)vec_splat_u8(0); |
1081 register vector signed short temp0, temp1, temp2, temp3, temp4, | 1081 register vector signed short temp0, temp1, temp2, temp3, temp4, |
1082 temp5, temp6, temp7; | 1082 temp5, temp6, temp7; |
1083 POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1); | 1083 POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1); |
1084 { | 1084 { |
1085 register const_vector signed short vprod1 =(const_vector signed short) | 1085 register const vector signed short vprod1 =(const vector signed short) |
1086 AVV( 1,-1, 1,-1, 1,-1, 1,-1); | 1086 AVV( 1,-1, 1,-1, 1,-1, 1,-1); |
1087 register const_vector signed short vprod2 =(const_vector signed short) | 1087 register const vector signed short vprod2 =(const vector signed short) |
1088 AVV( 1, 1,-1,-1, 1, 1,-1,-1); | 1088 AVV( 1, 1,-1,-1, 1, 1,-1,-1); |
1089 register const_vector signed short vprod3 =(const_vector signed short) | 1089 register const vector signed short vprod3 =(const vector signed short) |
1090 AVV( 1, 1, 1, 1,-1,-1,-1,-1); | 1090 AVV( 1, 1, 1, 1,-1,-1,-1,-1); |
1091 register const_vector unsigned char perm1 = (const_vector unsigned char) | 1091 register const vector unsigned char perm1 = (const vector unsigned char) |
1092 AVV(0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05, | 1092 AVV(0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05, |
1093 0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D); | 1093 0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D); |
1094 register const_vector unsigned char perm2 = (const_vector unsigned char) | 1094 register const vector unsigned char perm2 = (const vector unsigned char) |
1095 AVV(0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, | 1095 AVV(0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, |
1096 0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B); | 1096 0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B); |
1097 register const_vector unsigned char perm3 = (const_vector unsigned char) | 1097 register const vector unsigned char perm3 = (const vector unsigned char) |
1098 AVV(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, | 1098 AVV(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, |
1099 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); | 1099 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); |
1100 | 1100 |
1101 #define ONEITERBUTTERFLY(i, res) \ | 1101 #define ONEITERBUTTERFLY(i, res) \ |
1102 { \ | 1102 { \ |
1222 temp3S REG_v(v11), | 1222 temp3S REG_v(v11), |
1223 temp4S REG_v(v12), | 1223 temp4S REG_v(v12), |
1224 temp5S REG_v(v13), | 1224 temp5S REG_v(v13), |
1225 temp6S REG_v(v14), | 1225 temp6S REG_v(v14), |
1226 temp7S REG_v(v15); | 1226 temp7S REG_v(v15); |
1227 register const_vector unsigned char vzero REG_v(v31)= | 1227 register const vector unsigned char vzero REG_v(v31)= |
1228 (const_vector unsigned char)vec_splat_u8(0); | 1228 (const vector unsigned char)vec_splat_u8(0); |
1229 { | 1229 { |
1230 register const_vector signed short vprod1 REG_v(v16)= | 1230 register const vector signed short vprod1 REG_v(v16)= |
1231 (const_vector signed short)AVV( 1,-1, 1,-1, 1,-1, 1,-1); | 1231 (const vector signed short)AVV( 1,-1, 1,-1, 1,-1, 1,-1); |
1232 register const_vector signed short vprod2 REG_v(v17)= | 1232 register const vector signed short vprod2 REG_v(v17)= |
1233 (const_vector signed short)AVV( 1, 1,-1,-1, 1, 1,-1,-1); | 1233 (const vector signed short)AVV( 1, 1,-1,-1, 1, 1,-1,-1); |
1234 register const_vector signed short vprod3 REG_v(v18)= | 1234 register const vector signed short vprod3 REG_v(v18)= |
1235 (const_vector signed short)AVV( 1, 1, 1, 1,-1,-1,-1,-1); | 1235 (const vector signed short)AVV( 1, 1, 1, 1,-1,-1,-1,-1); |
1236 register const_vector unsigned char perm1 REG_v(v19)= | 1236 register const vector unsigned char perm1 REG_v(v19)= |
1237 (const_vector unsigned char) | 1237 (const vector unsigned char) |
1238 AVV(0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05, | 1238 AVV(0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05, |
1239 0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D); | 1239 0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D); |
1240 register const_vector unsigned char perm2 REG_v(v20)= | 1240 register const vector unsigned char perm2 REG_v(v20)= |
1241 (const_vector unsigned char) | 1241 (const vector unsigned char) |
1242 AVV(0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, | 1242 AVV(0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, |
1243 0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B); | 1243 0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B); |
1244 register const_vector unsigned char perm3 REG_v(v21)= | 1244 register const vector unsigned char perm3 REG_v(v21)= |
1245 (const_vector unsigned char) | 1245 (const vector unsigned char) |
1246 AVV(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, | 1246 AVV(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, |
1247 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); | 1247 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); |
1248 | 1248 |
1249 #define ONEITERBUTTERFLY(i, res1, res2) \ | 1249 #define ONEITERBUTTERFLY(i, res1, res2) \ |
1250 { \ | 1250 { \ |
1488 register int i; | 1488 register int i; |
1489 register vector unsigned char pixelsv1, pixelsv2, pixelsavg; | 1489 register vector unsigned char pixelsv1, pixelsv2, pixelsavg; |
1490 register vector unsigned char blockv, temp1, temp2, blocktemp; | 1490 register vector unsigned char blockv, temp1, temp2, blocktemp; |
1491 register vector unsigned short pixelssum1, pixelssum2, temp3; | 1491 register vector unsigned short pixelssum1, pixelssum2, temp3; |
1492 | 1492 |
1493 register const_vector unsigned char vczero = (const_vector unsigned char) | 1493 register const vector unsigned char vczero = (const vector unsigned char) |
1494 vec_splat_u8(0); | 1494 vec_splat_u8(0); |
1495 register const_vector unsigned short vctwo = (const_vector unsigned short) | 1495 register const vector unsigned short vctwo = (const vector unsigned short) |
1496 vec_splat_u16(2); | 1496 vec_splat_u16(2); |
1497 | 1497 |
1498 temp1 = vec_ld(0, pixels); | 1498 temp1 = vec_ld(0, pixels); |
1499 temp2 = vec_ld(16, pixels); | 1499 temp2 = vec_ld(16, pixels); |
1500 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); | 1500 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); |