libpostproc: comparison of postprocess_altivec_template.c @ 133:51571e34b760

Move array specifiers outside DECLARE_ALIGNED() invocations
author mru
date Fri, 22 Jan 2010 03:25:11 +0000
parents 92db3581c916
children 218f5063fa65
comparing parent 132:a65cfe0fe4b2 with 133:51571e34b760
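
What the patch does is mechanical: every DECLARE_ALIGNED() invocation in this file that declares an array used to pass the whole array declarator as the variable argument (for example data[8]); after the change the macro receives only the bare identifier and the array specifier follows the closing parenthesis. A minimal sketch of the two spellings, assuming per-compiler definitions along the lines of the ones in libavutil (the exact definitions are not part of this change and may differ):

#if defined(__GNUC__)
#   define DECLARE_ALIGNED(n, t, v) t __attribute__((aligned(n))) v
#elif defined(_MSC_VER)
#   define DECLARE_ALIGNED(n, t, v) __declspec(align(n)) t v
#else
#   define DECLARE_ALIGNED(n, t, v) t v
#endif

/* Old spelling: the macro argument is a full array declarator. */
DECLARE_ALIGNED(16, short, data[8]);
/* GCC-style expansion: short __attribute__((aligned(16))) data[8]; */

/* New spelling: the macro only ever sees a plain identifier and the
 * array specifier is appended outside the invocation. */
DECLARE_ALIGNED(16, short, data)[8];
/* GCC-style expansion: short __attribute__((aligned(16))) data [8]; */

Both spellings expand to valid declarations with the sketch above; the point of keeping the last argument a simple identifier is that the per-compiler definitions can place keywords or decorations around the name without ever having to handle an array declarator.
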
@@ -60,11 +60,11 @@
 this code makes no assumption on src or stride.
 One could remove the recomputation of the perm
 vector by assuming (stride % 16) == 0, unfortunately
 this is not always true.
 */
-DECLARE_ALIGNED(16, short, data[8]) =
+DECLARE_ALIGNED(16, short, data)[8] =
 {
 ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1,
 data[0] * 2 + 1,
 c->QP * 2,
 c->QP * 4
@@ -220,11 +220,11 @@
 */
 uint8_t *src2 = src;
 const vector signed int zero = vec_splat_s32(0);
 const int properStride = (stride % 16);
 const int srcAlign = ((unsigned long)src2 % 16);
-DECLARE_ALIGNED(16, short, qp[8]) = {c->QP};
+DECLARE_ALIGNED(16, short, qp)[8] = {c->QP};
 vector signed short vqp = vec_ld(0, qp);
 vector signed short vb0, vb1, vb2, vb3, vb4, vb5, vb6, vb7, vb8, vb9;
 vector unsigned char vbA0, av_uninit(vbA1), av_uninit(vbA2), av_uninit(vbA3), av_uninit(vbA4), av_uninit(vbA5), av_uninit(vbA6), av_uninit(vbA7), av_uninit(vbA8), vbA9;
 vector unsigned char vbB0, av_uninit(vbB1), av_uninit(vbB2), av_uninit(vbB3), av_uninit(vbB4), av_uninit(vbB5), av_uninit(vbB6), av_uninit(vbB7), av_uninit(vbB8), vbB9;
 vector unsigned char vbT0, vbT1, vbT2, vbT3, vbT4, vbT5, vbT6, vbT7, vbT8, vbT9;
@@ -416,11 +416,11 @@
 can be removed by assuming proper alignment of
 src & stride :-(
 */
 uint8_t *src2 = src + stride*3;
 const vector signed int zero = vec_splat_s32(0);
-DECLARE_ALIGNED(16, short, qp[8]) = {8*c->QP};
+DECLARE_ALIGNED(16, short, qp)[8] = {8*c->QP};
 vector signed short vqp = vec_splat(
 (vector signed short)vec_ld(0, qp), 0);
 
 #define LOAD_LINE(i) \
 const vector unsigned char perm##i = \
@@ -536,11 +536,11 @@
 this is not always true. Quite a lot of load/stores
 can be removed by assuming proper alignment of
 src & stride :-(
 */
 uint8_t *srcCopy = src;
-DECLARE_ALIGNED(16, uint8_t, dt[16]);
+DECLARE_ALIGNED(16, uint8_t, dt)[16];
 const vector signed int zero = vec_splat_s32(0);
 vector unsigned char v_dt;
 dt[0] = deringThreshold;
 v_dt = vec_splat(vec_ld(0, dt), 0);
 
@@ -600,11 +600,11 @@
 return;
 
 v_avg = vec_avg(v_min, v_max);
 }
 
-DECLARE_ALIGNED(16, signed int, S[8]);
+DECLARE_ALIGNED(16, signed int, S)[8];
 {
 const vector unsigned short mask1 = (vector unsigned short)
 {0x0001, 0x0002, 0x0004, 0x0008,
 0x0010, 0x0020, 0x0040, 0x0080};
 const vector unsigned short mask2 = (vector unsigned short)
@@ -696,11 +696,11 @@
 }
 
 /* I'm not sure the following is actually faster
 than straight, unvectorized C code :-( */
 
-DECLARE_ALIGNED(16, int, tQP2[4]);
+DECLARE_ALIGNED(16, int, tQP2)[4];
 tQP2[0]= c->QP/2 + 1;
 vector signed int vQP2 = vec_ld(0, tQP2);
 vQP2 = vec_splat(vQP2, 0);
 const vector signed int vsint32_8 = vec_splat_s32(8);
 const vector unsigned int vuint32_4 = vec_splat_u32(4);
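
Several of the touched declarations feed the same scalar-broadcast idiom visible in the hunks above: the scalar is written into a 16-byte-aligned array, loaded with vec_ld() and replicated across all lanes with vec_splat(). A self-contained sketch of that pattern, using the post-patch spelling (the helper name splat_short and the fallback macro are illustrative, not taken from the file):

#include <altivec.h>

#ifndef DECLARE_ALIGNED                 /* illustrative GCC-style fallback */
#   define DECLARE_ALIGNED(n, t, v) t __attribute__((aligned(n))) v
#endif

/* Broadcast one 16-bit scalar into all eight lanes of an AltiVec vector,
 * mirroring the qp/vqp code in the hunks above. */
static vector signed short splat_short(short x)
{
    DECLARE_ALIGNED(16, short, tmp)[8] = { x }; /* aligned scratch array */
    vector signed short v = vec_ld(0, tmp);     /* 16-byte aligned load  */
    return vec_splat(v, 0);                     /* replicate element 0   */
}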