comparison: postprocess_altivec_template.c @ 133:51571e34b760 (libpostproc)
Move array specifiers outside DECLARE_ALIGNED() invocations
author | mru
date | Fri, 22 Jan 2010 03:25:11 +0000
parents | 92db3581c916
children | 218f5063fa65
132:a65cfe0fe4b2 (old) | 133:51571e34b760 (new)
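The change itself is mechanical: the array specifier moves from inside the macro argument to after the invocation, so DECLARE_ALIGNED() always receives a bare identifier. Under the GCC-style definition the expansion is identical either way, but a bare name presumably gives the other per-compiler variants of the macro (e.g. ones that must repeat or pragma-reference the variable name) something they can actually use. A minimal sketch, assuming the GCC-style definition rather than the exact libavutil one:

```c
#include <assert.h>
#include <stdint.h>

/* Sketch of the GCC-style variant; libavutil selects among several
 * per-compiler definitions of this macro. */
#define DECLARE_ALIGNED(n, t, v)  t __attribute__ ((aligned (n))) v

int main(void)
{
    /* New form: the macro argument is just "data" and the array
     * specifier follows the invocation, expanding to
     *   short __attribute__ ((aligned (16))) data [8] = {0};     */
    DECLARE_ALIGNED(16, short, data)[8] = {0};

    /* The 16-byte alignment is what makes vec_ld(0, ...) on these
     * arrays legal in the AltiVec code below. */
    assert(((uintptr_t)data % 16) == 0);
    return (int)data[0];
}
```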
60 this code makes no assumption on src or stride. | 60 this code makes no assumption on src or stride. |
61 One could remove the recomputation of the perm | 61 One could remove the recomputation of the perm |
62 vector by assuming (stride % 16) == 0, unfortunately | 62 vector by assuming (stride % 16) == 0, unfortunately |
63 this is not always true. | 63 this is not always true. |
64 */ | 64 */ |
65 DECLARE_ALIGNED(16, short, data[8]) = | 65 DECLARE_ALIGNED(16, short, data)[8] = |
66 { | 66 { |
67 ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1, | 67 ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1, |
68 data[0] * 2 + 1, | 68 data[0] * 2 + 1, |
69 c->QP * 2, | 69 c->QP * 2, |
70 c->QP * 4 | 70 c->QP * 4 |
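The "perm vector" mentioned in the comment at lines 60-63 is the standard AltiVec misaligned-load idiom: vec_ld() ignores the low four address bits, so an unaligned row is fetched as two aligned loads merged through vec_perm() with a shift pattern from vec_lvsl(). A sketch of the pattern (load_unaligned is an illustrative helper, not a function from this file):

```c
#include <altivec.h>

/* Illustrative helper: fetch 16 bytes from a possibly misaligned
 * address.  vec_ld() rounds the address down to a 16-byte boundary,
 * so we load the two aligned blocks straddling src and merge them
 * with the permutation from vec_lvsl(). */
static vector unsigned char load_unaligned(const unsigned char *src)
{
    vector unsigned char perm = vec_lvsl(0, src);
    vector unsigned char lo   = vec_ld( 0, src);
    vector unsigned char hi   = vec_ld(15, src);
    return vec_perm(lo, hi, perm);
}
```

If (stride % 16) were guaranteed to be 0 and src aligned, perm would be identical for every row; since it is not, the code recomputes it, which is exactly the cost the comment laments.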
220 */ | 220 */ |
221 uint8_t *src2 = src; | 221 uint8_t *src2 = src; |
222 const vector signed int zero = vec_splat_s32(0); | 222 const vector signed int zero = vec_splat_s32(0); |
223 const int properStride = (stride % 16); | 223 const int properStride = (stride % 16); |
224 const int srcAlign = ((unsigned long)src2 % 16); | 224 const int srcAlign = ((unsigned long)src2 % 16); |
225 DECLARE_ALIGNED(16, short, qp[8]) = {c->QP}; | 225 DECLARE_ALIGNED(16, short, qp)[8] = {c->QP}; |
226 vector signed short vqp = vec_ld(0, qp); | 226 vector signed short vqp = vec_ld(0, qp); |
227 vector signed short vb0, vb1, vb2, vb3, vb4, vb5, vb6, vb7, vb8, vb9; | 227 vector signed short vb0, vb1, vb2, vb3, vb4, vb5, vb6, vb7, vb8, vb9; |
228 vector unsigned char vbA0, av_uninit(vbA1), av_uninit(vbA2), av_uninit(vbA3), av_uninit(vbA4), av_uninit(vbA5), av_uninit(vbA6), av_uninit(vbA7), av_uninit(vbA8), vbA9; | 228 vector unsigned char vbA0, av_uninit(vbA1), av_uninit(vbA2), av_uninit(vbA3), av_uninit(vbA4), av_uninit(vbA5), av_uninit(vbA6), av_uninit(vbA7), av_uninit(vbA8), vbA9; |
229 vector unsigned char vbB0, av_uninit(vbB1), av_uninit(vbB2), av_uninit(vbB3), av_uninit(vbB4), av_uninit(vbB5), av_uninit(vbB6), av_uninit(vbB7), av_uninit(vbB8), vbB9; | 229 vector unsigned char vbB0, av_uninit(vbB1), av_uninit(vbB2), av_uninit(vbB3), av_uninit(vbB4), av_uninit(vbB5), av_uninit(vbB6), av_uninit(vbB7), av_uninit(vbB8), vbB9; |
230 vector unsigned char vbT0, vbT1, vbT2, vbT3, vbT4, vbT5, vbT6, vbT7, vbT8, vbT9; | 230 vector unsigned char vbT0, vbT1, vbT2, vbT3, vbT4, vbT5, vbT6, vbT7, vbT8, vbT9; |
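The av_uninit() wrappers on vbA1..vbA8 and vbB1..vbB8 only silence "may be used uninitialized" warnings for variables that are conditionally written before use; in libavutil the macro is defined roughly as follows (paraphrased, not copied from a specific revision):

```c
/* Self-assignment convinces GCC the variable is initialized without
 * generating any code; other compilers get a plain declaration. */
#if defined(__GNUC__)
#    define av_uninit(x) x=x
#else
#    define av_uninit(x) x
#endif
```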
416 can be removed by assuming proper alignment of | 416 can be removed by assuming proper alignment of |
417 src & stride :-( | 417 src & stride :-( |
418 */ | 418 */ |
419 uint8_t *src2 = src + stride*3; | 419 uint8_t *src2 = src + stride*3; |
420 const vector signed int zero = vec_splat_s32(0); | 420 const vector signed int zero = vec_splat_s32(0); |
421 DECLARE_ALIGNED(16, short, qp[8]) = {8*c->QP}; | 421 DECLARE_ALIGNED(16, short, qp)[8] = {8*c->QP}; |
422 vector signed short vqp = vec_splat( | 422 vector signed short vqp = vec_splat( |
423 (vector signed short)vec_ld(0, qp), 0); | 423 (vector signed short)vec_ld(0, qp), 0); |
424 | 424 |
425 #define LOAD_LINE(i) \ | 425 #define LOAD_LINE(i) \ |
426 const vector unsigned char perm##i = \ | 426 const vector unsigned char perm##i = \ |
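Lines 421-423 show the second idiom this patch touches in every hunk: a runtime scalar is stored into a 16-byte aligned array, loaded with vec_ld(), and broadcast to all lanes with vec_splat(), since AltiVec has no instruction to splat a value from a general-purpose register. A sketch under the same assumptions (the helper name is illustrative):

```c
#include <altivec.h>

/* Illustrative: broadcast a runtime short across all eight lanes.
 * Only tmp[0] needs a meaningful value because vec_splat(v, 0)
 * replicates lane 0; the explicit alignment satisfies vec_ld(). */
static vector signed short splat_short(short x)
{
    short tmp[8] __attribute__ ((aligned (16))) = { x };
    return vec_splat(vec_ld(0, tmp), 0);
}
```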
536 this is not always true. Quite a lot of load/stores | 536 this is not always true. Quite a lot of load/stores |
537 can be removed by assuming proper alignment of | 537 can be removed by assuming proper alignment of |
538 src & stride :-( | 538 src & stride :-( |
539 */ | 539 */ |
540 uint8_t *srcCopy = src; | 540 uint8_t *srcCopy = src; |
541 DECLARE_ALIGNED(16, uint8_t, dt[16]); | 541 DECLARE_ALIGNED(16, uint8_t, dt)[16]; |
542 const vector signed int zero = vec_splat_s32(0); | 542 const vector signed int zero = vec_splat_s32(0); |
543 vector unsigned char v_dt; | 543 vector unsigned char v_dt; |
544 dt[0] = deringThreshold; | 544 dt[0] = deringThreshold; |
545 v_dt = vec_splat(vec_ld(0, dt), 0); | 545 v_dt = vec_splat(vec_ld(0, dt), 0); |
546 | 546 |
600 return; | 600 return; |
601 | 601 |
602 v_avg = vec_avg(v_min, v_max); | 602 v_avg = vec_avg(v_min, v_max); |
603 } | 603 } |
604 | 604 |
605 DECLARE_ALIGNED(16, signed int, S[8]); | 605 DECLARE_ALIGNED(16, signed int, S)[8]; |
606 { | 606 { |
607 const vector unsigned short mask1 = (vector unsigned short) | 607 const vector unsigned short mask1 = (vector unsigned short) |
608 {0x0001, 0x0002, 0x0004, 0x0008, | 608 {0x0001, 0x0002, 0x0004, 0x0008, |
609 0x0010, 0x0020, 0x0040, 0x0080}; | 609 0x0010, 0x0020, 0x0040, 0x0080}; |
610 const vector unsigned short mask2 = (vector unsigned short) | 610 const vector unsigned short mask2 = (vector unsigned short) |
696 } | 696 } |
697 | 697 |
698 /* I'm not sure the following is actually faster | 698 /* I'm not sure the following is actually faster |
699 than straight, unvectorized C code :-( */ | 699 than straight, unvectorized C code :-( */ |
700 | 700 |
701 DECLARE_ALIGNED(16, int, tQP2[4]); | 701 DECLARE_ALIGNED(16, int, tQP2)[4]; |
702 tQP2[0]= c->QP/2 + 1; | 702 tQP2[0]= c->QP/2 + 1; |
703 vector signed int vQP2 = vec_ld(0, tQP2); | 703 vector signed int vQP2 = vec_ld(0, tQP2); |
704 vQP2 = vec_splat(vQP2, 0); | 704 vQP2 = vec_splat(vQP2, 0); |
705 const vector signed int vsint32_8 = vec_splat_s32(8); | 705 const vector signed int vsint32_8 = vec_splat_s32(8); |
706 const vector unsigned int vuint32_4 = vec_splat_u32(4); | 706 const vector unsigned int vuint32_4 = vec_splat_u32(4); |
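The contrast at lines 701-706 is why tQP2 exists at all: vec_splat_s32() and vec_splat_u32() accept only a 5-bit literal in [-16, 15], so the constants 8 and 4 can be materialized directly, while the runtime value c->QP/2 + 1 has to take the aligned-memory round trip. Roughly, with illustrative helper names:

```c
#include <altivec.h>

/* Runtime values (or constants outside [-16, 15]) go through an
 * aligned buffer, as tQP2 does above. */
static vector signed int splat_int(int x)
{
    int tmp[4] __attribute__ ((aligned (16))) = { x };
    return vec_splat(vec_ld(0, tmp), 0);
}

static vector signed int demo(int qp)
{
    const vector signed int v8  = vec_splat_s32(8);   /* literal: OK */
    const vector signed int vq2 = splat_int(qp / 2 + 1);
    return vec_add(vq2, v8);
}
```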