Mercurial > libpostproc.hg
comparison postprocess_altivec_template.c @ 41:8dc2a966afa7 libpostproc
Many steps to avutilize this mess: DECLARE_ALIGNED
author | lu_zero |
---|---|
date | Fri, 02 Mar 2007 09:07:09 +0000 |
parents | b55400a067f0 |
children | 6b76477fb973 |
comparison
equal
deleted
inserted
replaced
40:7bd9f7dcef91 | 41:8dc2a966afa7 |
---|---|
18 * You should have received a copy of the GNU General Public License | 18 * You should have received a copy of the GNU General Public License |
19 * along with FFmpeg; if not, write to the Free Software | 19 * along with FFmpeg; if not, write to the Free Software |
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
21 */ | 21 */ |
22 | 22 |
| 23 #include <avutil.h> |
23 | 24 |
24 #ifdef CONFIG_DARWIN | 25 #ifdef CONFIG_DARWIN |
25 #define AVV(x...) (x) | 26 #define AVV(x...) (x) |
26 #else | 27 #else |
27 #define AVV(x...) {x} | 28 #define AVV(x...) {x} |
65 this code makes no assumption on src or stride. | 66 this code makes no assumption on src or stride. |
66 One could remove the recomputation of the perm | 67 One could remove the recomputation of the perm |
67 vector by assuming (stride % 16) == 0, unfortunately | 68 vector by assuming (stride % 16) == 0, unfortunately |
68 this is not always true. | 69 this is not always true. |
69 */ | 70 */ |
70 short __attribute__ ((aligned(16))) data[8]; | 71 DECLARE_ALIGNED(16, short, data[8]); |
71 int numEq; | 72 int numEq; |
72 uint8_t *src2 = src; | 73 uint8_t *src2 = src; |
73 vector signed short v_dcOffset; | 74 vector signed short v_dcOffset; |
74 vector signed short v2QP; | 75 vector signed short v2QP; |
75 vector unsigned short v4QP; | 76 vector unsigned short v4QP; |
204 */ | 205 */ |
205 uint8_t *src2 = src; | 206 uint8_t *src2 = src; |
206 const vector signed int zero = vec_splat_s32(0); | 207 const vector signed int zero = vec_splat_s32(0); |
207 const int properStride = (stride % 16); | 208 const int properStride = (stride % 16); |
208 const int srcAlign = ((unsigned long)src2 % 16); | 209 const int srcAlign = ((unsigned long)src2 % 16); |
209 short __attribute__ ((aligned(16))) qp[8]; | 210 DECLARE_ALIGNED(16, short, qp[8]); |
210 qp[0] = c->QP; | 211 qp[0] = c->QP; |
211 vector signed short vqp = vec_ld(0, qp); | 212 vector signed short vqp = vec_ld(0, qp); |
212 vqp = vec_splat(vqp, 0); | 213 vqp = vec_splat(vqp, 0); |
213 | 214 |
214 src2 += stride*3; | 215 src2 += stride*3; |
390 can be removed by assuming proper alignement of | 391 can be removed by assuming proper alignement of |
391 src & stride :-( | 392 src & stride :-( |
392 */ | 393 */ |
393 uint8_t *src2 = src; | 394 uint8_t *src2 = src; |
394 const vector signed int zero = vec_splat_s32(0); | 395 const vector signed int zero = vec_splat_s32(0); |
395 short __attribute__ ((aligned(16))) qp[8]; | 396 DECLARE_ALIGNED(16, short, qp[8]); |
396 qp[0] = 8*c->QP; | 397 qp[0] = 8*c->QP; |
397 vector signed short vqp = vec_ld(0, qp); | 398 vector signed short vqp = vec_ld(0, qp); |
398 vqp = vec_splat(vqp, 0); | 399 vqp = vec_splat(vqp, 0); |
399 | 400 |
400 #define LOAD_LINE(i) \ | 401 #define LOAD_LINE(i) \ |
513 this is not always true. Quite a lot of load/stores | 514 this is not always true. Quite a lot of load/stores |
514 can be removed by assuming proper alignement of | 515 can be removed by assuming proper alignement of |
515 src & stride :-( | 516 src & stride :-( |
516 */ | 517 */ |
517 uint8_t *srcCopy = src; | 518 uint8_t *srcCopy = src; |
518 uint8_t __attribute__((aligned(16))) dt[16]; | 519 DECLARE_ALIGNED(16, uint8_t, dt[16]); |
519 const vector signed int zero = vec_splat_s32(0); | 520 const vector signed int zero = vec_splat_s32(0); |
520 vector unsigned char v_dt; | 521 vector unsigned char v_dt; |
521 dt[0] = deringThreshold; | 522 dt[0] = deringThreshold; |
522 v_dt = vec_splat(vec_ld(0, dt), 0); | 523 v_dt = vec_splat(vec_ld(0, dt), 0); |
523 | 524 |
577 return; | 578 return; |
578 | 579 |
579 v_avg = vec_avg(v_min, v_max); | 580 v_avg = vec_avg(v_min, v_max); |
580 } | 581 } |
581 | 582 |
582 signed int __attribute__((aligned(16))) S[8]; | 583 DECLARE_ALIGNED(16, signed int, S[8]); |
583 { | 584 { |
584 const vector unsigned short mask1 = (vector unsigned short) | 585 const vector unsigned short mask1 = (vector unsigned short) |
585 AVV(0x0001, 0x0002, 0x0004, 0x0008, | 586 AVV(0x0001, 0x0002, 0x0004, 0x0008, |
586 0x0010, 0x0020, 0x0040, 0x0080); | 587 0x0010, 0x0020, 0x0040, 0x0080); |
587 const vector unsigned short mask2 = (vector unsigned short) | 588 const vector unsigned short mask2 = (vector unsigned short) |
673 } | 674 } |
674 | 675 |
675 /* I'm not sure the following is actually faster | 676 /* I'm not sure the following is actually faster |
676 than straight, unvectorized C code :-( */ | 677 than straight, unvectorized C code :-( */ |
677 | 678 |
678 int __attribute__((aligned(16))) tQP2[4]; | 679 DECLARE_ALIGNED(16, int, tQP2[4]); |
679 tQP2[0]= c->QP/2 + 1; | 680 tQP2[0]= c->QP/2 + 1; |
680 vector signed int vQP2 = vec_ld(0, tQP2); | 681 vector signed int vQP2 = vec_ld(0, tQP2); |
681 vQP2 = vec_splat(vQP2, 0); | 682 vQP2 = vec_splat(vQP2, 0); |
682 const vector signed int vsint32_8 = vec_splat_s32(8); | 683 const vector signed int vsint32_8 = vec_splat_s32(8); |
683 const vector unsigned int vuint32_4 = vec_splat_u32(4); | 684 const vector unsigned int vuint32_4 = vec_splat_u32(4); |