comparison postprocess_altivec_template.c @ 41:8dc2a966afa7 libpostproc

Many steps to avutilized this mess: DECLARE_ALIGNED
author lu_zero
date Fri, 02 Mar 2007 09:07:09 +0000
parents b55400a067f0
children 6b76477fb973
comparison of 40:7bd9f7dcef91 (old) with 41:8dc2a966afa7 (new)
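The whole change swaps raw __attribute__ ((aligned(16))) declarations for libavutil's DECLARE_ALIGNED macro (hence the new avutil.h include in the first hunk). Roughly, the macro hides the compiler-specific alignment syntax behind one name; the following is a sketch of the idea, not the exact libavutil definition:

    /* sketch only -- the real definition lives in libavutil */
    #if defined(__GNUC__)
    #define DECLARE_ALIGNED(n, t, v) t __attribute__ ((aligned (n))) v
    #elif defined(_MSC_VER)
    #define DECLARE_ALIGNED(n, t, v) __declspec(align(n)) t v
    #else
    #define DECLARE_ALIGNED(n, t, v) t v
    #endif

    short __attribute__ ((aligned(16))) data[8];   /* before */
    DECLARE_ALIGNED(16, short, data[8]);           /* after: same thing under GCC */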
@@ -18,10 +18,11 @@
  * You should have received a copy of the GNU General Public License
  * along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include <avutil.h>
 
 #ifdef CONFIG_DARWIN
 #define AVV(x...) (x)
 #else
 #define AVV(x...) {x}
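The AVV() wrapper in this hunk exists because the two GCC flavours spell AltiVec vector literals differently: Apple's Darwin compiler expects parentheses, FSF GCC expects braces. A small usage sketch, mirroring the mask1 constant further down in this diff:

    const vector unsigned short mask =
        (vector unsigned short)AVV(0x0001, 0x0002, 0x0004, 0x0008,
                                   0x0010, 0x0020, 0x0040, 0x0080);
    /* Darwin:    (vector unsigned short)(0x0001, 0x0002, ...) */
    /* elsewhere: (vector unsigned short){0x0001, 0x0002, ...} */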
@@ -65,11 +66,11 @@
   this code makes no assumption on src or stride.
   One could remove the recomputation of the perm
   vector by assuming (stride % 16) == 0, unfortunately
   this is not always true.
 */
-short __attribute__ ((aligned(16))) data[8];
+DECLARE_ALIGNED(16, short, data[8]);
 int numEq;
 uint8_t *src2 = src;
 vector signed short v_dcOffset;
 vector signed short v2QP;
 vector unsigned short v4QP;
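The comment about recomputing "the perm vector" refers to the usual AltiVec misaligned-load idiom: vec_ld() silently drops the low four address bits, so a load from an arbitrary address needs two aligned loads stitched together with a permute vector from vec_lvsl(). A standalone sketch of that idiom (the helper name is illustrative, not from this file); if (src % 16) and (stride % 16) were guaranteed to be 0, the permute step could be dropped:

    #include <altivec.h>
    #include <stdint.h>

    static inline vector unsigned char load_misaligned(const uint8_t *p)
    {
        vector unsigned char perm = vec_lvsl(0, p);  /* shift pattern from p & 15  */
        vector unsigned char lo   = vec_ld( 0, p);   /* aligned block containing p */
        vector unsigned char hi   = vec_ld(16, p);   /* following aligned block    */
        return vec_perm(lo, hi, perm);               /* the 16 bytes starting at p */
    }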
@@ -204,11 +205,11 @@
 */
 uint8_t *src2 = src;
 const vector signed int zero = vec_splat_s32(0);
 const int properStride = (stride % 16);
 const int srcAlign = ((unsigned long)src2 % 16);
-short __attribute__ ((aligned(16))) qp[8];
+DECLARE_ALIGNED(16, short, qp[8]);
 qp[0] = c->QP;
 vector signed short vqp = vec_ld(0, qp);
 vqp = vec_splat(vqp, 0);
 
 src2 += stride*3;
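The qp[] buffer in this hunk is only a staging area for a load-and-splat: write the scalar into element 0, do one aligned vector load, then broadcast. The 16-byte alignment DECLARE_ALIGNED guarantees is not optional here, because vec_ld() masks off the low address bits. Shown in isolation (the helper name is made up, and the avutil.h include path is taken on the first hunk's word):

    #include <altivec.h>
    #include <avutil.h>   /* for DECLARE_ALIGNED, as added in the first hunk */

    static vector signed short splat_qp(short qp_value)
    {
        DECLARE_ALIGNED(16, short, tmp[8]);
        tmp[0] = qp_value;                         /* only element 0 matters       */
        vector signed short v = vec_ld(0, tmp);    /* aligned 16-byte vector load  */
        return vec_splat(v, 0);                    /* broadcast element 0 to all 8 */
    }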
@@ -390,11 +391,11 @@
   can be removed by assuming proper alignement of
   src & stride :-(
 */
 uint8_t *src2 = src;
 const vector signed int zero = vec_splat_s32(0);
-short __attribute__ ((aligned(16))) qp[8];
+DECLARE_ALIGNED(16, short, qp[8]);
 qp[0] = 8*c->QP;
 vector signed short vqp = vec_ld(0, qp);
 vqp = vec_splat(vqp, 0);
 
 #define LOAD_LINE(i) \
@@ -513,11 +514,11 @@
   this is not always true. Quite a lot of load/stores
   can be removed by assuming proper alignement of
   src & stride :-(
 */
 uint8_t *srcCopy = src;
-uint8_t __attribute__((aligned(16))) dt[16];
+DECLARE_ALIGNED(16, uint8_t, dt[16]);
 const vector signed int zero = vec_splat_s32(0);
 vector unsigned char v_dt;
 dt[0] = deringThreshold;
 v_dt = vec_splat(vec_ld(0, dt), 0);
 
@@ -577,11 +578,11 @@
 return;
 
 v_avg = vec_avg(v_min, v_max);
 }
 
-signed int __attribute__((aligned(16))) S[8];
+DECLARE_ALIGNED(16, signed int, S[8]);
 {
 const vector unsigned short mask1 = (vector unsigned short)
 AVV(0x0001, 0x0002, 0x0004, 0x0008,
 0x0010, 0x0020, 0x0040, 0x0080);
 const vector unsigned short mask2 = (vector unsigned short)
@@ -673,11 +674,11 @@
 }
 
 /* I'm not sure the following is actually faster
 than straight, unvectorized C code :-( */
 
-int __attribute__((aligned(16))) tQP2[4];
+DECLARE_ALIGNED(16, int, tQP2[4]);
 tQP2[0]= c->QP/2 + 1;
 vector signed int vQP2 = vec_ld(0, tQP2);
 vQP2 = vec_splat(vQP2, 0);
 const vector signed int vsint32_8 = vec_splat_s32(8);
 const vector unsigned int vuint32_4 = vec_splat_u32(4);
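The two constants at the end of this hunk show the other half of the pattern: vec_splat_s32() and vec_splat_u32() take a 5-bit literal (-16..15), so small compile-time constants such as 8 and 4 need no staging buffer, while a run-time value like c->QP/2 + 1 has to go through the aligned tQP2[] array above. An illustrative snippet, not from this file:

    #include <altivec.h>

    static void splat_small_constants(void)
    {
        const vector signed int   v_eight = vec_splat_s32(8);  /* literal fits the immediate */
        const vector unsigned int v_four  = vec_splat_u32(4);  /* likewise */
        (void)v_eight; (void)v_four;
    }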