comparison ppc/int_altivec.c @ 7204:83ffe1bed06e libavcodec

Altivec implementation of APE vector functions
author kostya
date Sun, 06 Jul 2008 07:33:09 +0000
parents f7cbb7733146
children a8a79f5385f6
comparison
equal deleted inserted replaced
7203:87b1dfb5a98d 7204:83ffe1bed06e
26 #include "libavcodec/dsputil.h" 26 #include "libavcodec/dsputil.h"
27 27
28 #include "gcc_fixes.h" 28 #include "gcc_fixes.h"
29 29
30 #include "dsputil_altivec.h" 30 #include "dsputil_altivec.h"
31
32 #include "types_altivec.h"
31 33
32 static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2, 34 static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2,
33 int size) { 35 int size) {
34 int i, size16; 36 int i, size16;
35 vector signed char vpix1; 37 vector signed char vpix1;
72 u.score[3] += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]); 74 u.score[3] += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);
73 } 75 }
74 return u.score[3]; 76 return u.score[3];
75 } 77 }
76 78
79 static void add_int16_altivec(int16_t * v1, int16_t * v2, int order)
80 {
81 int i;
82 register vec_s16_t vec, *pv;
83
84 for(i = 0; i < order; i += 8){
85 pv = (vec_s16_t*)v2;
86 vec = vec_perm(pv[0], pv[1], vec_lvsl(0, v2));
87 vec_st(vec_add(vec_ld(0, v1), vec), 0, v1);
88 v1 += 8;
89 v2 += 8;
90 }
91 }
92
93 static void sub_int16_altivec(int16_t * v1, int16_t * v2, int order)
94 {
95 int i;
96 register vec_s16_t vec, *pv;
97
98 for(i = 0; i < order; i += 8){
99 pv = (vec_s16_t*)v2;
100 vec = vec_perm(pv[0], pv[1], vec_lvsl(0, v2));
101 vec_st(vec_sub(vec_ld(0, v1), vec), 0, v1);
102 v1 += 8;
103 v2 += 8;
104 }
105 }
106
107 static int32_t scalarproduct_int16_altivec(int16_t * v1, int16_t * v2, int order, const int shift)
108 {
109 int i;
110 LOAD_ZERO;
111 register vec_s16_t vec1, *pv;
112 register vec_s32_t res = vec_splat_s32(0), t;
113 register vec_u32_t shifts;
114 DECLARE_ALIGNED_16(int32_t, ires);
115
116 shifts = zero_u32v;
117 if(shift & 0x10) shifts = vec_add(shifts, vec_sl(vec_splat_u32(0x08), vec_splat_u32(0x1)));
118 if(shift & 0x08) shifts = vec_add(shifts, vec_splat_u32(0x08));
119 if(shift & 0x04) shifts = vec_add(shifts, vec_splat_u32(0x04));
120 if(shift & 0x02) shifts = vec_add(shifts, vec_splat_u32(0x02));
121 if(shift & 0x01) shifts = vec_add(shifts, vec_splat_u32(0x01));
122
123 for(i = 0; i < order; i += 8){
124 pv = (vec_s16_t*)v1;
125 vec1 = vec_perm(pv[0], pv[1], vec_lvsl(0, v1));
126 t = vec_msum(vec1, vec_ld(0, v2), zero_s32v);
127 t = vec_sr(t, shifts);
128 res = vec_sums(t, res);
129 v1 += 8;
130 v2 += 8;
131 }
132 res = vec_splat(res, 3);
133 vec_ste(res, 0, &ires);
134 return ires;
135 }
136
77 void int_init_altivec(DSPContext* c, AVCodecContext *avctx) 137 void int_init_altivec(DSPContext* c, AVCodecContext *avctx)
78 { 138 {
79 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_altivec; 139 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_altivec;
140 c->add_int16 = add_int16_altivec;
141 c->sub_int16 = sub_int16_altivec;
142 c->scalarproduct_int16 = scalarproduct_int16_altivec;
80 } 143 }