Mercurial > libavcodec.hg
comparison ppc/h264_altivec.c @ 9444:fe17033a79ed libavcodec
Altivec version of avg_no_rnd_vc1_chroma_mc8
author | conrad |
---|---|
date | Wed, 15 Apr 2009 06:23:40 +0000 |
parents | ef3a7b711cc0 |
children | 34a65026fa06 |
comparison
equal
deleted
inserted
replaced
9443:3970fe47fea3 | 9444:fe17033a79ed |
---|---|
29 #define PUT_OP_U8_ALTIVEC(d, s, dst) d = s | 29 #define PUT_OP_U8_ALTIVEC(d, s, dst) d = s |
30 #define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s) | 30 #define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s) |
31 | 31 |
32 #define OP_U8_ALTIVEC PUT_OP_U8_ALTIVEC | 32 #define OP_U8_ALTIVEC PUT_OP_U8_ALTIVEC |
33 #define PREFIX_h264_chroma_mc8_altivec put_h264_chroma_mc8_altivec | 33 #define PREFIX_h264_chroma_mc8_altivec put_h264_chroma_mc8_altivec |
34 #define PREFIX_no_rnd_vc1_chroma_mc8_altivec put_no_rnd_vc1_chroma_mc8_altivec | |
34 #define PREFIX_h264_chroma_mc8_num altivec_put_h264_chroma_mc8_num | 35 #define PREFIX_h264_chroma_mc8_num altivec_put_h264_chroma_mc8_num |
35 #define PREFIX_h264_qpel16_h_lowpass_altivec put_h264_qpel16_h_lowpass_altivec | 36 #define PREFIX_h264_qpel16_h_lowpass_altivec put_h264_qpel16_h_lowpass_altivec |
36 #define PREFIX_h264_qpel16_h_lowpass_num altivec_put_h264_qpel16_h_lowpass_num | 37 #define PREFIX_h264_qpel16_h_lowpass_num altivec_put_h264_qpel16_h_lowpass_num |
37 #define PREFIX_h264_qpel16_v_lowpass_altivec put_h264_qpel16_v_lowpass_altivec | 38 #define PREFIX_h264_qpel16_v_lowpass_altivec put_h264_qpel16_v_lowpass_altivec |
38 #define PREFIX_h264_qpel16_v_lowpass_num altivec_put_h264_qpel16_v_lowpass_num | 39 #define PREFIX_h264_qpel16_v_lowpass_num altivec_put_h264_qpel16_v_lowpass_num |
39 #define PREFIX_h264_qpel16_hv_lowpass_altivec put_h264_qpel16_hv_lowpass_altivec | 40 #define PREFIX_h264_qpel16_hv_lowpass_altivec put_h264_qpel16_hv_lowpass_altivec |
40 #define PREFIX_h264_qpel16_hv_lowpass_num altivec_put_h264_qpel16_hv_lowpass_num | 41 #define PREFIX_h264_qpel16_hv_lowpass_num altivec_put_h264_qpel16_hv_lowpass_num |
41 #include "h264_template_altivec.c" | 42 #include "h264_template_altivec.c" |
42 #undef OP_U8_ALTIVEC | 43 #undef OP_U8_ALTIVEC |
43 #undef PREFIX_h264_chroma_mc8_altivec | 44 #undef PREFIX_h264_chroma_mc8_altivec |
45 #undef PREFIX_no_rnd_vc1_chroma_mc8_altivec | |
44 #undef PREFIX_h264_chroma_mc8_num | 46 #undef PREFIX_h264_chroma_mc8_num |
45 #undef PREFIX_h264_qpel16_h_lowpass_altivec | 47 #undef PREFIX_h264_qpel16_h_lowpass_altivec |
46 #undef PREFIX_h264_qpel16_h_lowpass_num | 48 #undef PREFIX_h264_qpel16_h_lowpass_num |
47 #undef PREFIX_h264_qpel16_v_lowpass_altivec | 49 #undef PREFIX_h264_qpel16_v_lowpass_altivec |
48 #undef PREFIX_h264_qpel16_v_lowpass_num | 50 #undef PREFIX_h264_qpel16_v_lowpass_num |
49 #undef PREFIX_h264_qpel16_hv_lowpass_altivec | 51 #undef PREFIX_h264_qpel16_hv_lowpass_altivec |
50 #undef PREFIX_h264_qpel16_hv_lowpass_num | 52 #undef PREFIX_h264_qpel16_hv_lowpass_num |
51 | 53 |
52 #define OP_U8_ALTIVEC AVG_OP_U8_ALTIVEC | 54 #define OP_U8_ALTIVEC AVG_OP_U8_ALTIVEC |
53 #define PREFIX_h264_chroma_mc8_altivec avg_h264_chroma_mc8_altivec | 55 #define PREFIX_h264_chroma_mc8_altivec avg_h264_chroma_mc8_altivec |
56 #define PREFIX_no_rnd_vc1_chroma_mc8_altivec avg_no_rnd_vc1_chroma_mc8_altivec | |
54 #define PREFIX_h264_chroma_mc8_num altivec_avg_h264_chroma_mc8_num | 57 #define PREFIX_h264_chroma_mc8_num altivec_avg_h264_chroma_mc8_num |
55 #define PREFIX_h264_qpel16_h_lowpass_altivec avg_h264_qpel16_h_lowpass_altivec | 58 #define PREFIX_h264_qpel16_h_lowpass_altivec avg_h264_qpel16_h_lowpass_altivec |
56 #define PREFIX_h264_qpel16_h_lowpass_num altivec_avg_h264_qpel16_h_lowpass_num | 59 #define PREFIX_h264_qpel16_h_lowpass_num altivec_avg_h264_qpel16_h_lowpass_num |
57 #define PREFIX_h264_qpel16_v_lowpass_altivec avg_h264_qpel16_v_lowpass_altivec | 60 #define PREFIX_h264_qpel16_v_lowpass_altivec avg_h264_qpel16_v_lowpass_altivec |
58 #define PREFIX_h264_qpel16_v_lowpass_num altivec_avg_h264_qpel16_v_lowpass_num | 61 #define PREFIX_h264_qpel16_v_lowpass_num altivec_avg_h264_qpel16_v_lowpass_num |
59 #define PREFIX_h264_qpel16_hv_lowpass_altivec avg_h264_qpel16_hv_lowpass_altivec | 62 #define PREFIX_h264_qpel16_hv_lowpass_altivec avg_h264_qpel16_hv_lowpass_altivec |
60 #define PREFIX_h264_qpel16_hv_lowpass_num altivec_avg_h264_qpel16_hv_lowpass_num | 63 #define PREFIX_h264_qpel16_hv_lowpass_num altivec_avg_h264_qpel16_hv_lowpass_num |
61 #include "h264_template_altivec.c" | 64 #include "h264_template_altivec.c" |
62 #undef OP_U8_ALTIVEC | 65 #undef OP_U8_ALTIVEC |
63 #undef PREFIX_h264_chroma_mc8_altivec | 66 #undef PREFIX_h264_chroma_mc8_altivec |
67 #undef PREFIX_no_rnd_vc1_chroma_mc8_altivec | |
64 #undef PREFIX_h264_chroma_mc8_num | 68 #undef PREFIX_h264_chroma_mc8_num |
65 #undef PREFIX_h264_qpel16_h_lowpass_altivec | 69 #undef PREFIX_h264_qpel16_h_lowpass_altivec |
66 #undef PREFIX_h264_qpel16_h_lowpass_num | 70 #undef PREFIX_h264_qpel16_h_lowpass_num |
67 #undef PREFIX_h264_qpel16_v_lowpass_altivec | 71 #undef PREFIX_h264_qpel16_v_lowpass_altivec |
68 #undef PREFIX_h264_qpel16_v_lowpass_num | 72 #undef PREFIX_h264_qpel16_v_lowpass_num |
176 DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\ | 180 DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\ |
177 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\ | 181 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\ |
178 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\ | 182 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\ |
179 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\ | 183 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\ |
180 }\ | 184 }\ |
181 | |
182 /* this code assumes that stride % 16 == 0 */ | |
183 void put_no_rnd_vc1_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) { | |
184 DECLARE_ALIGNED_16(signed int, ABCD[4]) = | |
185 {((8 - x) * (8 - y)), | |
186 (( x) * (8 - y)), | |
187 ((8 - x) * ( y)), | |
188 (( x) * ( y))}; | |
189 register int i; | |
190 vec_u8 fperm; | |
191 const vec_s32 vABCD = vec_ld(0, ABCD); | |
192 const vec_s16 vA = vec_splat((vec_s16)vABCD, 1); | |
193 const vec_s16 vB = vec_splat((vec_s16)vABCD, 3); | |
194 const vec_s16 vC = vec_splat((vec_s16)vABCD, 5); | |
195 const vec_s16 vD = vec_splat((vec_s16)vABCD, 7); | |
196 LOAD_ZERO; | |
197 const vec_s16 v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4)); | |
198 const vec_u16 v6us = vec_splat_u16(6); | |
199 register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1; | |
200 register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0; | |
201 | |
202 vec_u8 vsrcAuc, av_uninit(vsrcBuc), vsrcperm0, vsrcperm1; | |
203 vec_u8 vsrc0uc, vsrc1uc; | |
204 vec_s16 vsrc0ssH, vsrc1ssH; | |
205 vec_u8 vsrcCuc, vsrc2uc, vsrc3uc; | |
206 vec_s16 vsrc2ssH, vsrc3ssH, psum; | |
207 vec_u8 vdst, ppsum, fsum; | |
208 | |
209 if (((unsigned long)dst) % 16 == 0) { | |
210 fperm = (vec_u8){0x10, 0x11, 0x12, 0x13, | |
211 0x14, 0x15, 0x16, 0x17, | |
212 0x08, 0x09, 0x0A, 0x0B, | |
213 0x0C, 0x0D, 0x0E, 0x0F}; | |
214 } else { | |
215 fperm = (vec_u8){0x00, 0x01, 0x02, 0x03, | |
216 0x04, 0x05, 0x06, 0x07, | |
217 0x18, 0x19, 0x1A, 0x1B, | |
218 0x1C, 0x1D, 0x1E, 0x1F}; | |
219 } | |
220 | |
221 vsrcAuc = vec_ld(0, src); | |
222 | |
223 if (loadSecond) | |
224 vsrcBuc = vec_ld(16, src); | |
225 vsrcperm0 = vec_lvsl(0, src); | |
226 vsrcperm1 = vec_lvsl(1, src); | |
227 | |
228 vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0); | |
229 if (reallyBadAlign) | |
230 vsrc1uc = vsrcBuc; | |
231 else | |
232 vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1); | |
233 | |
234 vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc0uc); | |
235 vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc1uc); | |
236 | |
237 if (!loadSecond) {// -> !reallyBadAlign | |
238 for (i = 0 ; i < h ; i++) { | |
239 | |
240 | |
241 vsrcCuc = vec_ld(stride + 0, src); | |
242 | |
243 vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0); | |
244 vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1); | |
245 | |
246 vsrc2ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc2uc); | |
247 vsrc3ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc3uc); | |
248 | |
249 psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0)); | |
250 psum = vec_mladd(vB, vsrc1ssH, psum); | |
251 psum = vec_mladd(vC, vsrc2ssH, psum); | |
252 psum = vec_mladd(vD, vsrc3ssH, psum); | |
253 psum = vec_add(v28ss, psum); | |
254 psum = vec_sra(psum, v6us); | |
255 | |
256 vdst = vec_ld(0, dst); | |
257 ppsum = (vec_u8)vec_packsu(psum, psum); | |
258 fsum = vec_perm(vdst, ppsum, fperm); | |
259 | |
260 vec_st(fsum, 0, dst); | |
261 | |
262 vsrc0ssH = vsrc2ssH; | |
263 vsrc1ssH = vsrc3ssH; | |
264 | |
265 dst += stride; | |
266 src += stride; | |
267 } | |
268 } else { | |
269 vec_u8 vsrcDuc; | |
270 for (i = 0 ; i < h ; i++) { | |
271 vsrcCuc = vec_ld(stride + 0, src); | |
272 vsrcDuc = vec_ld(stride + 16, src); | |
273 | |
274 vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0); | |
275 if (reallyBadAlign) | |
276 vsrc3uc = vsrcDuc; | |
277 else | |
278 vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1); | |
279 | |
280 vsrc2ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc2uc); | |
281 vsrc3ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc3uc); | |
282 | |
283 psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0)); | |
284 psum = vec_mladd(vB, vsrc1ssH, psum); | |
285 psum = vec_mladd(vC, vsrc2ssH, psum); | |
286 psum = vec_mladd(vD, vsrc3ssH, psum); | |
287 psum = vec_add(v28ss, psum); | |
288 psum = vec_sr(psum, v6us); | |
289 | |
290 vdst = vec_ld(0, dst); | |
291 ppsum = (vec_u8)vec_pack(psum, psum); | |
292 fsum = vec_perm(vdst, ppsum, fperm); | |
293 | |
294 vec_st(fsum, 0, dst); | |
295 | |
296 vsrc0ssH = vsrc2ssH; | |
297 vsrc1ssH = vsrc3ssH; | |
298 | |
299 dst += stride; | |
300 src += stride; | |
301 } | |
302 } | |
303 } | |
304 | 185 |
305 static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1, | 186 static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1, |
306 const uint8_t * src2, int dst_stride, | 187 const uint8_t * src2, int dst_stride, |
307 int src_stride1, int h) | 188 int src_stride1, int h) |
308 { | 189 { |
1090 | 971 |
1091 if (has_altivec()) { | 972 if (has_altivec()) { |
1092 c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec; | 973 c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec; |
1093 c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec; | 974 c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec; |
1094 c->put_no_rnd_vc1_chroma_pixels_tab[0] = put_no_rnd_vc1_chroma_mc8_altivec; | 975 c->put_no_rnd_vc1_chroma_pixels_tab[0] = put_no_rnd_vc1_chroma_mc8_altivec; |
976 c->avg_no_rnd_vc1_chroma_pixels_tab[0] = avg_no_rnd_vc1_chroma_mc8_altivec; | |
1095 c->h264_idct_add = ff_h264_idct_add_altivec; | 977 c->h264_idct_add = ff_h264_idct_add_altivec; |
1096 c->h264_idct_add8 = ff_h264_idct_add8_altivec; | 978 c->h264_idct_add8 = ff_h264_idct_add8_altivec; |
1097 c->h264_idct_add16 = ff_h264_idct_add16_altivec; | 979 c->h264_idct_add16 = ff_h264_idct_add16_altivec; |
1098 c->h264_idct_add16intra = ff_h264_idct_add16intra_altivec; | 980 c->h264_idct_add16intra = ff_h264_idct_add16intra_altivec; |
1099 c->h264_idct_dc_add= h264_idct_dc_add_altivec; | 981 c->h264_idct_dc_add= h264_idct_dc_add_altivec; |