Mercurial > libavcodec.hg
comparison ppc/h264_template_altivec.c @ 6059:8c1a381bddb6 libavcodec
Factorize common code (almost cosmetic)
author | lu_zero |
---|---|
date | Sat, 22 Dec 2007 02:46:32 +0000 |
parents | 93089aed00cb |
children | b1e3368c0d5e |
comparison
equal
deleted
inserted
replaced
6058:93089aed00cb | 6059:8c1a381bddb6 |
---|---|
24 #else | 24 #else |
25 #define ASSERT_ALIGNED(ptr) ; | 25 #define ASSERT_ALIGNED(ptr) ; |
26 #endif | 26 #endif |
27 | 27 |
28 /* this code assumes that stride % 16 == 0 */ | 28 /* this code assumes that stride % 16 == 0 */ |
29 | |
30 #define CHROMA_MC8_ALTIVEC_CORE \ | |
31 vsrc2ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc2uc);\ | |
32 vsrc3ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc3uc);\ | |
33 \ | |
34 psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));\ | |
35 psum = vec_mladd(vB, vsrc1ssH, psum);\ | |
36 psum = vec_mladd(vC, vsrc2ssH, psum);\ | |
37 psum = vec_mladd(vD, vsrc3ssH, psum);\ | |
38 psum = vec_add(v32ss, psum);\ | |
39 psum = vec_sr(psum, v6us);\ | |
40 \ | |
41 vdst = vec_ld(0, dst);\ | |
42 ppsum = (vec_u8_t)vec_pack(psum, psum);\ | |
43 vfdst = vec_perm(vdst, ppsum, fperm);\ | |
44 \ | |
45 OP_U8_ALTIVEC(fsum, vfdst, vdst);\ | |
46 \ | |
47 vec_st(fsum, 0, dst);\ | |
48 \ | |
49 vsrc0ssH = vsrc2ssH;\ | |
50 vsrc1ssH = vsrc3ssH;\ | |
51 \ | |
52 dst += stride;\ | |
53 src += stride; | |
54 | |
55 | |
29 void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) { | 56 void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) { |
30 POWERPC_PERF_DECLARE(PREFIX_h264_chroma_mc8_num, 1); | 57 POWERPC_PERF_DECLARE(PREFIX_h264_chroma_mc8_num, 1); |
31 DECLARE_ALIGNED_16(signed int, ABCD[4]) = | 58 DECLARE_ALIGNED_16(signed int, ABCD[4]) = |
32 {((8 - x) * (8 - y)), | 59 {((8 - x) * (8 - y)), |
33 (( x) * (8 - y)), | 60 (( x) * (8 - y)), |
90 vsrcCuc = vec_ld(stride + 0, src); | 117 vsrcCuc = vec_ld(stride + 0, src); |
91 | 118 |
92 vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0); | 119 vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0); |
93 vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1); | 120 vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1); |
94 | 121 |
95 vsrc2ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc2uc); | 122 CHROMA_MC8_ALTIVEC_CORE |
96 vsrc3ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc3uc); | |
97 | |
98 psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0)); | |
99 psum = vec_mladd(vB, vsrc1ssH, psum); | |
100 psum = vec_mladd(vC, vsrc2ssH, psum); | |
101 psum = vec_mladd(vD, vsrc3ssH, psum); | |
102 psum = vec_add(v32ss, psum); | |
103 psum = vec_sra(psum, v6us); | |
104 | |
105 vdst = vec_ld(0, dst); | |
106 ppsum = (vec_u8_t)vec_packsu(psum, psum); | |
107 vfdst = vec_perm(vdst, ppsum, fperm); | |
108 | |
109 OP_U8_ALTIVEC(fsum, vfdst, vdst); | |
110 | |
111 vec_st(fsum, 0, dst); | |
112 | |
113 vsrc0ssH = vsrc2ssH; | |
114 vsrc1ssH = vsrc3ssH; | |
115 | |
116 dst += stride; | |
117 src += stride; | |
118 } | 123 } |
119 } else { | 124 } else { |
120 vec_u8_t vsrcDuc; | 125 vec_u8_t vsrcDuc; |
121 for (i = 0 ; i < h ; i++) { | 126 for (i = 0 ; i < h ; i++) { |
122 vsrcCuc = vec_ld(stride + 0, src); | 127 vsrcCuc = vec_ld(stride + 0, src); |
126 if (reallyBadAlign) | 131 if (reallyBadAlign) |
127 vsrc3uc = vsrcDuc; | 132 vsrc3uc = vsrcDuc; |
128 else | 133 else |
129 vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1); | 134 vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1); |
130 | 135 |
131 vsrc2ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc2uc); | 136 CHROMA_MC8_ALTIVEC_CORE |
132 vsrc3ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc3uc); | |
133 | |
134 psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0)); | |
135 psum = vec_mladd(vB, vsrc1ssH, psum); | |
136 psum = vec_mladd(vC, vsrc2ssH, psum); | |
137 psum = vec_mladd(vD, vsrc3ssH, psum); | |
138 psum = vec_add(v32ss, psum); | |
139 psum = vec_sr(psum, v6us); | |
140 | |
141 vdst = vec_ld(0, dst); | |
142 ppsum = (vec_u8_t)vec_pack(psum, psum); | |
143 vfdst = vec_perm(vdst, ppsum, fperm); | |
144 | |
145 OP_U8_ALTIVEC(fsum, vfdst, vdst); | |
146 | |
147 vec_st(fsum, 0, dst); | |
148 | |
149 vsrc0ssH = vsrc2ssH; | |
150 vsrc1ssH = vsrc3ssH; | |
151 | |
152 dst += stride; | |
153 src += stride; | |
154 } | 137 } |
155 } | 138 } |
156 POWERPC_PERF_STOP_COUNT(PREFIX_h264_chroma_mc8_num, 1); | 139 POWERPC_PERF_STOP_COUNT(PREFIX_h264_chroma_mc8_num, 1); |
157 } | 140 } |
141 | |
142 #undef CHROMA_MC8_ALTIVEC_CORE | |
158 | 143 |
159 /* this code assumes stride % 16 == 0 */ | 144 /* this code assumes stride % 16 == 0 */ |
160 static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) { | 145 static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) { |
161 POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_h_lowpass_num, 1); | 146 POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_h_lowpass_num, 1); |
162 register int i; | 147 register int i; |