Mercurial > libavcodec.hg
comparison ppc/h264_template_altivec.c @ 6064:62d040333d51 libavcodec
Add C/B == 0 cases, 2% slower on CELL but should address Issue299 eventually
author | lu_zero |
---|---|
date | Sat, 22 Dec 2007 23:10:02 +0000 |
parents | 47ed1b9610b1 |
children | 180976fd652e |
comparison
equal
deleted
inserted
replaced
6063:47ed1b9610b1 | 6064:62d040333d51 |
---|---|
50 \ | 50 \ |
51 dst += stride;\ | 51 dst += stride;\ |
52 src += stride; | 52 src += stride; |
53 | 53 |
54 #define CHROMA_MC8_ALTIVEC_CORE_SIMPLE \ | 54 #define CHROMA_MC8_ALTIVEC_CORE_SIMPLE \ |
55 vsrc2ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc2uc);\ | 55 \ |
56 vsrc0ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc0uc);\ | |
57 vsrc1ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc1uc);\ | |
56 \ | 58 \ |
57 psum = vec_mladd(vA, vsrc0ssH, v32ss);\ | 59 psum = vec_mladd(vA, vsrc0ssH, v32ss);\ |
58 psum = vec_mladd(vB, vsrc1ssH, psum);\ | 60 psum = vec_mladd(vE, vsrc1ssH, psum);\ |
59 psum = vec_mladd(vC, vsrc2ssH, psum);\ | |
60 psum = vec_sr(psum, v6us);\ | 61 psum = vec_sr(psum, v6us);\ |
61 \ | 62 \ |
62 vdst = vec_ld(0, dst);\ | 63 vdst = vec_ld(0, dst);\ |
63 ppsum = (vec_u8_t)vec_pack(psum, psum);\ | 64 ppsum = (vec_u8_t)vec_pack(psum, psum);\ |
64 vfdst = vec_perm(vdst, ppsum, fperm);\ | 65 vfdst = vec_perm(vdst, ppsum, fperm);\ |
65 \ | 66 \ |
66 OP_U8_ALTIVEC(fsum, vfdst, vdst);\ | 67 OP_U8_ALTIVEC(fsum, vfdst, vdst);\ |
67 \ | 68 \ |
68 vec_st(fsum, 0, dst);\ | 69 vec_st(fsum, 0, dst);\ |
69 \ | |
70 vsrc0ssH = vsrc1ssH;\ | |
71 vsrc1ssH = vsrc2ssH;\ | |
72 \ | 70 \ |
73 dst += stride;\ | 71 dst += stride;\ |
74 src += stride; | 72 src += stride; |
75 | 73 |
76 void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, | 74 void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, |
153 | 151 |
154 CHROMA_MC8_ALTIVEC_CORE | 152 CHROMA_MC8_ALTIVEC_CORE |
155 } | 153 } |
156 } | 154 } |
157 } else { | 155 } else { |
156 const vec_s16_t vE = vec_add(vB, vC); | |
157 if (ABCD[2]) { // y == 0 B == 0 | |
158 if (!loadSecond) {// -> !reallyBadAlign | 158 if (!loadSecond) {// -> !reallyBadAlign |
159 for (i = 0 ; i < h ; i++) { | 159 for (i = 0 ; i < h ; i++) { |
160 vsrcCuc = vec_ld(stride + 0, src); | 160 vsrcCuc = vec_ld(stride + 0, src); |
161 vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0); | 161 vsrc1uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0); |
162 | |
163 CHROMA_MC8_ALTIVEC_CORE_SIMPLE | 162 CHROMA_MC8_ALTIVEC_CORE_SIMPLE |
163 | |
164 vsrc0uc = vsrc1uc; | |
164 } | 165 } |
165 } else { | 166 } else { |
166 vec_u8_t vsrcDuc; | 167 vec_u8_t vsrcDuc; |
167 for (i = 0 ; i < h ; i++) { | 168 for (i = 0 ; i < h ; i++) { |
168 vsrcCuc = vec_ld(stride + 0, src); | 169 vsrcCuc = vec_ld(0, src); |
169 vsrcDuc = vec_ld(stride + 16, src); | 170 vsrcDuc = vec_ld(15, src); |
170 vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0); | 171 vsrc1uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0); |
171 | |
172 CHROMA_MC8_ALTIVEC_CORE_SIMPLE | 172 CHROMA_MC8_ALTIVEC_CORE_SIMPLE |
173 | |
174 vsrc0uc = vsrc1uc; | |
173 } | 175 } |
176 } | |
177 } else { // x == 0 C == 0 | |
178 if (!loadSecond) {// -> !reallyBadAlign | |
179 for (i = 0 ; i < h ; i++) { | |
180 CHROMA_MC8_ALTIVEC_CORE_SIMPLE | |
181 | |
182 vsrcCuc = vec_ld(0, src); | |
183 vsrc0uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0); | |
184 vsrc1uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1); | |
185 } | |
186 } else { | |
187 vec_u8_t vsrcDuc; | |
188 for (i = 0 ; i < h ; i++) { | |
189 CHROMA_MC8_ALTIVEC_CORE_SIMPLE | |
190 | |
191 vsrcCuc = vec_ld(0, src); | |
192 vsrcDuc = vec_ld(15, src); | |
193 vsrc0uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0); | |
194 vsrc1uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1); | |
195 } | |
196 } | |
174 } | 197 } |
175 } | 198 } |
176 POWERPC_PERF_STOP_COUNT(PREFIX_h264_chroma_mc8_num, 1); | 199 POWERPC_PERF_STOP_COUNT(PREFIX_h264_chroma_mc8_num, 1); |
177 } | 200 } |
178 | 201 |