Mercurial > mplayer.hg
annotate libmpeg2/motion_comp_altivec.c @ 34410:bb440bfcade9
configure: Simplify Theora check.
The Theora check does some very elaborate tests to account for issues in
old libtheora versions. Nowadays libtheora is stable and easy to test
for, so there is no longer a need for the extra complexity.
Also drop the Theora check that linked against tremor/bitwise.c.
Its purpose is now lost in the mists of time.
author | diego |
---|---|
date | Wed, 04 Jan 2012 10:39:38 +0000 |
parents | 25337a2147e7 |
children |
rev | line source |
---|---|
9857 | 1 /* |
2 * motion_comp_altivec.c | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
3 * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> |
9857 | 4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> |
5 * | |
6 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. | |
7 * See http://libmpeg2.sourceforge.net/ for updates. | |
8 * | |
9 * mpeg2dec is free software; you can redistribute it and/or modify | |
10 * it under the terms of the GNU General Public License as published by | |
11 * the Free Software Foundation; either version 2 of the License, or | |
12 * (at your option) any later version. | |
13 * | |
14 * mpeg2dec is distributed in the hope that it will be useful, | |
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 * GNU General Public License for more details. | |
18 * | |
19 * You should have received a copy of the GNU General Public License | |
20 * along with this program; if not, write to the Free Software | |
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
22 */ | |
23 | |
24 #include "config.h" | |
25 | |
28290 | 26 #if ARCH_PPC |
9857 | 27 |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
28 #ifdef HAVE_ALTIVEC_H |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
29 #include <altivec.h> |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
30 #endif |
9857 | 31 #include <inttypes.h> |
32 | |
33 #include "mpeg2.h" | |
12932 | 34 #include "attributes.h" |
9857 | 35 #include "mpeg2_internal.h" |
36 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
37 typedef vector signed char vector_s8_t; |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
38 typedef vector unsigned char vector_u8_t; |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
39 typedef vector signed short vector_s16_t; |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
40 typedef vector unsigned short vector_u16_t; |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
41 typedef vector signed int vector_s32_t; |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
42 typedef vector unsigned int vector_u32_t; |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
43 |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
44 #ifndef COFFEE_BREAK /* Workarounds for gcc suckage */ |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
45 |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
46 static inline vector_u8_t my_vec_ld (int const A, const uint8_t * const B) |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
47 { |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
48 return vec_ld (A, (uint8_t *)B); |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
49 } |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
50 #undef vec_ld |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
51 #define vec_ld my_vec_ld |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
52 |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
53 static inline vector_u8_t my_vec_and (vector_u8_t const A, vector_u8_t const B) |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
54 { |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
55 return vec_and (A, B); |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
56 } |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
57 #undef vec_and |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
58 #define vec_and my_vec_and |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
59 |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
60 static inline vector_u8_t my_vec_avg (vector_u8_t const A, vector_u8_t const B) |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
61 { |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
62 return vec_avg (A, B); |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
63 } |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
64 #undef vec_avg |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
65 #define vec_avg my_vec_avg |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
66 |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
67 #endif |
9857 | 68 |
69 static void MC_put_o_16_altivec (uint8_t * dest, const uint8_t * ref, | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
70 const int stride, int height) |
9857 | 71 { |
72 vector_u8_t perm, ref0, ref1, tmp; | |
73 | |
74 perm = vec_lvsl (0, ref); | |
75 | |
76 height = (height >> 1) - 1; | |
77 | |
78 ref0 = vec_ld (0, ref); | |
79 ref1 = vec_ld (15, ref); | |
80 ref += stride; | |
81 tmp = vec_perm (ref0, ref1, perm); | |
82 | |
83 do { | |
84 ref0 = vec_ld (0, ref); | |
85 ref1 = vec_ld (15, ref); | |
86 ref += stride; | |
87 vec_st (tmp, 0, dest); | |
88 tmp = vec_perm (ref0, ref1, perm); | |
89 | |
90 ref0 = vec_ld (0, ref); | |
91 ref1 = vec_ld (15, ref); | |
92 ref += stride; | |
93 vec_st (tmp, stride, dest); | |
94 dest += 2*stride; | |
95 tmp = vec_perm (ref0, ref1, perm); | |
96 } while (--height); | |
97 | |
98 ref0 = vec_ld (0, ref); | |
99 ref1 = vec_ld (15, ref); | |
100 vec_st (tmp, 0, dest); | |
101 tmp = vec_perm (ref0, ref1, perm); | |
102 vec_st (tmp, stride, dest); | |
103 } | |
104 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
105 static void MC_put_o_8_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
106 const int stride, int height) |
9857 | 107 { |
108 vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1; | |
109 | |
110 tmp0 = vec_lvsl (0, ref); | |
111 tmp0 = vec_mergeh (tmp0, tmp0); | |
112 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); | |
113 tmp1 = vec_lvsl (stride, ref); | |
114 tmp1 = vec_mergeh (tmp1, tmp1); | |
115 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); | |
116 | |
117 height = (height >> 1) - 1; | |
118 | |
119 ref0 = vec_ld (0, ref); | |
120 ref1 = vec_ld (7, ref); | |
121 ref += stride; | |
122 tmp0 = vec_perm (ref0, ref1, perm0); | |
123 | |
124 do { | |
125 ref0 = vec_ld (0, ref); | |
126 ref1 = vec_ld (7, ref); | |
127 ref += stride; | |
128 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); | |
129 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); | |
130 dest += stride; | |
131 tmp1 = vec_perm (ref0, ref1, perm1); | |
132 | |
133 ref0 = vec_ld (0, ref); | |
134 ref1 = vec_ld (7, ref); | |
135 ref += stride; | |
136 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); | |
137 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); | |
138 dest += stride; | |
139 tmp0 = vec_perm (ref0, ref1, perm0); | |
140 } while (--height); | |
141 | |
142 ref0 = vec_ld (0, ref); | |
143 ref1 = vec_ld (7, ref); | |
144 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); | |
145 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); | |
146 dest += stride; | |
147 tmp1 = vec_perm (ref0, ref1, perm1); | |
148 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); | |
149 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); | |
150 } | |
151 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
152 static void MC_put_x_16_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
153 const int stride, int height) |
9857 | 154 { |
155 vector_u8_t permA, permB, ref0, ref1, tmp; | |
156 | |
157 permA = vec_lvsl (0, ref); | |
158 permB = vec_add (permA, vec_splat_u8 (1)); | |
159 | |
160 height = (height >> 1) - 1; | |
161 | |
162 ref0 = vec_ld (0, ref); | |
163 ref1 = vec_ld (16, ref); | |
164 ref += stride; | |
165 tmp = vec_avg (vec_perm (ref0, ref1, permA), | |
166 vec_perm (ref0, ref1, permB)); | |
167 | |
168 do { | |
169 ref0 = vec_ld (0, ref); | |
170 ref1 = vec_ld (16, ref); | |
171 ref += stride; | |
172 vec_st (tmp, 0, dest); | |
173 tmp = vec_avg (vec_perm (ref0, ref1, permA), | |
174 vec_perm (ref0, ref1, permB)); | |
175 | |
176 ref0 = vec_ld (0, ref); | |
177 ref1 = vec_ld (16, ref); | |
178 ref += stride; | |
179 vec_st (tmp, stride, dest); | |
180 dest += 2*stride; | |
181 tmp = vec_avg (vec_perm (ref0, ref1, permA), | |
182 vec_perm (ref0, ref1, permB)); | |
183 } while (--height); | |
184 | |
185 ref0 = vec_ld (0, ref); | |
186 ref1 = vec_ld (16, ref); | |
187 vec_st (tmp, 0, dest); | |
188 tmp = vec_avg (vec_perm (ref0, ref1, permA), | |
189 vec_perm (ref0, ref1, permB)); | |
190 vec_st (tmp, stride, dest); | |
191 } | |
192 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
193 static void MC_put_x_8_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
194 const int stride, int height) |
9857 | 195 { |
196 vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1; | |
197 | |
198 ones = vec_splat_u8 (1); | |
199 tmp0 = vec_lvsl (0, ref); | |
200 tmp0 = vec_mergeh (tmp0, tmp0); | |
201 perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); | |
202 perm0B = vec_add (perm0A, ones); | |
203 tmp1 = vec_lvsl (stride, ref); | |
204 tmp1 = vec_mergeh (tmp1, tmp1); | |
205 perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); | |
206 perm1B = vec_add (perm1A, ones); | |
207 | |
208 height = (height >> 1) - 1; | |
209 | |
210 ref0 = vec_ld (0, ref); | |
211 ref1 = vec_ld (8, ref); | |
212 ref += stride; | |
213 tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A), | |
214 vec_perm (ref0, ref1, perm0B)); | |
215 | |
216 do { | |
217 ref0 = vec_ld (0, ref); | |
218 ref1 = vec_ld (8, ref); | |
219 ref += stride; | |
220 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); | |
221 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); | |
222 dest += stride; | |
223 tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A), | |
224 vec_perm (ref0, ref1, perm1B)); | |
225 | |
226 ref0 = vec_ld (0, ref); | |
227 ref1 = vec_ld (8, ref); | |
228 ref += stride; | |
229 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); | |
230 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); | |
231 dest += stride; | |
232 tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A), | |
233 vec_perm (ref0, ref1, perm0B)); | |
234 } while (--height); | |
235 | |
236 ref0 = vec_ld (0, ref); | |
237 ref1 = vec_ld (8, ref); | |
238 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); | |
239 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); | |
240 dest += stride; | |
241 tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A), | |
242 vec_perm (ref0, ref1, perm1B)); | |
243 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); | |
244 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); | |
245 } | |
246 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
247 static void MC_put_y_16_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
248 const int stride, int height) |
9857 | 249 { |
250 vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp; | |
251 | |
252 perm = vec_lvsl (0, ref); | |
253 | |
254 height = (height >> 1) - 1; | |
255 | |
256 ref0 = vec_ld (0, ref); | |
257 ref1 = vec_ld (15, ref); | |
258 ref += stride; | |
259 tmp0 = vec_perm (ref0, ref1, perm); | |
260 ref0 = vec_ld (0, ref); | |
261 ref1 = vec_ld (15, ref); | |
262 ref += stride; | |
263 tmp1 = vec_perm (ref0, ref1, perm); | |
264 tmp = vec_avg (tmp0, tmp1); | |
265 | |
266 do { | |
267 ref0 = vec_ld (0, ref); | |
268 ref1 = vec_ld (15, ref); | |
269 ref += stride; | |
270 vec_st (tmp, 0, dest); | |
271 tmp0 = vec_perm (ref0, ref1, perm); | |
272 tmp = vec_avg (tmp0, tmp1); | |
273 | |
274 ref0 = vec_ld (0, ref); | |
275 ref1 = vec_ld (15, ref); | |
276 ref += stride; | |
277 vec_st (tmp, stride, dest); | |
278 dest += 2*stride; | |
279 tmp1 = vec_perm (ref0, ref1, perm); | |
280 tmp = vec_avg (tmp0, tmp1); | |
281 } while (--height); | |
282 | |
283 ref0 = vec_ld (0, ref); | |
284 ref1 = vec_ld (15, ref); | |
285 vec_st (tmp, 0, dest); | |
286 tmp0 = vec_perm (ref0, ref1, perm); | |
287 tmp = vec_avg (tmp0, tmp1); | |
288 vec_st (tmp, stride, dest); | |
289 } | |
290 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
291 static void MC_put_y_8_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
292 const int stride, int height) |
9857 | 293 { |
294 vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1; | |
295 | |
296 tmp0 = vec_lvsl (0, ref); | |
297 tmp0 = vec_mergeh (tmp0, tmp0); | |
298 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); | |
299 tmp1 = vec_lvsl (stride, ref); | |
300 tmp1 = vec_mergeh (tmp1, tmp1); | |
301 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); | |
302 | |
303 height = (height >> 1) - 1; | |
304 | |
305 ref0 = vec_ld (0, ref); | |
306 ref1 = vec_ld (7, ref); | |
307 ref += stride; | |
308 tmp0 = vec_perm (ref0, ref1, perm0); | |
309 ref0 = vec_ld (0, ref); | |
310 ref1 = vec_ld (7, ref); | |
311 ref += stride; | |
312 tmp1 = vec_perm (ref0, ref1, perm1); | |
313 tmp = vec_avg (tmp0, tmp1); | |
314 | |
315 do { | |
316 ref0 = vec_ld (0, ref); | |
317 ref1 = vec_ld (7, ref); | |
318 ref += stride; | |
319 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
320 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
321 dest += stride; | |
322 tmp0 = vec_perm (ref0, ref1, perm0); | |
323 tmp = vec_avg (tmp0, tmp1); | |
324 | |
325 ref0 = vec_ld (0, ref); | |
326 ref1 = vec_ld (7, ref); | |
327 ref += stride; | |
328 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
329 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
330 dest += stride; | |
331 tmp1 = vec_perm (ref0, ref1, perm1); | |
332 tmp = vec_avg (tmp0, tmp1); | |
333 } while (--height); | |
334 | |
335 ref0 = vec_ld (0, ref); | |
336 ref1 = vec_ld (7, ref); | |
337 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
338 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
339 dest += stride; | |
340 tmp0 = vec_perm (ref0, ref1, perm0); | |
341 tmp = vec_avg (tmp0, tmp1); | |
342 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
343 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
344 } | |
345 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
346 static void MC_put_xy_16_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
347 const int stride, int height) |
9857 | 348 { |
349 vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp; | |
350 vector_u8_t ones; | |
351 | |
352 ones = vec_splat_u8 (1); | |
353 permA = vec_lvsl (0, ref); | |
354 permB = vec_add (permA, ones); | |
355 | |
356 height = (height >> 1) - 1; | |
357 | |
358 ref0 = vec_ld (0, ref); | |
359 ref1 = vec_ld (16, ref); | |
360 ref += stride; | |
361 A = vec_perm (ref0, ref1, permA); | |
362 B = vec_perm (ref0, ref1, permB); | |
363 avg0 = vec_avg (A, B); | |
364 xor0 = vec_xor (A, B); | |
365 | |
366 ref0 = vec_ld (0, ref); | |
367 ref1 = vec_ld (16, ref); | |
368 ref += stride; | |
369 A = vec_perm (ref0, ref1, permA); | |
370 B = vec_perm (ref0, ref1, permB); | |
371 avg1 = vec_avg (A, B); | |
372 xor1 = vec_xor (A, B); | |
373 tmp = vec_sub (vec_avg (avg0, avg1), | |
374 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
375 vec_xor (avg0, avg1))); | |
376 | |
377 do { | |
378 ref0 = vec_ld (0, ref); | |
379 ref1 = vec_ld (16, ref); | |
380 ref += stride; | |
381 vec_st (tmp, 0, dest); | |
382 A = vec_perm (ref0, ref1, permA); | |
383 B = vec_perm (ref0, ref1, permB); | |
384 avg0 = vec_avg (A, B); | |
385 xor0 = vec_xor (A, B); | |
386 tmp = vec_sub (vec_avg (avg0, avg1), | |
387 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
388 vec_xor (avg0, avg1))); | |
389 | |
390 ref0 = vec_ld (0, ref); | |
391 ref1 = vec_ld (16, ref); | |
392 ref += stride; | |
393 vec_st (tmp, stride, dest); | |
394 dest += 2*stride; | |
395 A = vec_perm (ref0, ref1, permA); | |
396 B = vec_perm (ref0, ref1, permB); | |
397 avg1 = vec_avg (A, B); | |
398 xor1 = vec_xor (A, B); | |
399 tmp = vec_sub (vec_avg (avg0, avg1), | |
400 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
401 vec_xor (avg0, avg1))); | |
402 } while (--height); | |
403 | |
404 ref0 = vec_ld (0, ref); | |
405 ref1 = vec_ld (16, ref); | |
406 vec_st (tmp, 0, dest); | |
407 A = vec_perm (ref0, ref1, permA); | |
408 B = vec_perm (ref0, ref1, permB); | |
409 avg0 = vec_avg (A, B); | |
410 xor0 = vec_xor (A, B); | |
411 tmp = vec_sub (vec_avg (avg0, avg1), | |
412 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
413 vec_xor (avg0, avg1))); | |
414 vec_st (tmp, stride, dest); | |
415 } | |
416 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
417 static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
418 const int stride, int height) |
9857 | 419 { |
420 vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B; | |
421 vector_u8_t avg0, avg1, xor0, xor1, tmp, ones; | |
422 | |
423 ones = vec_splat_u8 (1); | |
424 perm0A = vec_lvsl (0, ref); | |
425 perm0A = vec_mergeh (perm0A, perm0A); | |
426 perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A); | |
427 perm0B = vec_add (perm0A, ones); | |
428 perm1A = vec_lvsl (stride, ref); | |
429 perm1A = vec_mergeh (perm1A, perm1A); | |
430 perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A); | |
431 perm1B = vec_add (perm1A, ones); | |
432 | |
433 height = (height >> 1) - 1; | |
434 | |
435 ref0 = vec_ld (0, ref); | |
436 ref1 = vec_ld (8, ref); | |
437 ref += stride; | |
438 A = vec_perm (ref0, ref1, perm0A); | |
439 B = vec_perm (ref0, ref1, perm0B); | |
440 avg0 = vec_avg (A, B); | |
441 xor0 = vec_xor (A, B); | |
442 | |
443 ref0 = vec_ld (0, ref); | |
444 ref1 = vec_ld (8, ref); | |
445 ref += stride; | |
446 A = vec_perm (ref0, ref1, perm1A); | |
447 B = vec_perm (ref0, ref1, perm1B); | |
448 avg1 = vec_avg (A, B); | |
449 xor1 = vec_xor (A, B); | |
450 tmp = vec_sub (vec_avg (avg0, avg1), | |
451 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
452 vec_xor (avg0, avg1))); | |
453 | |
454 do { | |
455 ref0 = vec_ld (0, ref); | |
456 ref1 = vec_ld (8, ref); | |
457 ref += stride; | |
458 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
459 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
460 dest += stride; | |
461 A = vec_perm (ref0, ref1, perm0A); | |
462 B = vec_perm (ref0, ref1, perm0B); | |
463 avg0 = vec_avg (A, B); | |
464 xor0 = vec_xor (A, B); | |
465 tmp = vec_sub (vec_avg (avg0, avg1), | |
466 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
467 vec_xor (avg0, avg1))); | |
468 | |
469 ref0 = vec_ld (0, ref); | |
470 ref1 = vec_ld (8, ref); | |
471 ref += stride; | |
472 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
473 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
474 dest += stride; | |
475 A = vec_perm (ref0, ref1, perm1A); | |
476 B = vec_perm (ref0, ref1, perm1B); | |
477 avg1 = vec_avg (A, B); | |
478 xor1 = vec_xor (A, B); | |
479 tmp = vec_sub (vec_avg (avg0, avg1), | |
480 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
481 vec_xor (avg0, avg1))); | |
482 } while (--height); | |
483 | |
484 ref0 = vec_ld (0, ref); | |
485 ref1 = vec_ld (8, ref); | |
486 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
487 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
488 dest += stride; | |
489 A = vec_perm (ref0, ref1, perm0A); | |
490 B = vec_perm (ref0, ref1, perm0B); | |
491 avg0 = vec_avg (A, B); | |
492 xor0 = vec_xor (A, B); | |
493 tmp = vec_sub (vec_avg (avg0, avg1), | |
494 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
495 vec_xor (avg0, avg1))); | |
496 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
497 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
498 } | |
499 | |
#if 0
/*
 * Disabled alternative for the 8-wide 2x2 case; the #if 0 keeps it out of
 * the build.  It widens the four neighbours to 16-bit lanes, sums them,
 * adds 2 and shifts right by 2 before packing back to bytes.
 *
 * NOTE(review): this looks unfinished - it stores a whole quadword with
 * vec_st for an 8-wide block, and the vec_avg computed at the bottom of the
 * loop is overwritten before it is ever stored.
 */
static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
                                 const int stride, int height)
{
    vector_u8_t sel_here, sel_next, lo, hi;
    vector_u8_t top_here, top_next, bot_here, bot_next;
    vector_u8_t out, zero, one;
    vector_u16_t two, top_sum, bot_sum, sum;

    one = vec_splat_u8 (1);
    sel_here = vec_lvsl (0, ref);
    sel_next = vec_add (sel_here, one);

    zero = vec_splat_u8 (0);
    two = vec_splat_u16 (2);

    do {
        lo = vec_ld (0, ref);
        hi = vec_ld (8, ref);
        ref += stride;
        top_here = vec_perm (lo, hi, sel_here);
        top_next = vec_perm (lo, hi, sel_next);
        lo = vec_ld (0, ref);
        hi = vec_ld (8, ref);
        bot_here = vec_perm (lo, hi, sel_here);
        bot_next = vec_perm (lo, hi, sel_next);

        top_sum = vec_add ((vector_u16_t) vec_mergeh (zero, top_here),
                           (vector_u16_t) vec_mergeh (zero, top_next));
        bot_sum = vec_add ((vector_u16_t) vec_mergeh (zero, bot_here),
                           (vector_u16_t) vec_mergeh (zero, bot_next));
        sum = vec_add (top_sum, bot_sum);
        sum = vec_sr (vec_add (sum, two), two);
        out = vec_pack (sum, sum);

        vec_st (out, 0, dest);
        dest += stride;
        out = vec_avg (vec_perm (lo, hi, sel_here),
                       vec_perm (lo, hi, sel_next));
    } while (--height);
}
#endif
539 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
540 static void MC_avg_o_16_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
541 const int stride, int height) |
9857 | 542 { |
543 vector_u8_t perm, ref0, ref1, tmp, prev; | |
544 | |
545 perm = vec_lvsl (0, ref); | |
546 | |
547 height = (height >> 1) - 1; | |
548 | |
549 ref0 = vec_ld (0, ref); | |
550 ref1 = vec_ld (15, ref); | |
551 ref += stride; | |
552 prev = vec_ld (0, dest); | |
553 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); | |
554 | |
555 do { | |
556 ref0 = vec_ld (0, ref); | |
557 ref1 = vec_ld (15, ref); | |
558 ref += stride; | |
559 prev = vec_ld (stride, dest); | |
560 vec_st (tmp, 0, dest); | |
561 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); | |
562 | |
563 ref0 = vec_ld (0, ref); | |
564 ref1 = vec_ld (15, ref); | |
565 ref += stride; | |
566 prev = vec_ld (2*stride, dest); | |
567 vec_st (tmp, stride, dest); | |
568 dest += 2*stride; | |
569 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); | |
570 } while (--height); | |
571 | |
572 ref0 = vec_ld (0, ref); | |
573 ref1 = vec_ld (15, ref); | |
574 prev = vec_ld (stride, dest); | |
575 vec_st (tmp, 0, dest); | |
576 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); | |
577 vec_st (tmp, stride, dest); | |
578 } | |
579 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
580 static void MC_avg_o_8_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
581 const int stride, int height) |
9857 | 582 { |
583 vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1, prev; | |
584 | |
585 tmp0 = vec_lvsl (0, ref); | |
586 tmp0 = vec_mergeh (tmp0, tmp0); | |
587 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); | |
588 tmp1 = vec_lvsl (stride, ref); | |
589 tmp1 = vec_mergeh (tmp1, tmp1); | |
590 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); | |
591 | |
592 height = (height >> 1) - 1; | |
593 | |
594 ref0 = vec_ld (0, ref); | |
595 ref1 = vec_ld (7, ref); | |
596 ref += stride; | |
597 prev = vec_ld (0, dest); | |
598 tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0)); | |
599 | |
600 do { | |
601 ref0 = vec_ld (0, ref); | |
602 ref1 = vec_ld (7, ref); | |
603 ref += stride; | |
604 prev = vec_ld (stride, dest); | |
605 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); | |
606 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); | |
607 dest += stride; | |
608 tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1)); | |
609 | |
610 ref0 = vec_ld (0, ref); | |
611 ref1 = vec_ld (7, ref); | |
612 ref += stride; | |
613 prev = vec_ld (stride, dest); | |
614 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); | |
615 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); | |
616 dest += stride; | |
617 tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0)); | |
618 } while (--height); | |
619 | |
620 ref0 = vec_ld (0, ref); | |
621 ref1 = vec_ld (7, ref); | |
622 prev = vec_ld (stride, dest); | |
623 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); | |
624 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); | |
625 dest += stride; | |
626 tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1)); | |
627 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); | |
628 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); | |
629 } | |
630 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
631 static void MC_avg_x_16_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
632 const int stride, int height) |
9857 | 633 { |
634 vector_u8_t permA, permB, ref0, ref1, tmp, prev; | |
635 | |
636 permA = vec_lvsl (0, ref); | |
637 permB = vec_add (permA, vec_splat_u8 (1)); | |
638 | |
639 height = (height >> 1) - 1; | |
640 | |
641 ref0 = vec_ld (0, ref); | |
642 ref1 = vec_ld (16, ref); | |
643 prev = vec_ld (0, dest); | |
644 ref += stride; | |
645 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), | |
646 vec_perm (ref0, ref1, permB))); | |
647 | |
648 do { | |
649 ref0 = vec_ld (0, ref); | |
650 ref1 = vec_ld (16, ref); | |
651 ref += stride; | |
652 prev = vec_ld (stride, dest); | |
653 vec_st (tmp, 0, dest); | |
654 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), | |
655 vec_perm (ref0, ref1, permB))); | |
656 | |
657 ref0 = vec_ld (0, ref); | |
658 ref1 = vec_ld (16, ref); | |
659 ref += stride; | |
660 prev = vec_ld (2*stride, dest); | |
661 vec_st (tmp, stride, dest); | |
662 dest += 2*stride; | |
663 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), | |
664 vec_perm (ref0, ref1, permB))); | |
665 } while (--height); | |
666 | |
667 ref0 = vec_ld (0, ref); | |
668 ref1 = vec_ld (16, ref); | |
669 prev = vec_ld (stride, dest); | |
670 vec_st (tmp, 0, dest); | |
671 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), | |
672 vec_perm (ref0, ref1, permB))); | |
673 vec_st (tmp, stride, dest); | |
674 } | |
675 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
/*
 * MC_avg_x_8_altivec: for an 8-byte-wide block, average every ref byte with
 * its right-hand neighbour, average that with the bytes already in dest,
 * and write the result back to dest.
 *
 * dest   - block that is read, blended and rewritten (8 bytes per row)
 * ref    - source rows; their alignment is handled with vec_lvsl/vec_perm
 * stride - byte distance between consecutive rows, used for dest and ref
 * height - number of rows to process
 *
 * Two rows are produced per loop pass and one pair is peeled into the code
 * before/after the loop; the loop is a do/while counting (height >> 1) - 1.
 * NOTE(review): that looks like it needs an even height >= 4 - confirm
 * against the callers.
 */
676 static void MC_avg_x_8_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
677 const int stride, int height) |
9857 | 678 { |
679 vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1; | |
680 vector_u8_t prev; | |
681 | |
/* perm0A/perm0B: permute controls for rows at ref + 0 (mod 2*stride); the */
/* mergeh + pack pair repeats the first 8 lvsl indices in both vector halves, */
/* and the B variant is the A variant plus one, i.e. one byte to the right. */
/* NOTE(review): the duplication presumably lets vec_ste find the data in */
/* whichever half of the 16-byte line dest points into - confirm. */
682 ones = vec_splat_u8 (1); | |
683 tmp0 = vec_lvsl (0, ref); | |
684 tmp0 = vec_mergeh (tmp0, tmp0); | |
685 perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); | |
686 perm0B = vec_add (perm0A, ones); | |
/* perm1A/perm1B: same construction for rows starting at ref + stride. */
687 tmp1 = vec_lvsl (stride, ref); | |
688 tmp1 = vec_mergeh (tmp1, tmp1); | |
689 perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); | |
690 perm1B = vec_add (perm1A, ones); | |
691 | |
692 height = (height >> 1) - 1; | |
693 | |
/* Prologue: compute the first output row into tmp0 (stored inside the loop). */
694 ref0 = vec_ld (0, ref); | |
695 ref1 = vec_ld (8, ref); | |
696 prev = vec_ld (0, dest); | |
697 ref += stride; | |
698 tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A), | |
699 vec_perm (ref0, ref1, perm0B))); | |
700 | |
701 do { | |
/* Store the pending row held in tmp0 as two 32-bit elements (8 bytes), */
/* while computing the next row (perm1 alignment) into tmp1. */
702 ref0 = vec_ld (0, ref); | |
703 ref1 = vec_ld (8, ref); | |
704 ref += stride; | |
705 prev = vec_ld (stride, dest); | |
706 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); | |
707 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); | |
708 dest += stride; | |
709 tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A), | |
710 vec_perm (ref0, ref1, perm1B))); | |
711 | |
/* Second half: store tmp1 and compute the following row into tmp0. */
712 ref0 = vec_ld (0, ref); | |
713 ref1 = vec_ld (8, ref); | |
714 ref += stride; | |
715 prev = vec_ld (stride, dest); | |
716 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); | |
717 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); | |
718 dest += stride; | |
719 tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A), | |
720 vec_perm (ref0, ref1, perm0B))); | |
721 } while (--height); | |
722 | |
/* Epilogue: store the pending row, then compute and store the last row. */
723 ref0 = vec_ld (0, ref); | |
724 ref1 = vec_ld (8, ref); | |
725 prev = vec_ld (stride, dest); | |
726 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); | |
727 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); | |
728 dest += stride; | |
729 tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A), | |
730 vec_perm (ref0, ref1, perm1B))); | |
731 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); | |
732 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); | |
733 } | |
734 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
/*
 * MC_avg_y_16_altivec: for a 16-byte-wide block, average every ref row with
 * the ref row below it, average that with the bytes already in dest, and
 * write the result back to dest with full 16-byte vector stores.
 *
 * dest   - block that is read, blended and rewritten (vec_ld / vec_st)
 * ref    - source rows; their alignment is handled with vec_lvsl/vec_perm
 * stride - byte distance between consecutive rows, used for dest and ref
 * height - number of output rows; height + 1 ref rows are loaded in total
 *
 * NOTE(review): one permute vector, taken from ref itself, is reused for
 * every row, which presumably relies on stride being a multiple of 16 -
 * confirm. The do/while on (height >> 1) - 1 also looks like it needs an
 * even height >= 4 - confirm against the callers.
 */
735 static void MC_avg_y_16_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
736 const int stride, int height) |
9857 | 737 { |
738 vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp, prev; | |
739 | |
740 perm = vec_lvsl (0, ref); | |
741 | |
742 height = (height >> 1) - 1; | |
743 | |
/* Prologue: realign the first two ref rows into tmp0/tmp1 and compute the */
/* first output row into tmp (it is stored inside the loop). */
744 ref0 = vec_ld (0, ref); | |
745 ref1 = vec_ld (15, ref); | |
746 ref += stride; | |
747 tmp0 = vec_perm (ref0, ref1, perm); | |
748 ref0 = vec_ld (0, ref); | |
749 ref1 = vec_ld (15, ref); | |
750 ref += stride; | |
751 prev = vec_ld (0, dest); | |
752 tmp1 = vec_perm (ref0, ref1, perm); | |
753 tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); | |
754 | |
755 do { | |
/* tmp0 and tmp1 take turns holding the newest ref row, so each ref row */
/* is loaded once and reused as the upper row of the next output row. */
756 ref0 = vec_ld (0, ref); | |
757 ref1 = vec_ld (15, ref); | |
758 ref += stride; | |
759 prev = vec_ld (stride, dest); | |
760 vec_st (tmp, 0, dest); | |
761 tmp0 = vec_perm (ref0, ref1, perm); | |
762 tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); | |
763 | |
/* Second row of the pair: dest is addressed at +stride / +2*stride here */
/* and advanced by two rows only once per pass. */
764 ref0 = vec_ld (0, ref); | |
765 ref1 = vec_ld (15, ref); | |
766 ref += stride; | |
767 prev = vec_ld (2*stride, dest); | |
768 vec_st (tmp, stride, dest); | |
769 dest += 2*stride; | |
770 tmp1 = vec_perm (ref0, ref1, perm); | |
771 tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); | |
772 } while (--height); | |
773 | |
/* Epilogue: store the pending row, then compute and store the last row. */
774 ref0 = vec_ld (0, ref); | |
775 ref1 = vec_ld (15, ref); | |
776 prev = vec_ld (stride, dest); | |
777 vec_st (tmp, 0, dest); | |
778 tmp0 = vec_perm (ref0, ref1, perm); | |
779 tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); | |
780 vec_st (tmp, stride, dest); | |
781 } | |
782 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
/*
 * MC_avg_y_8_altivec: for an 8-byte-wide block, average every ref row with
 * the ref row below it, average that with the bytes already in dest, and
 * write the result back to dest as two 32-bit element stores per row.
 *
 * dest   - block that is read, blended and rewritten (8 bytes per row)
 * ref    - source rows; their alignment is handled with vec_lvsl/vec_perm
 * stride - byte distance between consecutive rows, used for dest and ref
 * height - number of output rows; height + 1 ref rows are loaded in total
 *
 * NOTE(review): the do/while on (height >> 1) - 1 looks like it needs an
 * even height >= 4 - confirm against the callers.
 */
783 static void MC_avg_y_8_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
784 const int stride, int height) |
9857 | 785 { |
786 vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1, prev; | |
787 | |
/* perm0 is built from the alignment of ref, perm1 from that of ref + stride; */
/* the mergeh + pack pair repeats the first 8 lvsl indices in both halves. */
788 tmp0 = vec_lvsl (0, ref); | |
789 tmp0 = vec_mergeh (tmp0, tmp0); | |
790 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); | |
791 tmp1 = vec_lvsl (stride, ref); | |
792 tmp1 = vec_mergeh (tmp1, tmp1); | |
793 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); | |
794 | |
795 height = (height >> 1) - 1; | |
796 | |
/* Prologue: realign the first two ref rows into tmp0/tmp1 and compute the */
/* first output row into tmp (it is stored inside the loop). */
797 ref0 = vec_ld (0, ref); | |
798 ref1 = vec_ld (7, ref); | |
799 ref += stride; | |
800 tmp0 = vec_perm (ref0, ref1, perm0); | |
801 ref0 = vec_ld (0, ref); | |
802 ref1 = vec_ld (7, ref); | |
803 ref += stride; | |
804 prev = vec_ld (0, dest); | |
805 tmp1 = vec_perm (ref0, ref1, perm1); | |
806 tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); | |
807 | |
808 do { | |
/* Store the pending row, then refresh tmp0 (perm0 rows) and blend it with */
/* the previous ref row still held in tmp1. */
809 ref0 = vec_ld (0, ref); | |
810 ref1 = vec_ld (7, ref); | |
811 ref += stride; | |
812 prev = vec_ld (stride, dest); | |
813 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
814 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
815 dest += stride; | |
816 tmp0 = vec_perm (ref0, ref1, perm0); | |
817 tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); | |
818 | |
/* Same again with the roles swapped: refresh tmp1 (perm1 rows). */
819 ref0 = vec_ld (0, ref); | |
820 ref1 = vec_ld (7, ref); | |
821 ref += stride; | |
822 prev = vec_ld (stride, dest); | |
823 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
824 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
825 dest += stride; | |
826 tmp1 = vec_perm (ref0, ref1, perm1); | |
827 tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); | |
828 } while (--height); | |
829 | |
/* Epilogue: store the pending row, then compute and store the last row. */
830 ref0 = vec_ld (0, ref); | |
831 ref1 = vec_ld (7, ref); | |
832 prev = vec_ld (stride, dest); | |
833 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
834 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
835 dest += stride; | |
836 tmp0 = vec_perm (ref0, ref1, perm0); | |
837 tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); | |
838 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
839 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
840 } | |
841 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
/*
 * MC_avg_xy_16_altivec: for a 16-byte-wide block, combine each ref byte with
 * its right-hand neighbour and the two bytes below them (a 2x2 neighbourhood),
 * average that with the bytes already in dest, and write the result back to
 * dest with full 16-byte vector stores.
 *
 * dest   - block that is read, blended and rewritten (vec_ld / vec_st)
 * ref    - source rows; their alignment is handled with vec_lvsl/vec_perm
 * stride - byte distance between consecutive rows, used for dest and ref
 * height - number of output rows; height + 1 ref rows are loaded in total
 *
 * Per ref row, avgN holds vec_avg (A, B) and xorN holds A ^ B, where A is the
 * realigned row and B the same row shifted one byte. Two rows are combined as
 *   vec_avg (avg0, avg1) - (1 & (xor0 | xor1) & (avg0 ^ avg1))
 * NOTE(review): the subtracted term presumably corrects the upward bias of
 * nesting vec_avg twice - confirm against the reference C implementation.
 * NOTE(review): one permute pair is reused for every row, which presumably
 * relies on stride being a multiple of 16, and the do/while on
 * (height >> 1) - 1 looks like it needs an even height >= 4 - confirm.
 */
842 static void MC_avg_xy_16_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
843 const int stride, int height) |
9857 | 844 { |
845 vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp; | |
846 vector_u8_t ones, prev; | |
847 | |
/* permB is permA plus one, i.e. it selects the bytes one position right. */
848 ones = vec_splat_u8 (1); | |
849 permA = vec_lvsl (0, ref); | |
850 permB = vec_add (permA, ones); | |
851 | |
852 height = (height >> 1) - 1; | |
853 | |
/* Prologue: horizontal terms of the first ref row (avg0/xor0). */
854 ref0 = vec_ld (0, ref); | |
855 ref1 = vec_ld (16, ref); | |
856 ref += stride; | |
857 A = vec_perm (ref0, ref1, permA); | |
858 B = vec_perm (ref0, ref1, permB); | |
859 avg0 = vec_avg (A, B); | |
860 xor0 = vec_xor (A, B); | |
861 | |
/* Horizontal terms of the second ref row (avg1/xor1), then the first */
/* output row into tmp (it is stored inside the loop). */
862 ref0 = vec_ld (0, ref); | |
863 ref1 = vec_ld (16, ref); | |
864 ref += stride; | |
865 prev = vec_ld (0, dest); | |
866 A = vec_perm (ref0, ref1, permA); | |
867 B = vec_perm (ref0, ref1, permB); | |
868 avg1 = vec_avg (A, B); | |
869 xor1 = vec_xor (A, B); | |
870 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), | |
871 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
872 vec_xor (avg0, avg1)))); | |
873 | |
874 do { | |
/* Store the pending row; refresh avg0/xor0 from the new ref row and pair */
/* them with avg1/xor1 kept from the previous row. */
875 ref0 = vec_ld (0, ref); | |
876 ref1 = vec_ld (16, ref); | |
877 ref += stride; | |
878 prev = vec_ld (stride, dest); | |
879 vec_st (tmp, 0, dest); | |
880 A = vec_perm (ref0, ref1, permA); | |
881 B = vec_perm (ref0, ref1, permB); | |
882 avg0 = vec_avg (A, B); | |
883 xor0 = vec_xor (A, B); | |
884 tmp = vec_avg (prev, | |
885 vec_sub (vec_avg (avg0, avg1), | |
886 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
887 vec_xor (avg0, avg1)))); | |
888 | |
/* Same again with the roles swapped: refresh avg1/xor1; dest advances */
/* by two rows once per pass. */
889 ref0 = vec_ld (0, ref); | |
890 ref1 = vec_ld (16, ref); | |
891 ref += stride; | |
892 prev = vec_ld (2*stride, dest); | |
893 vec_st (tmp, stride, dest); | |
894 dest += 2*stride; | |
895 A = vec_perm (ref0, ref1, permA); | |
896 B = vec_perm (ref0, ref1, permB); | |
897 avg1 = vec_avg (A, B); | |
898 xor1 = vec_xor (A, B); | |
899 tmp = vec_avg (prev, | |
900 vec_sub (vec_avg (avg0, avg1), | |
901 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
902 vec_xor (avg0, avg1)))); | |
903 } while (--height); | |
904 | |
/* Epilogue: store the pending row, then compute and store the last row. */
905 ref0 = vec_ld (0, ref); | |
906 ref1 = vec_ld (16, ref); | |
907 prev = vec_ld (stride, dest); | |
908 vec_st (tmp, 0, dest); | |
909 A = vec_perm (ref0, ref1, permA); | |
910 B = vec_perm (ref0, ref1, permB); | |
911 avg0 = vec_avg (A, B); | |
912 xor0 = vec_xor (A, B); | |
913 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), | |
914 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
915 vec_xor (avg0, avg1)))); | |
916 vec_st (tmp, stride, dest); | |
917 } | |
918 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
/*
 * MC_avg_xy_8_altivec: for an 8-byte-wide block, combine each ref byte with
 * its right-hand neighbour and the two bytes below them (a 2x2 neighbourhood),
 * average that with the bytes already in dest, and write the result back to
 * dest as two 32-bit element stores per row.
 *
 * dest   - block that is read, blended and rewritten (8 bytes per row)
 * ref    - source rows; their alignment is handled with vec_lvsl/vec_perm
 * stride - byte distance between consecutive rows, used for dest and ref
 * height - number of output rows; height + 1 ref rows are loaded in total
 *
 * Per ref row, avgN holds vec_avg (A, B) and xorN holds A ^ B, where A is the
 * realigned row and B the same row shifted one byte. Two rows are combined as
 *   vec_avg (avg0, avg1) - (1 & (xor0 | xor1) & (avg0 ^ avg1))
 * NOTE(review): the subtracted term presumably corrects the upward bias of
 * nesting vec_avg twice - confirm against the reference C implementation.
 * NOTE(review): the do/while on (height >> 1) - 1 looks like it needs an
 * even height >= 4 - confirm against the callers.
 */
919 static void MC_avg_xy_8_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
920 const int stride, int height) |
9857 | 921 { |
922 vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B; | |
923 vector_u8_t avg0, avg1, xor0, xor1, tmp, ones, prev; | |
924 | |
/* perm0A/perm0B come from the alignment of ref, perm1A/perm1B from that of */
/* ref + stride; mergeh + pack repeats the first 8 lvsl indices in both */
/* vector halves, and each B variant is its A variant plus one. */
925 ones = vec_splat_u8 (1); | |
926 perm0A = vec_lvsl (0, ref); | |
927 perm0A = vec_mergeh (perm0A, perm0A); | |
928 perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A); | |
929 perm0B = vec_add (perm0A, ones); | |
930 perm1A = vec_lvsl (stride, ref); | |
931 perm1A = vec_mergeh (perm1A, perm1A); | |
932 perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A); | |
933 perm1B = vec_add (perm1A, ones); | |
934 | |
935 height = (height >> 1) - 1; | |
936 | |
/* Prologue: horizontal terms of the first ref row (avg0/xor0). */
937 ref0 = vec_ld (0, ref); | |
938 ref1 = vec_ld (8, ref); | |
939 ref += stride; | |
940 A = vec_perm (ref0, ref1, perm0A); | |
941 B = vec_perm (ref0, ref1, perm0B); | |
942 avg0 = vec_avg (A, B); | |
943 xor0 = vec_xor (A, B); | |
944 | |
/* Horizontal terms of the second ref row (avg1/xor1), then the first */
/* output row into tmp (it is stored inside the loop). */
945 ref0 = vec_ld (0, ref); | |
946 ref1 = vec_ld (8, ref); | |
947 ref += stride; | |
948 prev = vec_ld (0, dest); | |
949 A = vec_perm (ref0, ref1, perm1A); | |
950 B = vec_perm (ref0, ref1, perm1B); | |
951 avg1 = vec_avg (A, B); | |
952 xor1 = vec_xor (A, B); | |
953 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), | |
954 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
955 vec_xor (avg0, avg1)))); | |
956 | |
957 do { | |
/* Store the pending row; refresh avg0/xor0 from the new ref row and pair */
/* them with avg1/xor1 kept from the previous row. */
958 ref0 = vec_ld (0, ref); | |
959 ref1 = vec_ld (8, ref); | |
960 ref += stride; | |
961 prev = vec_ld (stride, dest); | |
962 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
963 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
964 dest += stride; | |
965 A = vec_perm (ref0, ref1, perm0A); | |
966 B = vec_perm (ref0, ref1, perm0B); | |
967 avg0 = vec_avg (A, B); | |
968 xor0 = vec_xor (A, B); | |
969 tmp = vec_avg (prev, | |
970 vec_sub (vec_avg (avg0, avg1), | |
971 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
972 vec_xor (avg0, avg1)))); | |
973 | |
/* Same again with the roles swapped: refresh avg1/xor1 (perm1 rows). */
974 ref0 = vec_ld (0, ref); | |
975 ref1 = vec_ld (8, ref); | |
976 ref += stride; | |
977 prev = vec_ld (stride, dest); | |
978 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
979 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
980 dest += stride; | |
981 A = vec_perm (ref0, ref1, perm1A); | |
982 B = vec_perm (ref0, ref1, perm1B); | |
983 avg1 = vec_avg (A, B); | |
984 xor1 = vec_xor (A, B); | |
985 tmp = vec_avg (prev, | |
986 vec_sub (vec_avg (avg0, avg1), | |
987 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
988 vec_xor (avg0, avg1)))); | |
989 } while (--height); | |
990 | |
/* Epilogue: store the pending row, then compute and store the last row. */
991 ref0 = vec_ld (0, ref); | |
992 ref1 = vec_ld (8, ref); | |
993 prev = vec_ld (stride, dest); | |
994 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
995 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
996 dest += stride; | |
997 A = vec_perm (ref0, ref1, perm0A); | |
998 B = vec_perm (ref0, ref1, perm0B); | |
999 avg0 = vec_avg (A, B); | |
1000 xor0 = vec_xor (A, B); | |
1001 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), | |
1002 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
1003 vec_xor (avg0, avg1)))); | |
1004 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
1005 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
1006 } | |
1007 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
/* NOTE(review): MPEG2_MC_EXTERN is defined outside this view; presumably it */
/* emits the table that exports the MC_*_altivec routines of this file - */
/* confirm in the header that defines it. */
1008 MPEG2_MC_EXTERN (altivec) |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
1009 |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
/* Presumably closes the "#if ARCH_PPC" guard opened after the includes. */
1010 #endif |