Mercurial > mplayer.hg
annotate libmpeg2/motion_comp_altivec.c @ 11196:0750ab856742
More precise wording inspired by michaelni.
author | diego |
---|---|
date | Mon, 20 Oct 2003 22:51:20 +0000 |
parents | f0e14d641160 |
children | d0a8810e155c |
rev | line source |
---|---|
9857 | 1 /* |
2 * motion_comp_altivec.c | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
3 * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> |
9857 | 4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> |
5 * | |
6 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. | |
7 * See http://libmpeg2.sourceforge.net/ for updates. | |
8 * | |
9 * mpeg2dec is free software; you can redistribute it and/or modify | |
10 * it under the terms of the GNU General Public License as published by | |
11 * the Free Software Foundation; either version 2 of the License, or | |
12 * (at your option) any later version. | |
13 * | |
14 * mpeg2dec is distributed in the hope that it will be useful, | |
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 * GNU General Public License for more details. | |
18 * | |
19 * You should have received a copy of the GNU General Public License | |
20 * along with this program; if not, write to the Free Software | |
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
22 */ | |
23 | |
24 #include "config.h" | |
25 | |
26 #ifdef ARCH_PPC | |
27 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
28 #ifdef HAVE_ALTIVEC_H |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
29 #include <altivec.h> |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
30 #endif |
9857 | 31 #include <inttypes.h> |
32 | |
33 #include "mpeg2.h" | |
34 #include "mpeg2_internal.h" | |
35 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
36 typedef vector signed char vector_s8_t; |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
37 typedef vector unsigned char vector_u8_t; |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
38 typedef vector signed short vector_s16_t; |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
39 typedef vector unsigned short vector_u16_t; |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
40 typedef vector signed int vector_s32_t; |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
41 typedef vector unsigned int vector_u32_t; |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
42 |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
43 #ifndef COFFEE_BREAK /* Workarounds for gcc suckage */ |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
44 |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
45 static inline vector_u8_t my_vec_ld (int const A, const uint8_t * const B) |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
46 { |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
47 return vec_ld (A, (uint8_t *)B); |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
48 } |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
49 #undef vec_ld |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
50 #define vec_ld my_vec_ld |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
51 |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
52 static inline vector_u8_t my_vec_and (vector_u8_t const A, vector_u8_t const B) |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
53 { |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
54 return vec_and (A, B); |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
55 } |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
56 #undef vec_and |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
57 #define vec_and my_vec_and |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
58 |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
59 static inline vector_u8_t my_vec_avg (vector_u8_t const A, vector_u8_t const B) |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
60 { |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
61 return vec_avg (A, B); |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
62 } |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
63 #undef vec_avg |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
64 #define vec_avg my_vec_avg |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
65 |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
66 #endif |
9857 | 67 |
68 static void MC_put_o_16_altivec (uint8_t * dest, const uint8_t * ref, | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
69 const int stride, int height) |
9857 | 70 { |
71 vector_u8_t perm, ref0, ref1, tmp; | |
72 | |
73 perm = vec_lvsl (0, ref); | |
74 | |
75 height = (height >> 1) - 1; | |
76 | |
77 ref0 = vec_ld (0, ref); | |
78 ref1 = vec_ld (15, ref); | |
79 ref += stride; | |
80 tmp = vec_perm (ref0, ref1, perm); | |
81 | |
82 do { | |
83 ref0 = vec_ld (0, ref); | |
84 ref1 = vec_ld (15, ref); | |
85 ref += stride; | |
86 vec_st (tmp, 0, dest); | |
87 tmp = vec_perm (ref0, ref1, perm); | |
88 | |
89 ref0 = vec_ld (0, ref); | |
90 ref1 = vec_ld (15, ref); | |
91 ref += stride; | |
92 vec_st (tmp, stride, dest); | |
93 dest += 2*stride; | |
94 tmp = vec_perm (ref0, ref1, perm); | |
95 } while (--height); | |
96 | |
97 ref0 = vec_ld (0, ref); | |
98 ref1 = vec_ld (15, ref); | |
99 vec_st (tmp, 0, dest); | |
100 tmp = vec_perm (ref0, ref1, perm); | |
101 vec_st (tmp, stride, dest); | |
102 } | |
103 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
104 static void MC_put_o_8_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
105 const int stride, int height) |
9857 | 106 { |
107 vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1; | |
108 | |
109 tmp0 = vec_lvsl (0, ref); | |
110 tmp0 = vec_mergeh (tmp0, tmp0); | |
111 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); | |
112 tmp1 = vec_lvsl (stride, ref); | |
113 tmp1 = vec_mergeh (tmp1, tmp1); | |
114 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); | |
115 | |
116 height = (height >> 1) - 1; | |
117 | |
118 ref0 = vec_ld (0, ref); | |
119 ref1 = vec_ld (7, ref); | |
120 ref += stride; | |
121 tmp0 = vec_perm (ref0, ref1, perm0); | |
122 | |
123 do { | |
124 ref0 = vec_ld (0, ref); | |
125 ref1 = vec_ld (7, ref); | |
126 ref += stride; | |
127 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); | |
128 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); | |
129 dest += stride; | |
130 tmp1 = vec_perm (ref0, ref1, perm1); | |
131 | |
132 ref0 = vec_ld (0, ref); | |
133 ref1 = vec_ld (7, ref); | |
134 ref += stride; | |
135 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); | |
136 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); | |
137 dest += stride; | |
138 tmp0 = vec_perm (ref0, ref1, perm0); | |
139 } while (--height); | |
140 | |
141 ref0 = vec_ld (0, ref); | |
142 ref1 = vec_ld (7, ref); | |
143 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); | |
144 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); | |
145 dest += stride; | |
146 tmp1 = vec_perm (ref0, ref1, perm1); | |
147 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); | |
148 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); | |
149 } | |
150 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
151 static void MC_put_x_16_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
152 const int stride, int height) |
9857 | 153 { |
154 vector_u8_t permA, permB, ref0, ref1, tmp; | |
155 | |
156 permA = vec_lvsl (0, ref); | |
157 permB = vec_add (permA, vec_splat_u8 (1)); | |
158 | |
159 height = (height >> 1) - 1; | |
160 | |
161 ref0 = vec_ld (0, ref); | |
162 ref1 = vec_ld (16, ref); | |
163 ref += stride; | |
164 tmp = vec_avg (vec_perm (ref0, ref1, permA), | |
165 vec_perm (ref0, ref1, permB)); | |
166 | |
167 do { | |
168 ref0 = vec_ld (0, ref); | |
169 ref1 = vec_ld (16, ref); | |
170 ref += stride; | |
171 vec_st (tmp, 0, dest); | |
172 tmp = vec_avg (vec_perm (ref0, ref1, permA), | |
173 vec_perm (ref0, ref1, permB)); | |
174 | |
175 ref0 = vec_ld (0, ref); | |
176 ref1 = vec_ld (16, ref); | |
177 ref += stride; | |
178 vec_st (tmp, stride, dest); | |
179 dest += 2*stride; | |
180 tmp = vec_avg (vec_perm (ref0, ref1, permA), | |
181 vec_perm (ref0, ref1, permB)); | |
182 } while (--height); | |
183 | |
184 ref0 = vec_ld (0, ref); | |
185 ref1 = vec_ld (16, ref); | |
186 vec_st (tmp, 0, dest); | |
187 tmp = vec_avg (vec_perm (ref0, ref1, permA), | |
188 vec_perm (ref0, ref1, permB)); | |
189 vec_st (tmp, stride, dest); | |
190 } | |
191 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
192 static void MC_put_x_8_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
193 const int stride, int height) |
9857 | 194 { |
195 vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1; | |
196 | |
197 ones = vec_splat_u8 (1); | |
198 tmp0 = vec_lvsl (0, ref); | |
199 tmp0 = vec_mergeh (tmp0, tmp0); | |
200 perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); | |
201 perm0B = vec_add (perm0A, ones); | |
202 tmp1 = vec_lvsl (stride, ref); | |
203 tmp1 = vec_mergeh (tmp1, tmp1); | |
204 perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); | |
205 perm1B = vec_add (perm1A, ones); | |
206 | |
207 height = (height >> 1) - 1; | |
208 | |
209 ref0 = vec_ld (0, ref); | |
210 ref1 = vec_ld (8, ref); | |
211 ref += stride; | |
212 tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A), | |
213 vec_perm (ref0, ref1, perm0B)); | |
214 | |
215 do { | |
216 ref0 = vec_ld (0, ref); | |
217 ref1 = vec_ld (8, ref); | |
218 ref += stride; | |
219 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); | |
220 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); | |
221 dest += stride; | |
222 tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A), | |
223 vec_perm (ref0, ref1, perm1B)); | |
224 | |
225 ref0 = vec_ld (0, ref); | |
226 ref1 = vec_ld (8, ref); | |
227 ref += stride; | |
228 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); | |
229 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); | |
230 dest += stride; | |
231 tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A), | |
232 vec_perm (ref0, ref1, perm0B)); | |
233 } while (--height); | |
234 | |
235 ref0 = vec_ld (0, ref); | |
236 ref1 = vec_ld (8, ref); | |
237 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); | |
238 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); | |
239 dest += stride; | |
240 tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A), | |
241 vec_perm (ref0, ref1, perm1B)); | |
242 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); | |
243 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); | |
244 } | |
245 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
246 static void MC_put_y_16_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
247 const int stride, int height) |
9857 | 248 { |
249 vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp; | |
250 | |
251 perm = vec_lvsl (0, ref); | |
252 | |
253 height = (height >> 1) - 1; | |
254 | |
255 ref0 = vec_ld (0, ref); | |
256 ref1 = vec_ld (15, ref); | |
257 ref += stride; | |
258 tmp0 = vec_perm (ref0, ref1, perm); | |
259 ref0 = vec_ld (0, ref); | |
260 ref1 = vec_ld (15, ref); | |
261 ref += stride; | |
262 tmp1 = vec_perm (ref0, ref1, perm); | |
263 tmp = vec_avg (tmp0, tmp1); | |
264 | |
265 do { | |
266 ref0 = vec_ld (0, ref); | |
267 ref1 = vec_ld (15, ref); | |
268 ref += stride; | |
269 vec_st (tmp, 0, dest); | |
270 tmp0 = vec_perm (ref0, ref1, perm); | |
271 tmp = vec_avg (tmp0, tmp1); | |
272 | |
273 ref0 = vec_ld (0, ref); | |
274 ref1 = vec_ld (15, ref); | |
275 ref += stride; | |
276 vec_st (tmp, stride, dest); | |
277 dest += 2*stride; | |
278 tmp1 = vec_perm (ref0, ref1, perm); | |
279 tmp = vec_avg (tmp0, tmp1); | |
280 } while (--height); | |
281 | |
282 ref0 = vec_ld (0, ref); | |
283 ref1 = vec_ld (15, ref); | |
284 vec_st (tmp, 0, dest); | |
285 tmp0 = vec_perm (ref0, ref1, perm); | |
286 tmp = vec_avg (tmp0, tmp1); | |
287 vec_st (tmp, stride, dest); | |
288 } | |
289 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
290 static void MC_put_y_8_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
291 const int stride, int height) |
9857 | 292 { |
293 vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1; | |
294 | |
295 tmp0 = vec_lvsl (0, ref); | |
296 tmp0 = vec_mergeh (tmp0, tmp0); | |
297 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); | |
298 tmp1 = vec_lvsl (stride, ref); | |
299 tmp1 = vec_mergeh (tmp1, tmp1); | |
300 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); | |
301 | |
302 height = (height >> 1) - 1; | |
303 | |
304 ref0 = vec_ld (0, ref); | |
305 ref1 = vec_ld (7, ref); | |
306 ref += stride; | |
307 tmp0 = vec_perm (ref0, ref1, perm0); | |
308 ref0 = vec_ld (0, ref); | |
309 ref1 = vec_ld (7, ref); | |
310 ref += stride; | |
311 tmp1 = vec_perm (ref0, ref1, perm1); | |
312 tmp = vec_avg (tmp0, tmp1); | |
313 | |
314 do { | |
315 ref0 = vec_ld (0, ref); | |
316 ref1 = vec_ld (7, ref); | |
317 ref += stride; | |
318 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
319 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
320 dest += stride; | |
321 tmp0 = vec_perm (ref0, ref1, perm0); | |
322 tmp = vec_avg (tmp0, tmp1); | |
323 | |
324 ref0 = vec_ld (0, ref); | |
325 ref1 = vec_ld (7, ref); | |
326 ref += stride; | |
327 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
328 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
329 dest += stride; | |
330 tmp1 = vec_perm (ref0, ref1, perm1); | |
331 tmp = vec_avg (tmp0, tmp1); | |
332 } while (--height); | |
333 | |
334 ref0 = vec_ld (0, ref); | |
335 ref1 = vec_ld (7, ref); | |
336 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
337 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
338 dest += stride; | |
339 tmp0 = vec_perm (ref0, ref1, perm0); | |
340 tmp = vec_avg (tmp0, tmp1); | |
341 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
342 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
343 } | |
344 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
345 static void MC_put_xy_16_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
346 const int stride, int height) |
9857 | 347 { |
348 vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp; | |
349 vector_u8_t ones; | |
350 | |
351 ones = vec_splat_u8 (1); | |
352 permA = vec_lvsl (0, ref); | |
353 permB = vec_add (permA, ones); | |
354 | |
355 height = (height >> 1) - 1; | |
356 | |
357 ref0 = vec_ld (0, ref); | |
358 ref1 = vec_ld (16, ref); | |
359 ref += stride; | |
360 A = vec_perm (ref0, ref1, permA); | |
361 B = vec_perm (ref0, ref1, permB); | |
362 avg0 = vec_avg (A, B); | |
363 xor0 = vec_xor (A, B); | |
364 | |
365 ref0 = vec_ld (0, ref); | |
366 ref1 = vec_ld (16, ref); | |
367 ref += stride; | |
368 A = vec_perm (ref0, ref1, permA); | |
369 B = vec_perm (ref0, ref1, permB); | |
370 avg1 = vec_avg (A, B); | |
371 xor1 = vec_xor (A, B); | |
372 tmp = vec_sub (vec_avg (avg0, avg1), | |
373 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
374 vec_xor (avg0, avg1))); | |
375 | |
376 do { | |
377 ref0 = vec_ld (0, ref); | |
378 ref1 = vec_ld (16, ref); | |
379 ref += stride; | |
380 vec_st (tmp, 0, dest); | |
381 A = vec_perm (ref0, ref1, permA); | |
382 B = vec_perm (ref0, ref1, permB); | |
383 avg0 = vec_avg (A, B); | |
384 xor0 = vec_xor (A, B); | |
385 tmp = vec_sub (vec_avg (avg0, avg1), | |
386 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
387 vec_xor (avg0, avg1))); | |
388 | |
389 ref0 = vec_ld (0, ref); | |
390 ref1 = vec_ld (16, ref); | |
391 ref += stride; | |
392 vec_st (tmp, stride, dest); | |
393 dest += 2*stride; | |
394 A = vec_perm (ref0, ref1, permA); | |
395 B = vec_perm (ref0, ref1, permB); | |
396 avg1 = vec_avg (A, B); | |
397 xor1 = vec_xor (A, B); | |
398 tmp = vec_sub (vec_avg (avg0, avg1), | |
399 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
400 vec_xor (avg0, avg1))); | |
401 } while (--height); | |
402 | |
403 ref0 = vec_ld (0, ref); | |
404 ref1 = vec_ld (16, ref); | |
405 vec_st (tmp, 0, dest); | |
406 A = vec_perm (ref0, ref1, permA); | |
407 B = vec_perm (ref0, ref1, permB); | |
408 avg0 = vec_avg (A, B); | |
409 xor0 = vec_xor (A, B); | |
410 tmp = vec_sub (vec_avg (avg0, avg1), | |
411 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
412 vec_xor (avg0, avg1))); | |
413 vec_st (tmp, stride, dest); | |
414 } | |
415 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
416 static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
417 const int stride, int height) |
9857 | 418 { |
419 vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B; | |
420 vector_u8_t avg0, avg1, xor0, xor1, tmp, ones; | |
421 | |
422 ones = vec_splat_u8 (1); | |
423 perm0A = vec_lvsl (0, ref); | |
424 perm0A = vec_mergeh (perm0A, perm0A); | |
425 perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A); | |
426 perm0B = vec_add (perm0A, ones); | |
427 perm1A = vec_lvsl (stride, ref); | |
428 perm1A = vec_mergeh (perm1A, perm1A); | |
429 perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A); | |
430 perm1B = vec_add (perm1A, ones); | |
431 | |
432 height = (height >> 1) - 1; | |
433 | |
434 ref0 = vec_ld (0, ref); | |
435 ref1 = vec_ld (8, ref); | |
436 ref += stride; | |
437 A = vec_perm (ref0, ref1, perm0A); | |
438 B = vec_perm (ref0, ref1, perm0B); | |
439 avg0 = vec_avg (A, B); | |
440 xor0 = vec_xor (A, B); | |
441 | |
442 ref0 = vec_ld (0, ref); | |
443 ref1 = vec_ld (8, ref); | |
444 ref += stride; | |
445 A = vec_perm (ref0, ref1, perm1A); | |
446 B = vec_perm (ref0, ref1, perm1B); | |
447 avg1 = vec_avg (A, B); | |
448 xor1 = vec_xor (A, B); | |
449 tmp = vec_sub (vec_avg (avg0, avg1), | |
450 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
451 vec_xor (avg0, avg1))); | |
452 | |
453 do { | |
454 ref0 = vec_ld (0, ref); | |
455 ref1 = vec_ld (8, ref); | |
456 ref += stride; | |
457 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
458 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
459 dest += stride; | |
460 A = vec_perm (ref0, ref1, perm0A); | |
461 B = vec_perm (ref0, ref1, perm0B); | |
462 avg0 = vec_avg (A, B); | |
463 xor0 = vec_xor (A, B); | |
464 tmp = vec_sub (vec_avg (avg0, avg1), | |
465 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
466 vec_xor (avg0, avg1))); | |
467 | |
468 ref0 = vec_ld (0, ref); | |
469 ref1 = vec_ld (8, ref); | |
470 ref += stride; | |
471 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
472 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
473 dest += stride; | |
474 A = vec_perm (ref0, ref1, perm1A); | |
475 B = vec_perm (ref0, ref1, perm1B); | |
476 avg1 = vec_avg (A, B); | |
477 xor1 = vec_xor (A, B); | |
478 tmp = vec_sub (vec_avg (avg0, avg1), | |
479 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
480 vec_xor (avg0, avg1))); | |
481 } while (--height); | |
482 | |
483 ref0 = vec_ld (0, ref); | |
484 ref1 = vec_ld (8, ref); | |
485 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
486 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
487 dest += stride; | |
488 A = vec_perm (ref0, ref1, perm0A); | |
489 B = vec_perm (ref0, ref1, perm0B); | |
490 avg0 = vec_avg (A, B); | |
491 xor0 = vec_xor (A, B); | |
492 tmp = vec_sub (vec_avg (avg0, avg1), | |
493 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
494 vec_xor (avg0, avg1))); | |
495 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
496 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
497 } | |
498 | |
#if 0
/*
 * Disabled alternative to MC_put_xy_8_altivec.  It widens the four
 * neighbouring samples to 16 bits, sums them, adds 2 and shifts right by 2,
 * one row per iteration.
 *
 * NOTE(review): looks unfinished - it stores a whole quadword with vec_st
 * and the vec_avg assigned to `out` at the end of each iteration is
 * overwritten before it is ever stored.  Kept disabled.
 */
static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
                                 const int stride, int height)
{
    vector_u8_t sel_here, sel_right, lo, hi, A, B, C, D, out, zero, lsb;
    vector_u16_t two, upper, lower, sum;

    lsb = vec_splat_u8 (1);
    sel_here = vec_lvsl (0, ref);
    sel_right = vec_add (sel_here, lsb);

    zero = vec_splat_u8 (0);
    two = vec_splat_u16 (2);

    do {
        /* current row: sample and right-hand neighbour */
        lo = vec_ld (0, ref);
        hi = vec_ld (8, ref);
        ref += stride;
        A = vec_perm (lo, hi, sel_here);
        B = vec_perm (lo, hi, sel_right);

        /* row below: sample and right-hand neighbour */
        lo = vec_ld (0, ref);
        hi = vec_ld (8, ref);
        C = vec_perm (lo, hi, sel_here);
        D = vec_perm (lo, hi, sel_right);

        upper = vec_add ((vector_u16_t) vec_mergeh (zero, A),
                         (vector_u16_t) vec_mergeh (zero, B));
        lower = vec_add ((vector_u16_t) vec_mergeh (zero, C),
                         (vector_u16_t) vec_mergeh (zero, D));
        sum = vec_add (upper, lower);
        sum = vec_sr (vec_add (sum, two), two);
        out = vec_pack (sum, sum);

        vec_st (out, 0, dest);
        dest += stride;
        out = vec_avg (vec_perm (lo, hi, sel_here),
                       vec_perm (lo, hi, sel_right));
    } while (--height != 0);
}
#endif
538 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
539 static void MC_avg_o_16_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
540 const int stride, int height) |
9857 | 541 { |
542 vector_u8_t perm, ref0, ref1, tmp, prev; | |
543 | |
544 perm = vec_lvsl (0, ref); | |
545 | |
546 height = (height >> 1) - 1; | |
547 | |
548 ref0 = vec_ld (0, ref); | |
549 ref1 = vec_ld (15, ref); | |
550 ref += stride; | |
551 prev = vec_ld (0, dest); | |
552 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); | |
553 | |
554 do { | |
555 ref0 = vec_ld (0, ref); | |
556 ref1 = vec_ld (15, ref); | |
557 ref += stride; | |
558 prev = vec_ld (stride, dest); | |
559 vec_st (tmp, 0, dest); | |
560 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); | |
561 | |
562 ref0 = vec_ld (0, ref); | |
563 ref1 = vec_ld (15, ref); | |
564 ref += stride; | |
565 prev = vec_ld (2*stride, dest); | |
566 vec_st (tmp, stride, dest); | |
567 dest += 2*stride; | |
568 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); | |
569 } while (--height); | |
570 | |
571 ref0 = vec_ld (0, ref); | |
572 ref1 = vec_ld (15, ref); | |
573 prev = vec_ld (stride, dest); | |
574 vec_st (tmp, 0, dest); | |
575 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); | |
576 vec_st (tmp, stride, dest); | |
577 } | |
578 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
579 static void MC_avg_o_8_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
580 const int stride, int height) |
9857 | 581 { |
582 vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1, prev; | |
583 | |
584 tmp0 = vec_lvsl (0, ref); | |
585 tmp0 = vec_mergeh (tmp0, tmp0); | |
586 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); | |
587 tmp1 = vec_lvsl (stride, ref); | |
588 tmp1 = vec_mergeh (tmp1, tmp1); | |
589 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); | |
590 | |
591 height = (height >> 1) - 1; | |
592 | |
593 ref0 = vec_ld (0, ref); | |
594 ref1 = vec_ld (7, ref); | |
595 ref += stride; | |
596 prev = vec_ld (0, dest); | |
597 tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0)); | |
598 | |
599 do { | |
600 ref0 = vec_ld (0, ref); | |
601 ref1 = vec_ld (7, ref); | |
602 ref += stride; | |
603 prev = vec_ld (stride, dest); | |
604 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); | |
605 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); | |
606 dest += stride; | |
607 tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1)); | |
608 | |
609 ref0 = vec_ld (0, ref); | |
610 ref1 = vec_ld (7, ref); | |
611 ref += stride; | |
612 prev = vec_ld (stride, dest); | |
613 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); | |
614 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); | |
615 dest += stride; | |
616 tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0)); | |
617 } while (--height); | |
618 | |
619 ref0 = vec_ld (0, ref); | |
620 ref1 = vec_ld (7, ref); | |
621 prev = vec_ld (stride, dest); | |
622 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); | |
623 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); | |
624 dest += stride; | |
625 tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1)); | |
626 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); | |
627 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); | |
628 } | |
629 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
630 static void MC_avg_x_16_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
631 const int stride, int height) |
9857 | 632 { |
633 vector_u8_t permA, permB, ref0, ref1, tmp, prev; | |
634 | |
635 permA = vec_lvsl (0, ref); | |
636 permB = vec_add (permA, vec_splat_u8 (1)); | |
637 | |
638 height = (height >> 1) - 1; | |
639 | |
640 ref0 = vec_ld (0, ref); | |
641 ref1 = vec_ld (16, ref); | |
642 prev = vec_ld (0, dest); | |
643 ref += stride; | |
644 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), | |
645 vec_perm (ref0, ref1, permB))); | |
646 | |
647 do { | |
648 ref0 = vec_ld (0, ref); | |
649 ref1 = vec_ld (16, ref); | |
650 ref += stride; | |
651 prev = vec_ld (stride, dest); | |
652 vec_st (tmp, 0, dest); | |
653 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), | |
654 vec_perm (ref0, ref1, permB))); | |
655 | |
656 ref0 = vec_ld (0, ref); | |
657 ref1 = vec_ld (16, ref); | |
658 ref += stride; | |
659 prev = vec_ld (2*stride, dest); | |
660 vec_st (tmp, stride, dest); | |
661 dest += 2*stride; | |
662 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), | |
663 vec_perm (ref0, ref1, permB))); | |
664 } while (--height); | |
665 | |
666 ref0 = vec_ld (0, ref); | |
667 ref1 = vec_ld (16, ref); | |
668 prev = vec_ld (stride, dest); | |
669 vec_st (tmp, 0, dest); | |
670 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), | |
671 vec_perm (ref0, ref1, permB))); | |
672 vec_st (tmp, stride, dest); | |
673 } | |
674 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
675 static void MC_avg_x_8_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
676 const int stride, int height) |
9857 | 677 { |
678 vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1; | |
679 vector_u8_t prev; | |
680 | |
681 ones = vec_splat_u8 (1); | |
682 tmp0 = vec_lvsl (0, ref); | |
683 tmp0 = vec_mergeh (tmp0, tmp0); | |
684 perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); | |
685 perm0B = vec_add (perm0A, ones); | |
686 tmp1 = vec_lvsl (stride, ref); | |
687 tmp1 = vec_mergeh (tmp1, tmp1); | |
688 perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); | |
689 perm1B = vec_add (perm1A, ones); | |
690 | |
691 height = (height >> 1) - 1; | |
692 | |
693 ref0 = vec_ld (0, ref); | |
694 ref1 = vec_ld (8, ref); | |
695 prev = vec_ld (0, dest); | |
696 ref += stride; | |
697 tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A), | |
698 vec_perm (ref0, ref1, perm0B))); | |
699 | |
700 do { | |
701 ref0 = vec_ld (0, ref); | |
702 ref1 = vec_ld (8, ref); | |
703 ref += stride; | |
704 prev = vec_ld (stride, dest); | |
705 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); | |
706 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); | |
707 dest += stride; | |
708 tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A), | |
709 vec_perm (ref0, ref1, perm1B))); | |
710 | |
711 ref0 = vec_ld (0, ref); | |
712 ref1 = vec_ld (8, ref); | |
713 ref += stride; | |
714 prev = vec_ld (stride, dest); | |
715 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); | |
716 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); | |
717 dest += stride; | |
718 tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A), | |
719 vec_perm (ref0, ref1, perm0B))); | |
720 } while (--height); | |
721 | |
722 ref0 = vec_ld (0, ref); | |
723 ref1 = vec_ld (8, ref); | |
724 prev = vec_ld (stride, dest); | |
725 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); | |
726 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); | |
727 dest += stride; | |
728 tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A), | |
729 vec_perm (ref0, ref1, perm1B))); | |
730 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); | |
731 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); | |
732 } | |
733 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
/*
 * MC_avg_y_16_altivec: for each of `height` rows, take the rounded average
 * of two vertically adjacent 16-byte rows of `ref` (row i and row i+1),
 * then take the rounded average of that with the 16 bytes already in
 * `dest`, and store the result back to `dest`.
 *
 *   dest   - output rows, read and written with vec_ld/vec_st (these access
 *            the enclosing 16-byte-aligned line, so dest and stride are
 *            presumably 16-byte aligned -- TODO confirm with callers)
 *   ref    - reference rows; may be unaligned, realigned with vec_perm
 *   stride - byte distance between consecutive rows of both buffers
 *   height - number of rows to produce; height + 1 rows of ref are read
 *
 * The loop is software pipelined: the prologue prepares row 0, each loop
 * pass finishes two rows, and the epilogue finishes the last two stores.
 * Because the counter is (height >> 1) - 1 and the do/while body runs
 * before the counter is tested, height must be even and at least 4.
 *
 * One permute vector is reused for every row, so the 16-byte misalignment
 * of ref must be the same on every row (stride a multiple of 16).
 */
734 static void MC_avg_y_16_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
735 const int stride, int height) |
9857 | 736 { |
737 vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp, prev; | |
738 | |
/* permute control that shifts the two aligned loads into ref's byte order */
739 perm = vec_lvsl (0, ref); | |
740 | |
/* two rows per loop pass; one pass is covered by prologue + epilogue */
741 height = (height >> 1) - 1; | |
742 | |
/* prologue: ref rows 0 and 1 blended with dest row 0; offset 15 reaches */
/* the aligned line holding the last of the 16 wanted bytes */
743 ref0 = vec_ld (0, ref); | |
744 ref1 = vec_ld (15, ref); | |
745 ref += stride; | |
746 tmp0 = vec_perm (ref0, ref1, perm); | |
747 ref0 = vec_ld (0, ref); | |
748 ref1 = vec_ld (15, ref); | |
749 ref += stride; | |
750 prev = vec_ld (0, dest); | |
751 tmp1 = vec_perm (ref0, ref1, perm); | |
752 tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); | |
753 | |
754 do { | |
/* first half: new ref row goes to tmp0 and pairs with the kept tmp1; */
/* the previously computed row is stored only after the next dest row */
/* has been loaded into prev */
755 ref0 = vec_ld (0, ref); | |
756 ref1 = vec_ld (15, ref); | |
757 ref += stride; | |
758 prev = vec_ld (stride, dest); | |
759 vec_st (tmp, 0, dest); | |
760 tmp0 = vec_perm (ref0, ref1, perm); | |
761 tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); | |
762 | |
/* second half: new ref row goes to tmp1 and pairs with the kept tmp0 */
763 ref0 = vec_ld (0, ref); | |
764 ref1 = vec_ld (15, ref); | |
765 ref += stride; | |
766 prev = vec_ld (2*stride, dest); | |
767 vec_st (tmp, stride, dest); | |
768 dest += 2*stride; | |
769 tmp1 = vec_perm (ref0, ref1, perm); | |
770 tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); | |
771 } while (--height); | |
772 | |
/* epilogue: store the pending row, then compute and store the final row */
773 ref0 = vec_ld (0, ref); | |
774 ref1 = vec_ld (15, ref); | |
775 prev = vec_ld (stride, dest); | |
776 vec_st (tmp, 0, dest); | |
777 tmp0 = vec_perm (ref0, ref1, perm); | |
778 tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); | |
779 vec_st (tmp, stride, dest); | |
780 } | |
781 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
/*
 * MC_avg_y_8_altivec: 8-byte-wide variant of the vertical half-pel
 * "average" prediction. For each of `height` rows, take the rounded
 * average of ref row i and ref row i+1, then the rounded average of that
 * with the bytes already in `dest`, and write 8 bytes back to `dest`.
 *
 *   dest   - output rows; 8 bytes per row are written as two 32-bit
 *            vec_ste stores at offsets 0 and 4
 *   ref    - reference rows; may be unaligned
 *   stride - byte distance between consecutive rows of both buffers
 *   height - number of rows to produce; height + 1 rows of ref are read
 *
 * Two permute vectors are kept: perm0 is built from the alignment of ref
 * and used for rows at even offsets, perm1 is built from the alignment of
 * ref + stride and used for rows at odd offsets.
 *
 * The counter is (height >> 1) - 1 and the do/while body runs before the
 * counter is tested, so height must be even and at least 4.
 */
782 static void MC_avg_y_8_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
783 const int stride, int height) |
9857 | 784 { |
785 vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1, prev; | |
786 | |
/* mergeh + pack turn the lvsl pattern {s .. s+15} into {s .. s+7, s .. s+7}: */
/* the 8 wanted bytes then appear in both halves of the permuted vector. */
/* NOTE(review): presumably so vec_ste finds them whichever half of the */
/* 16-byte line dest falls in -- confirm. */
787 tmp0 = vec_lvsl (0, ref); | |
788 tmp0 = vec_mergeh (tmp0, tmp0); | |
789 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); | |
790 tmp1 = vec_lvsl (stride, ref); | |
791 tmp1 = vec_mergeh (tmp1, tmp1); | |
792 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); | |
793 | |
/* two rows per loop pass; one pass is covered by prologue + epilogue */
794 height = (height >> 1) - 1; | |
795 | |
/* prologue: ref rows 0 (perm0) and 1 (perm1) blended with dest row 0; */
/* offset 7 reaches the aligned line holding the last of the 8 bytes */
796 ref0 = vec_ld (0, ref); | |
797 ref1 = vec_ld (7, ref); | |
798 ref += stride; | |
799 tmp0 = vec_perm (ref0, ref1, perm0); | |
800 ref0 = vec_ld (0, ref); | |
801 ref1 = vec_ld (7, ref); | |
802 ref += stride; | |
803 prev = vec_ld (0, dest); | |
804 tmp1 = vec_perm (ref0, ref1, perm1); | |
805 tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); | |
806 | |
807 do { | |
/* even-offset ref row: load next dest row, store pending row, refresh tmp0 */
808 ref0 = vec_ld (0, ref); | |
809 ref1 = vec_ld (7, ref); | |
810 ref += stride; | |
811 prev = vec_ld (stride, dest); | |
812 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
813 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
814 dest += stride; | |
815 tmp0 = vec_perm (ref0, ref1, perm0); | |
816 tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); | |
817 | |
/* odd-offset ref row: same steps, refreshing tmp1 with perm1 */
818 ref0 = vec_ld (0, ref); | |
819 ref1 = vec_ld (7, ref); | |
820 ref += stride; | |
821 prev = vec_ld (stride, dest); | |
822 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
823 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
824 dest += stride; | |
825 tmp1 = vec_perm (ref0, ref1, perm1); | |
826 tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); | |
827 } while (--height); | |
828 | |
/* epilogue: store the pending row, then compute and store the final row */
829 ref0 = vec_ld (0, ref); | |
830 ref1 = vec_ld (7, ref); | |
831 prev = vec_ld (stride, dest); | |
832 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
833 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
834 dest += stride; | |
835 tmp0 = vec_perm (ref0, ref1, perm0); | |
836 tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); | |
837 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
838 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
839 } | |
840 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
/*
 * MC_avg_xy_16_altivec: 16-byte-wide prediction that combines four
 * neighbouring reference bytes (x and x+1 on rows i and i+1) and then takes
 * the rounded average of that with the bytes already in `dest`.
 *
 *   dest   - output rows, read and written with vec_ld/vec_st (these access
 *            the enclosing 16-byte-aligned line, so dest and stride are
 *            presumably 16-byte aligned -- TODO confirm with callers)
 *   ref    - reference rows; may be unaligned; 17 bytes per row are used
 *   stride - byte distance between consecutive rows of both buffers
 *   height - number of rows to produce; height + 1 rows of ref are read
 *
 * Per row, A holds bytes x .. x+15 and B holds bytes x+1 .. x+16;
 * avgN = vec_avg (A, B) and xorN = A ^ B. The two rows are combined as
 *
 *   vec_avg (avg0, avg1) - (1 & (xor0 | xor1) & (avg0 ^ avg1))
 *
 * The subtracted term removes the extra round-up that stacking two rounded
 * averages can introduce, so the value equals (a + b + c + d + 2) >> 2 for
 * the four source bytes.
 *
 * The counter is (height >> 1) - 1 and the do/while body runs before the
 * counter is tested, so height must be even and at least 4. The same
 * permute vectors are reused for every row, so the 16-byte misalignment of
 * ref must be the same on every row (stride a multiple of 16).
 */
841 static void MC_avg_xy_16_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
842 const int stride, int height) |
9857 | 843 { |
844 vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp; | |
845 vector_u8_t ones, prev; | |
846 | |
/* permA selects bytes x .. x+15; permB = permA + 1 selects x+1 .. x+16 */
847 ones = vec_splat_u8 (1); | |
848 permA = vec_lvsl (0, ref); | |
849 permB = vec_add (permA, ones); | |
850 | |
/* two rows per loop pass; one pass is covered by prologue + epilogue */
851 height = (height >> 1) - 1; | |
852 | |
/* prologue, ref row 0: second load is at offset 16 (not 15) because */
/* 17 bytes are needed for the x+1 neighbour */
853 ref0 = vec_ld (0, ref); | |
854 ref1 = vec_ld (16, ref); | |
855 ref += stride; | |
856 A = vec_perm (ref0, ref1, permA); | |
857 B = vec_perm (ref0, ref1, permB); | |
858 avg0 = vec_avg (A, B); | |
859 xor0 = vec_xor (A, B); | |
860 | |
/* prologue, ref row 1: combine with row 0 and blend with dest row 0 */
861 ref0 = vec_ld (0, ref); | |
862 ref1 = vec_ld (16, ref); | |
863 ref += stride; | |
864 prev = vec_ld (0, dest); | |
865 A = vec_perm (ref0, ref1, permA); | |
866 B = vec_perm (ref0, ref1, permB); | |
867 avg1 = vec_avg (A, B); | |
868 xor1 = vec_xor (A, B); | |
869 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), | |
870 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
871 vec_xor (avg0, avg1)))); | |
872 | |
873 do { | |
/* first half: new ref row refreshes avg0/xor0, pairs with kept avg1/xor1; */
/* the pending row is stored after the next dest row is loaded into prev */
874 ref0 = vec_ld (0, ref); | |
875 ref1 = vec_ld (16, ref); | |
876 ref += stride; | |
877 prev = vec_ld (stride, dest); | |
878 vec_st (tmp, 0, dest); | |
879 A = vec_perm (ref0, ref1, permA); | |
880 B = vec_perm (ref0, ref1, permB); | |
881 avg0 = vec_avg (A, B); | |
882 xor0 = vec_xor (A, B); | |
883 tmp = vec_avg (prev, | |
884 vec_sub (vec_avg (avg0, avg1), | |
885 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
886 vec_xor (avg0, avg1)))); | |
887 | |
/* second half: new ref row refreshes avg1/xor1, pairs with kept avg0/xor0 */
888 ref0 = vec_ld (0, ref); | |
889 ref1 = vec_ld (16, ref); | |
890 ref += stride; | |
891 prev = vec_ld (2*stride, dest); | |
892 vec_st (tmp, stride, dest); | |
893 dest += 2*stride; | |
894 A = vec_perm (ref0, ref1, permA); | |
895 B = vec_perm (ref0, ref1, permB); | |
896 avg1 = vec_avg (A, B); | |
897 xor1 = vec_xor (A, B); | |
898 tmp = vec_avg (prev, | |
899 vec_sub (vec_avg (avg0, avg1), | |
900 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
901 vec_xor (avg0, avg1)))); | |
902 } while (--height); | |
903 | |
/* epilogue: store the pending row, then compute and store the final row */
904 ref0 = vec_ld (0, ref); | |
905 ref1 = vec_ld (16, ref); | |
906 prev = vec_ld (stride, dest); | |
907 vec_st (tmp, 0, dest); | |
908 A = vec_perm (ref0, ref1, permA); | |
909 B = vec_perm (ref0, ref1, permB); | |
910 avg0 = vec_avg (A, B); | |
911 xor0 = vec_xor (A, B); | |
912 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), | |
913 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
914 vec_xor (avg0, avg1)))); | |
915 vec_st (tmp, stride, dest); | |
916 } | |
917 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
/*
 * MC_avg_xy_8_altivec: 8-byte-wide variant of the four-neighbour
 * prediction. For each of `height` rows it combines reference bytes x and
 * x+1 on rows i and i+1, takes the rounded average of that with the bytes
 * already in `dest`, and writes 8 bytes back to `dest`.
 *
 *   dest   - output rows; 8 bytes per row are written as two 32-bit
 *            vec_ste stores at offsets 0 and 4
 *   ref    - reference rows; may be unaligned; 9 bytes per row are used
 *   stride - byte distance between consecutive rows of both buffers
 *   height - number of rows to produce; height + 1 rows of ref are read
 *
 * perm0A/perm0B are built from the alignment of ref and used for rows at
 * even offsets; perm1A/perm1B are built from the alignment of ref + stride
 * and used for rows at odd offsets. The "B" vectors are the "A" vectors
 * plus one, i.e. they select the byte one position to the right.
 *
 * Per row, avgN = vec_avg (A, B) and xorN = A ^ B; two rows are combined as
 *
 *   vec_avg (avg0, avg1) - (1 & (xor0 | xor1) & (avg0 ^ avg1))
 *
 * which equals (a + b + c + d + 2) >> 2 for the four source bytes.
 *
 * The counter is (height >> 1) - 1 and the do/while body runs before the
 * counter is tested, so height must be even and at least 4.
 */
918 static void MC_avg_xy_8_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
919 const int stride, int height) |
9857 | 920 { |
921 vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B; | |
922 vector_u8_t avg0, avg1, xor0, xor1, tmp, ones, prev; | |
923 | |
/* mergeh + pack turn the lvsl pattern {s .. s+15} into {s .. s+7, s .. s+7}: */
/* the 8 wanted bytes then appear in both halves of the permuted vector. */
/* NOTE(review): presumably so vec_ste finds them whichever half of the */
/* 16-byte line dest falls in -- confirm. */
924 ones = vec_splat_u8 (1); | |
925 perm0A = vec_lvsl (0, ref); | |
926 perm0A = vec_mergeh (perm0A, perm0A); | |
927 perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A); | |
928 perm0B = vec_add (perm0A, ones); | |
929 perm1A = vec_lvsl (stride, ref); | |
930 perm1A = vec_mergeh (perm1A, perm1A); | |
931 perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A); | |
932 perm1B = vec_add (perm1A, ones); | |
933 | |
/* two rows per loop pass; one pass is covered by prologue + epilogue */
934 height = (height >> 1) - 1; | |
935 | |
/* prologue, ref row 0: offset 8 reaches the line holding the 9th byte */
936 ref0 = vec_ld (0, ref); | |
937 ref1 = vec_ld (8, ref); | |
938 ref += stride; | |
939 A = vec_perm (ref0, ref1, perm0A); | |
940 B = vec_perm (ref0, ref1, perm0B); | |
941 avg0 = vec_avg (A, B); | |
942 xor0 = vec_xor (A, B); | |
943 | |
/* prologue, ref row 1: combine with row 0 and blend with dest row 0 */
944 ref0 = vec_ld (0, ref); | |
945 ref1 = vec_ld (8, ref); | |
946 ref += stride; | |
947 prev = vec_ld (0, dest); | |
948 A = vec_perm (ref0, ref1, perm1A); | |
949 B = vec_perm (ref0, ref1, perm1B); | |
950 avg1 = vec_avg (A, B); | |
951 xor1 = vec_xor (A, B); | |
952 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), | |
953 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
954 vec_xor (avg0, avg1)))); | |
955 | |
956 do { | |
/* even-offset ref row: load next dest row, store pending row, then */
/* refresh avg0/xor0 and pair them with the kept avg1/xor1 */
957 ref0 = vec_ld (0, ref); | |
958 ref1 = vec_ld (8, ref); | |
959 ref += stride; | |
960 prev = vec_ld (stride, dest); | |
961 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
962 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
963 dest += stride; | |
964 A = vec_perm (ref0, ref1, perm0A); | |
965 B = vec_perm (ref0, ref1, perm0B); | |
966 avg0 = vec_avg (A, B); | |
967 xor0 = vec_xor (A, B); | |
968 tmp = vec_avg (prev, | |
969 vec_sub (vec_avg (avg0, avg1), | |
970 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
971 vec_xor (avg0, avg1)))); | |
972 | |
/* odd-offset ref row: same steps, refreshing avg1/xor1 with perm1A/perm1B */
973 ref0 = vec_ld (0, ref); | |
974 ref1 = vec_ld (8, ref); | |
975 ref += stride; | |
976 prev = vec_ld (stride, dest); | |
977 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
978 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
979 dest += stride; | |
980 A = vec_perm (ref0, ref1, perm1A); | |
981 B = vec_perm (ref0, ref1, perm1B); | |
982 avg1 = vec_avg (A, B); | |
983 xor1 = vec_xor (A, B); | |
984 tmp = vec_avg (prev, | |
985 vec_sub (vec_avg (avg0, avg1), | |
986 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
987 vec_xor (avg0, avg1)))); | |
988 } while (--height); | |
989 | |
/* epilogue: store the pending row, then compute and store the final row */
990 ref0 = vec_ld (0, ref); | |
991 ref1 = vec_ld (8, ref); | |
992 prev = vec_ld (stride, dest); | |
993 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
994 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
995 dest += stride; | |
996 A = vec_perm (ref0, ref1, perm0A); | |
997 B = vec_perm (ref0, ref1, perm0B); | |
998 avg0 = vec_avg (A, B); | |
999 xor0 = vec_xor (A, B); | |
1000 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), | |
1001 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
1002 vec_xor (avg0, avg1)))); | |
1003 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
1004 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
1005 } | |
1006 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
/* MPEG2_MC_EXTERN is defined outside this file section. */
/* NOTE(review): presumably it emits the exported table that references the */
/* static MC_*_altivec routines defined in this file -- confirm against the */
/* macro definition. */
1007 MPEG2_MC_EXTERN (altivec) |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
1008 |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
1009 #endif |