Mercurial > mplayer.hg
annotate libmpeg2/motion_comp_altivec.c @ 15069:3f5daa60e049
support for negative strides (fixes -vf spp,flip crash)
author | henry |
---|---|
date | Fri, 08 Apr 2005 10:31:18 +0000 |
parents | d0a8810e155c |
children | 25337a2147e7 |
rev | line source |
---|---|
9857 | 1 /* |
2 * motion_comp_altivec.c | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
3 * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> |
9857 | 4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> |
5 * | |
6 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. | |
7 * See http://libmpeg2.sourceforge.net/ for updates. | |
8 * | |
9 * mpeg2dec is free software; you can redistribute it and/or modify | |
10 * it under the terms of the GNU General Public License as published by | |
11 * the Free Software Foundation; either version 2 of the License, or | |
12 * (at your option) any later version. | |
13 * | |
14 * mpeg2dec is distributed in the hope that it will be useful, | |
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 * GNU General Public License for more details. | |
18 * | |
19 * You should have received a copy of the GNU General Public License | |
20 * along with this program; if not, write to the Free Software | |
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
22 */ | |
23 | |
24 #include "config.h" | |
25 | |
26 #ifdef ARCH_PPC | |
27 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
28 #ifdef HAVE_ALTIVEC_H |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
29 #include <altivec.h> |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
30 #endif |
9857 | 31 #include <inttypes.h> |
32 | |
33 #include "mpeg2.h" | |
12932 | 34 #include "attributes.h" |
9857 | 35 #include "mpeg2_internal.h" |
36 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
37 typedef vector signed char vector_s8_t; |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
38 typedef vector unsigned char vector_u8_t; |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
39 typedef vector signed short vector_s16_t; |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
40 typedef vector unsigned short vector_u16_t; |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
41 typedef vector signed int vector_s32_t; |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
42 typedef vector unsigned int vector_u32_t; |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
43 |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
44 #ifndef COFFEE_BREAK /* Workarounds for gcc suckage */ |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
45 |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
46 static inline vector_u8_t my_vec_ld (int const A, const uint8_t * const B) |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
47 { |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
48 return vec_ld (A, (uint8_t *)B); |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
49 } |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
50 #undef vec_ld |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
51 #define vec_ld my_vec_ld |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
52 |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
53 static inline vector_u8_t my_vec_and (vector_u8_t const A, vector_u8_t const B) |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
54 { |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
55 return vec_and (A, B); |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
56 } |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
57 #undef vec_and |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
58 #define vec_and my_vec_and |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
59 |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
60 static inline vector_u8_t my_vec_avg (vector_u8_t const A, vector_u8_t const B) |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
61 { |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
62 return vec_avg (A, B); |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
63 } |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
64 #undef vec_avg |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
65 #define vec_avg my_vec_avg |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
66 |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
67 #endif |
9857 | 68 |
69 static void MC_put_o_16_altivec (uint8_t * dest, const uint8_t * ref, | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
70 const int stride, int height) |
9857 | 71 { |
72 vector_u8_t perm, ref0, ref1, tmp; | |
73 | |
74 perm = vec_lvsl (0, ref); | |
75 | |
76 height = (height >> 1) - 1; | |
77 | |
78 ref0 = vec_ld (0, ref); | |
79 ref1 = vec_ld (15, ref); | |
80 ref += stride; | |
81 tmp = vec_perm (ref0, ref1, perm); | |
82 | |
83 do { | |
84 ref0 = vec_ld (0, ref); | |
85 ref1 = vec_ld (15, ref); | |
86 ref += stride; | |
87 vec_st (tmp, 0, dest); | |
88 tmp = vec_perm (ref0, ref1, perm); | |
89 | |
90 ref0 = vec_ld (0, ref); | |
91 ref1 = vec_ld (15, ref); | |
92 ref += stride; | |
93 vec_st (tmp, stride, dest); | |
94 dest += 2*stride; | |
95 tmp = vec_perm (ref0, ref1, perm); | |
96 } while (--height); | |
97 | |
98 ref0 = vec_ld (0, ref); | |
99 ref1 = vec_ld (15, ref); | |
100 vec_st (tmp, 0, dest); | |
101 tmp = vec_perm (ref0, ref1, perm); | |
102 vec_st (tmp, stride, dest); | |
103 } | |
104 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
105 static void MC_put_o_8_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
106 const int stride, int height) |
9857 | 107 { |
108 vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1; | |
109 | |
110 tmp0 = vec_lvsl (0, ref); | |
111 tmp0 = vec_mergeh (tmp0, tmp0); | |
112 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); | |
113 tmp1 = vec_lvsl (stride, ref); | |
114 tmp1 = vec_mergeh (tmp1, tmp1); | |
115 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); | |
116 | |
117 height = (height >> 1) - 1; | |
118 | |
119 ref0 = vec_ld (0, ref); | |
120 ref1 = vec_ld (7, ref); | |
121 ref += stride; | |
122 tmp0 = vec_perm (ref0, ref1, perm0); | |
123 | |
124 do { | |
125 ref0 = vec_ld (0, ref); | |
126 ref1 = vec_ld (7, ref); | |
127 ref += stride; | |
128 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); | |
129 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); | |
130 dest += stride; | |
131 tmp1 = vec_perm (ref0, ref1, perm1); | |
132 | |
133 ref0 = vec_ld (0, ref); | |
134 ref1 = vec_ld (7, ref); | |
135 ref += stride; | |
136 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); | |
137 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); | |
138 dest += stride; | |
139 tmp0 = vec_perm (ref0, ref1, perm0); | |
140 } while (--height); | |
141 | |
142 ref0 = vec_ld (0, ref); | |
143 ref1 = vec_ld (7, ref); | |
144 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); | |
145 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); | |
146 dest += stride; | |
147 tmp1 = vec_perm (ref0, ref1, perm1); | |
148 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); | |
149 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); | |
150 } | |
151 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
152 static void MC_put_x_16_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
153 const int stride, int height) |
9857 | 154 { |
155 vector_u8_t permA, permB, ref0, ref1, tmp; | |
156 | |
157 permA = vec_lvsl (0, ref); | |
158 permB = vec_add (permA, vec_splat_u8 (1)); | |
159 | |
160 height = (height >> 1) - 1; | |
161 | |
162 ref0 = vec_ld (0, ref); | |
163 ref1 = vec_ld (16, ref); | |
164 ref += stride; | |
165 tmp = vec_avg (vec_perm (ref0, ref1, permA), | |
166 vec_perm (ref0, ref1, permB)); | |
167 | |
168 do { | |
169 ref0 = vec_ld (0, ref); | |
170 ref1 = vec_ld (16, ref); | |
171 ref += stride; | |
172 vec_st (tmp, 0, dest); | |
173 tmp = vec_avg (vec_perm (ref0, ref1, permA), | |
174 vec_perm (ref0, ref1, permB)); | |
175 | |
176 ref0 = vec_ld (0, ref); | |
177 ref1 = vec_ld (16, ref); | |
178 ref += stride; | |
179 vec_st (tmp, stride, dest); | |
180 dest += 2*stride; | |
181 tmp = vec_avg (vec_perm (ref0, ref1, permA), | |
182 vec_perm (ref0, ref1, permB)); | |
183 } while (--height); | |
184 | |
185 ref0 = vec_ld (0, ref); | |
186 ref1 = vec_ld (16, ref); | |
187 vec_st (tmp, 0, dest); | |
188 tmp = vec_avg (vec_perm (ref0, ref1, permA), | |
189 vec_perm (ref0, ref1, permB)); | |
190 vec_st (tmp, stride, dest); | |
191 } | |
192 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
193 static void MC_put_x_8_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
194 const int stride, int height) |
9857 | 195 { |
196 vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1; | |
197 | |
198 ones = vec_splat_u8 (1); | |
199 tmp0 = vec_lvsl (0, ref); | |
200 tmp0 = vec_mergeh (tmp0, tmp0); | |
201 perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); | |
202 perm0B = vec_add (perm0A, ones); | |
203 tmp1 = vec_lvsl (stride, ref); | |
204 tmp1 = vec_mergeh (tmp1, tmp1); | |
205 perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); | |
206 perm1B = vec_add (perm1A, ones); | |
207 | |
208 height = (height >> 1) - 1; | |
209 | |
210 ref0 = vec_ld (0, ref); | |
211 ref1 = vec_ld (8, ref); | |
212 ref += stride; | |
213 tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A), | |
214 vec_perm (ref0, ref1, perm0B)); | |
215 | |
216 do { | |
217 ref0 = vec_ld (0, ref); | |
218 ref1 = vec_ld (8, ref); | |
219 ref += stride; | |
220 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); | |
221 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); | |
222 dest += stride; | |
223 tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A), | |
224 vec_perm (ref0, ref1, perm1B)); | |
225 | |
226 ref0 = vec_ld (0, ref); | |
227 ref1 = vec_ld (8, ref); | |
228 ref += stride; | |
229 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); | |
230 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); | |
231 dest += stride; | |
232 tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A), | |
233 vec_perm (ref0, ref1, perm0B)); | |
234 } while (--height); | |
235 | |
236 ref0 = vec_ld (0, ref); | |
237 ref1 = vec_ld (8, ref); | |
238 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); | |
239 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); | |
240 dest += stride; | |
241 tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A), | |
242 vec_perm (ref0, ref1, perm1B)); | |
243 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); | |
244 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); | |
245 } | |
246 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
247 static void MC_put_y_16_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
248 const int stride, int height) |
9857 | 249 { |
250 vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp; | |
251 | |
252 perm = vec_lvsl (0, ref); | |
253 | |
254 height = (height >> 1) - 1; | |
255 | |
256 ref0 = vec_ld (0, ref); | |
257 ref1 = vec_ld (15, ref); | |
258 ref += stride; | |
259 tmp0 = vec_perm (ref0, ref1, perm); | |
260 ref0 = vec_ld (0, ref); | |
261 ref1 = vec_ld (15, ref); | |
262 ref += stride; | |
263 tmp1 = vec_perm (ref0, ref1, perm); | |
264 tmp = vec_avg (tmp0, tmp1); | |
265 | |
266 do { | |
267 ref0 = vec_ld (0, ref); | |
268 ref1 = vec_ld (15, ref); | |
269 ref += stride; | |
270 vec_st (tmp, 0, dest); | |
271 tmp0 = vec_perm (ref0, ref1, perm); | |
272 tmp = vec_avg (tmp0, tmp1); | |
273 | |
274 ref0 = vec_ld (0, ref); | |
275 ref1 = vec_ld (15, ref); | |
276 ref += stride; | |
277 vec_st (tmp, stride, dest); | |
278 dest += 2*stride; | |
279 tmp1 = vec_perm (ref0, ref1, perm); | |
280 tmp = vec_avg (tmp0, tmp1); | |
281 } while (--height); | |
282 | |
283 ref0 = vec_ld (0, ref); | |
284 ref1 = vec_ld (15, ref); | |
285 vec_st (tmp, 0, dest); | |
286 tmp0 = vec_perm (ref0, ref1, perm); | |
287 tmp = vec_avg (tmp0, tmp1); | |
288 vec_st (tmp, stride, dest); | |
289 } | |
290 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
291 static void MC_put_y_8_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
292 const int stride, int height) |
9857 | 293 { |
294 vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1; | |
295 | |
296 tmp0 = vec_lvsl (0, ref); | |
297 tmp0 = vec_mergeh (tmp0, tmp0); | |
298 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); | |
299 tmp1 = vec_lvsl (stride, ref); | |
300 tmp1 = vec_mergeh (tmp1, tmp1); | |
301 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); | |
302 | |
303 height = (height >> 1) - 1; | |
304 | |
305 ref0 = vec_ld (0, ref); | |
306 ref1 = vec_ld (7, ref); | |
307 ref += stride; | |
308 tmp0 = vec_perm (ref0, ref1, perm0); | |
309 ref0 = vec_ld (0, ref); | |
310 ref1 = vec_ld (7, ref); | |
311 ref += stride; | |
312 tmp1 = vec_perm (ref0, ref1, perm1); | |
313 tmp = vec_avg (tmp0, tmp1); | |
314 | |
315 do { | |
316 ref0 = vec_ld (0, ref); | |
317 ref1 = vec_ld (7, ref); | |
318 ref += stride; | |
319 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
320 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
321 dest += stride; | |
322 tmp0 = vec_perm (ref0, ref1, perm0); | |
323 tmp = vec_avg (tmp0, tmp1); | |
324 | |
325 ref0 = vec_ld (0, ref); | |
326 ref1 = vec_ld (7, ref); | |
327 ref += stride; | |
328 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
329 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
330 dest += stride; | |
331 tmp1 = vec_perm (ref0, ref1, perm1); | |
332 tmp = vec_avg (tmp0, tmp1); | |
333 } while (--height); | |
334 | |
335 ref0 = vec_ld (0, ref); | |
336 ref1 = vec_ld (7, ref); | |
337 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
338 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
339 dest += stride; | |
340 tmp0 = vec_perm (ref0, ref1, perm0); | |
341 tmp = vec_avg (tmp0, tmp1); | |
342 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
343 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
344 } | |
345 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
346 static void MC_put_xy_16_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
347 const int stride, int height) |
9857 | 348 { |
349 vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp; | |
350 vector_u8_t ones; | |
351 | |
352 ones = vec_splat_u8 (1); | |
353 permA = vec_lvsl (0, ref); | |
354 permB = vec_add (permA, ones); | |
355 | |
356 height = (height >> 1) - 1; | |
357 | |
358 ref0 = vec_ld (0, ref); | |
359 ref1 = vec_ld (16, ref); | |
360 ref += stride; | |
361 A = vec_perm (ref0, ref1, permA); | |
362 B = vec_perm (ref0, ref1, permB); | |
363 avg0 = vec_avg (A, B); | |
364 xor0 = vec_xor (A, B); | |
365 | |
366 ref0 = vec_ld (0, ref); | |
367 ref1 = vec_ld (16, ref); | |
368 ref += stride; | |
369 A = vec_perm (ref0, ref1, permA); | |
370 B = vec_perm (ref0, ref1, permB); | |
371 avg1 = vec_avg (A, B); | |
372 xor1 = vec_xor (A, B); | |
373 tmp = vec_sub (vec_avg (avg0, avg1), | |
374 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
375 vec_xor (avg0, avg1))); | |
376 | |
377 do { | |
378 ref0 = vec_ld (0, ref); | |
379 ref1 = vec_ld (16, ref); | |
380 ref += stride; | |
381 vec_st (tmp, 0, dest); | |
382 A = vec_perm (ref0, ref1, permA); | |
383 B = vec_perm (ref0, ref1, permB); | |
384 avg0 = vec_avg (A, B); | |
385 xor0 = vec_xor (A, B); | |
386 tmp = vec_sub (vec_avg (avg0, avg1), | |
387 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
388 vec_xor (avg0, avg1))); | |
389 | |
390 ref0 = vec_ld (0, ref); | |
391 ref1 = vec_ld (16, ref); | |
392 ref += stride; | |
393 vec_st (tmp, stride, dest); | |
394 dest += 2*stride; | |
395 A = vec_perm (ref0, ref1, permA); | |
396 B = vec_perm (ref0, ref1, permB); | |
397 avg1 = vec_avg (A, B); | |
398 xor1 = vec_xor (A, B); | |
399 tmp = vec_sub (vec_avg (avg0, avg1), | |
400 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
401 vec_xor (avg0, avg1))); | |
402 } while (--height); | |
403 | |
404 ref0 = vec_ld (0, ref); | |
405 ref1 = vec_ld (16, ref); | |
406 vec_st (tmp, 0, dest); | |
407 A = vec_perm (ref0, ref1, permA); | |
408 B = vec_perm (ref0, ref1, permB); | |
409 avg0 = vec_avg (A, B); | |
410 xor0 = vec_xor (A, B); | |
411 tmp = vec_sub (vec_avg (avg0, avg1), | |
412 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
413 vec_xor (avg0, avg1))); | |
414 vec_st (tmp, stride, dest); | |
415 } | |
416 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
417 static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
418 const int stride, int height) |
9857 | 419 { |
420 vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B; | |
421 vector_u8_t avg0, avg1, xor0, xor1, tmp, ones; | |
422 | |
423 ones = vec_splat_u8 (1); | |
424 perm0A = vec_lvsl (0, ref); | |
425 perm0A = vec_mergeh (perm0A, perm0A); | |
426 perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A); | |
427 perm0B = vec_add (perm0A, ones); | |
428 perm1A = vec_lvsl (stride, ref); | |
429 perm1A = vec_mergeh (perm1A, perm1A); | |
430 perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A); | |
431 perm1B = vec_add (perm1A, ones); | |
432 | |
433 height = (height >> 1) - 1; | |
434 | |
435 ref0 = vec_ld (0, ref); | |
436 ref1 = vec_ld (8, ref); | |
437 ref += stride; | |
438 A = vec_perm (ref0, ref1, perm0A); | |
439 B = vec_perm (ref0, ref1, perm0B); | |
440 avg0 = vec_avg (A, B); | |
441 xor0 = vec_xor (A, B); | |
442 | |
443 ref0 = vec_ld (0, ref); | |
444 ref1 = vec_ld (8, ref); | |
445 ref += stride; | |
446 A = vec_perm (ref0, ref1, perm1A); | |
447 B = vec_perm (ref0, ref1, perm1B); | |
448 avg1 = vec_avg (A, B); | |
449 xor1 = vec_xor (A, B); | |
450 tmp = vec_sub (vec_avg (avg0, avg1), | |
451 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
452 vec_xor (avg0, avg1))); | |
453 | |
454 do { | |
455 ref0 = vec_ld (0, ref); | |
456 ref1 = vec_ld (8, ref); | |
457 ref += stride; | |
458 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
459 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
460 dest += stride; | |
461 A = vec_perm (ref0, ref1, perm0A); | |
462 B = vec_perm (ref0, ref1, perm0B); | |
463 avg0 = vec_avg (A, B); | |
464 xor0 = vec_xor (A, B); | |
465 tmp = vec_sub (vec_avg (avg0, avg1), | |
466 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
467 vec_xor (avg0, avg1))); | |
468 | |
469 ref0 = vec_ld (0, ref); | |
470 ref1 = vec_ld (8, ref); | |
471 ref += stride; | |
472 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
473 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
474 dest += stride; | |
475 A = vec_perm (ref0, ref1, perm1A); | |
476 B = vec_perm (ref0, ref1, perm1B); | |
477 avg1 = vec_avg (A, B); | |
478 xor1 = vec_xor (A, B); | |
479 tmp = vec_sub (vec_avg (avg0, avg1), | |
480 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
481 vec_xor (avg0, avg1))); | |
482 } while (--height); | |
483 | |
484 ref0 = vec_ld (0, ref); | |
485 ref1 = vec_ld (8, ref); | |
486 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
487 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
488 dest += stride; | |
489 A = vec_perm (ref0, ref1, perm0A); | |
490 B = vec_perm (ref0, ref1, perm0B); | |
491 avg0 = vec_avg (A, B); | |
492 xor0 = vec_xor (A, B); | |
493 tmp = vec_sub (vec_avg (avg0, avg1), | |
494 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
495 vec_xor (avg0, avg1))); | |
496 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
497 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
498 } | |
499 | |
#if 0
/*
 * Disabled alternative version of MC_put_xy_8_altivec, excluded from
 * the build by the surrounding #if 0.
 *
 * It widens the four neighbouring bytes to 16 bits, sums them, adds 2
 * and shifts right by 2 before packing back to bytes, processing one
 * output row per loop pass.
 *
 * NOTE(review): as written it stores a full 16-byte vector with vec_st
 * for each row and ends every pass by assigning a vec_avg result to a
 * variable that is overwritten before being used -- presumably an
 * unfinished experiment; confirm before enabling.
 */
static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
                                 const int stride, int height)
{
    vector_u8_t sel_left, sel_right, lo, hi, top_l, top_r, bot_l, bot_r;
    vector_u8_t out, zero, one;
    vector_u16_t two, sum_top, sum_bot, sum;

    one = vec_splat_u8 (1);
    sel_left = vec_lvsl (0, ref);
    sel_right = vec_add (sel_left, one);

    zero = vec_splat_u8 (0);
    two = vec_splat_u16 (2);

    do {
        /* upper source row */
        lo = vec_ld (0, ref);
        hi = vec_ld (8, ref);
        ref += stride;
        top_l = vec_perm (lo, hi, sel_left);
        top_r = vec_perm (lo, hi, sel_right);

        /* lower source row (ref is not advanced again here) */
        lo = vec_ld (0, ref);
        hi = vec_ld (8, ref);
        bot_l = vec_perm (lo, hi, sel_left);
        bot_r = vec_perm (lo, hi, sel_right);

        sum_top = vec_add ((vector_u16_t)vec_mergeh (zero, top_l),
                           (vector_u16_t)vec_mergeh (zero, top_r));
        sum_bot = vec_add ((vector_u16_t)vec_mergeh (zero, bot_l),
                           (vector_u16_t)vec_mergeh (zero, bot_r));
        sum = vec_add (sum_top, sum_bot);
        sum = vec_sr (vec_add (sum, two), two);
        out = vec_pack (sum, sum);

        vec_st (out, 0, dest);
        dest += stride;
        out = vec_avg (vec_perm (lo, hi, sel_left),
                       vec_perm (lo, hi, sel_right));
    } while (--height);
}
#endif
539 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
540 static void MC_avg_o_16_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
541 const int stride, int height) |
9857 | 542 { |
543 vector_u8_t perm, ref0, ref1, tmp, prev; | |
544 | |
545 perm = vec_lvsl (0, ref); | |
546 | |
547 height = (height >> 1) - 1; | |
548 | |
549 ref0 = vec_ld (0, ref); | |
550 ref1 = vec_ld (15, ref); | |
551 ref += stride; | |
552 prev = vec_ld (0, dest); | |
553 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); | |
554 | |
555 do { | |
556 ref0 = vec_ld (0, ref); | |
557 ref1 = vec_ld (15, ref); | |
558 ref += stride; | |
559 prev = vec_ld (stride, dest); | |
560 vec_st (tmp, 0, dest); | |
561 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); | |
562 | |
563 ref0 = vec_ld (0, ref); | |
564 ref1 = vec_ld (15, ref); | |
565 ref += stride; | |
566 prev = vec_ld (2*stride, dest); | |
567 vec_st (tmp, stride, dest); | |
568 dest += 2*stride; | |
569 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); | |
570 } while (--height); | |
571 | |
572 ref0 = vec_ld (0, ref); | |
573 ref1 = vec_ld (15, ref); | |
574 prev = vec_ld (stride, dest); | |
575 vec_st (tmp, 0, dest); | |
576 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); | |
577 vec_st (tmp, stride, dest); | |
578 } | |
579 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
580 static void MC_avg_o_8_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
581 const int stride, int height) |
9857 | 582 { |
583 vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1, prev; | |
584 | |
585 tmp0 = vec_lvsl (0, ref); | |
586 tmp0 = vec_mergeh (tmp0, tmp0); | |
587 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); | |
588 tmp1 = vec_lvsl (stride, ref); | |
589 tmp1 = vec_mergeh (tmp1, tmp1); | |
590 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); | |
591 | |
592 height = (height >> 1) - 1; | |
593 | |
594 ref0 = vec_ld (0, ref); | |
595 ref1 = vec_ld (7, ref); | |
596 ref += stride; | |
597 prev = vec_ld (0, dest); | |
598 tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0)); | |
599 | |
600 do { | |
601 ref0 = vec_ld (0, ref); | |
602 ref1 = vec_ld (7, ref); | |
603 ref += stride; | |
604 prev = vec_ld (stride, dest); | |
605 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); | |
606 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); | |
607 dest += stride; | |
608 tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1)); | |
609 | |
610 ref0 = vec_ld (0, ref); | |
611 ref1 = vec_ld (7, ref); | |
612 ref += stride; | |
613 prev = vec_ld (stride, dest); | |
614 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); | |
615 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); | |
616 dest += stride; | |
617 tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0)); | |
618 } while (--height); | |
619 | |
620 ref0 = vec_ld (0, ref); | |
621 ref1 = vec_ld (7, ref); | |
622 prev = vec_ld (stride, dest); | |
623 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); | |
624 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); | |
625 dest += stride; | |
626 tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1)); | |
627 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); | |
628 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); | |
629 } | |
630 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
631 static void MC_avg_x_16_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
632 const int stride, int height) |
9857 | 633 { |
634 vector_u8_t permA, permB, ref0, ref1, tmp, prev; | |
635 | |
636 permA = vec_lvsl (0, ref); | |
637 permB = vec_add (permA, vec_splat_u8 (1)); | |
638 | |
639 height = (height >> 1) - 1; | |
640 | |
641 ref0 = vec_ld (0, ref); | |
642 ref1 = vec_ld (16, ref); | |
643 prev = vec_ld (0, dest); | |
644 ref += stride; | |
645 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), | |
646 vec_perm (ref0, ref1, permB))); | |
647 | |
648 do { | |
649 ref0 = vec_ld (0, ref); | |
650 ref1 = vec_ld (16, ref); | |
651 ref += stride; | |
652 prev = vec_ld (stride, dest); | |
653 vec_st (tmp, 0, dest); | |
654 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), | |
655 vec_perm (ref0, ref1, permB))); | |
656 | |
657 ref0 = vec_ld (0, ref); | |
658 ref1 = vec_ld (16, ref); | |
659 ref += stride; | |
660 prev = vec_ld (2*stride, dest); | |
661 vec_st (tmp, stride, dest); | |
662 dest += 2*stride; | |
663 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), | |
664 vec_perm (ref0, ref1, permB))); | |
665 } while (--height); | |
666 | |
667 ref0 = vec_ld (0, ref); | |
668 ref1 = vec_ld (16, ref); | |
669 prev = vec_ld (stride, dest); | |
670 vec_st (tmp, 0, dest); | |
671 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), | |
672 vec_perm (ref0, ref1, permB))); | |
673 vec_st (tmp, stride, dest); | |
674 } | |
675 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
676 static void MC_avg_x_8_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
677 const int stride, int height) |
9857 | 678 { |
679 vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1; | |
680 vector_u8_t prev; | |
681 | |
682 ones = vec_splat_u8 (1); | |
683 tmp0 = vec_lvsl (0, ref); | |
684 tmp0 = vec_mergeh (tmp0, tmp0); | |
685 perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); | |
686 perm0B = vec_add (perm0A, ones); | |
687 tmp1 = vec_lvsl (stride, ref); | |
688 tmp1 = vec_mergeh (tmp1, tmp1); | |
689 perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); | |
690 perm1B = vec_add (perm1A, ones); | |
691 | |
692 height = (height >> 1) - 1; | |
693 | |
694 ref0 = vec_ld (0, ref); | |
695 ref1 = vec_ld (8, ref); | |
696 prev = vec_ld (0, dest); | |
697 ref += stride; | |
698 tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A), | |
699 vec_perm (ref0, ref1, perm0B))); | |
700 | |
701 do { | |
702 ref0 = vec_ld (0, ref); | |
703 ref1 = vec_ld (8, ref); | |
704 ref += stride; | |
705 prev = vec_ld (stride, dest); | |
706 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); | |
707 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); | |
708 dest += stride; | |
709 tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A), | |
710 vec_perm (ref0, ref1, perm1B))); | |
711 | |
712 ref0 = vec_ld (0, ref); | |
713 ref1 = vec_ld (8, ref); | |
714 ref += stride; | |
715 prev = vec_ld (stride, dest); | |
716 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); | |
717 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); | |
718 dest += stride; | |
719 tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A), | |
720 vec_perm (ref0, ref1, perm0B))); | |
721 } while (--height); | |
722 | |
723 ref0 = vec_ld (0, ref); | |
724 ref1 = vec_ld (8, ref); | |
725 prev = vec_ld (stride, dest); | |
726 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); | |
727 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); | |
728 dest += stride; | |
729 tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A), | |
730 vec_perm (ref0, ref1, perm1B))); | |
731 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); | |
732 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); | |
733 } | |
734 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
/*
 * MC_avg_y_16_altivec: 16-byte-wide prediction with vertical half-pel
 * interpolation, averaged into the existing contents of dest.
 *
 * For every output row the code computes, per byte,
 *     dest = vec_avg (dest, vec_avg (ref_row, ref_row_below))
 * where vec_avg is the AltiVec rounding average, (a + b + 1) >> 1.
 *
 * dest   - output rows, read and written with vec_ld/vec_st (full 16-byte
 *          aligned accesses); presumably 16-byte aligned - confirm with caller.
 * ref    - reference rows; may be misaligned, each row is fetched with two
 *          aligned loads and realigned with vec_perm.  height + 1 rows are read.
 * stride - byte distance between consecutive rows of both dest and ref.
 * height - number of output rows.  Rows are produced two per loop iteration
 *          with one more pair split between prologue and epilogue, and the
 *          do/while always runs at least once, so height must be even and
 *          at least 4.
 */
735 static void MC_avg_y_16_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
736 const int stride, int height) |
9857 | 737 { |
738 vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp, prev; | |
739 | |
/* Realignment permute derived from ref's offset inside its 16-byte block.
 * The same vector is reused for every row, which presumes that adding
 * stride does not change that offset - TODO confirm stride % 16 == 0. */
740 perm = vec_lvsl (0, ref); | |
741 | |
/* Loop trip count: two rows per iteration, minus the pair handled by the
 * prologue/epilogue. */
742 height = (height >> 1) - 1; | |
743 | |
/* Prologue: fetch ref rows 0 and 1 (offsets 0 and 15 cover the two aligned
 * blocks that can hold 16 misaligned bytes) and compute output row 0.
 * The result is kept in tmp; it is stored one step later so that the next
 * loads can be issued first. */
744 ref0 = vec_ld (0, ref); | |
745 ref1 = vec_ld (15, ref); | |
746 ref += stride; | |
747 tmp0 = vec_perm (ref0, ref1, perm); | |
748 ref0 = vec_ld (0, ref); | |
749 ref1 = vec_ld (15, ref); | |
750 ref += stride; | |
751 prev = vec_ld (0, dest); | |
752 tmp1 = vec_perm (ref0, ref1, perm); | |
753 tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); | |
754 | |
755 do { | |
/* First half: load the next ref row into tmp0, store the pending row at
 * dest, and compute the row at dest + stride from tmp1 (row above) and
 * tmp0 (row below). */
756 ref0 = vec_ld (0, ref); | |
757 ref1 = vec_ld (15, ref); | |
758 ref += stride; | |
759 prev = vec_ld (stride, dest); | |
760 vec_st (tmp, 0, dest); | |
761 tmp0 = vec_perm (ref0, ref1, perm); | |
762 tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); | |
763 | |
/* Second half: same step with the roles of tmp0/tmp1 swapped; dest is
 * advanced by two rows once the pending row has been stored. */
764 ref0 = vec_ld (0, ref); | |
765 ref1 = vec_ld (15, ref); | |
766 ref += stride; | |
767 prev = vec_ld (2*stride, dest); | |
768 vec_st (tmp, stride, dest); | |
769 dest += 2*stride; | |
770 tmp1 = vec_perm (ref0, ref1, perm); | |
771 tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); | |
772 } while (--height); | |
773 | |
/* Epilogue: load the last ref row, flush the pending row, then compute and
 * store the final row at dest + stride. */
774 ref0 = vec_ld (0, ref); | |
775 ref1 = vec_ld (15, ref); | |
776 prev = vec_ld (stride, dest); | |
777 vec_st (tmp, 0, dest); | |
778 tmp0 = vec_perm (ref0, ref1, perm); | |
779 tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); | |
780 vec_st (tmp, stride, dest); | |
781 } | |
782 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
/*
 * MC_avg_y_8_altivec: 8-byte-wide prediction with vertical half-pel
 * interpolation, averaged into the existing contents of dest.
 *
 * For every output row the code computes, per byte,
 *     dest = vec_avg (dest, vec_avg (ref_row, ref_row_below))
 * where vec_avg is the AltiVec rounding average, (a + b + 1) >> 1.
 * All arithmetic is done on full 16-byte vectors; only 8 bytes per row are
 * written back, using two 32-bit vec_ste element stores.
 *
 * dest   - output rows; presumably 8-byte aligned so that both word stores
 *          land inside the 16-byte block loaded into prev - confirm.
 * ref    - reference rows, may be misaligned; height + 1 rows are read.
 * stride - byte distance between consecutive rows of both dest and ref.
 * height - number of output rows; must be even and at least 4, because the
 *          do/while runs at least once and handles two rows per iteration.
 */
783 static void MC_avg_y_8_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
784 const int stride, int height) |
9857 | 785 { |
786 vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1, prev; | |
787 | |
/* Build the permute for even rows.  vec_lvsl yields s, s+1, ..., s+15
 * (s = misalignment of ref); mergeh duplicates each of the first 8 bytes and
 * pack keeps one byte per pair, giving s..s+7 repeated in both halves.  The
 * 8 source bytes therefore appear in both vector halves, so they line up
 * with dest whichever half of its 16-byte block dest occupies.
 * tmp0/tmp1 are only scratch here. */
788 tmp0 = vec_lvsl (0, ref); | |
789 tmp0 = vec_mergeh (tmp0, tmp0); | |
790 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); | |
/* Same construction for odd rows, based on ref + stride, since the
 * misalignment of odd rows can differ from that of even rows. */
791 tmp1 = vec_lvsl (stride, ref); | |
792 tmp1 = vec_mergeh (tmp1, tmp1); | |
793 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); | |
794 | |
/* Loop trip count: two rows per iteration, minus the pair handled by the
 * prologue/epilogue. */
795 height = (height >> 1) - 1; | |
796 | |
/* Prologue: fetch ref rows 0 and 1 (offsets 0 and 7 cover the aligned
 * blocks that can hold 8 misaligned bytes) and compute output row 0 into
 * tmp; the store is deferred so the next loads can be issued first. */
797 ref0 = vec_ld (0, ref); | |
798 ref1 = vec_ld (7, ref); | |
799 ref += stride; | |
800 tmp0 = vec_perm (ref0, ref1, perm0); | |
801 ref0 = vec_ld (0, ref); | |
802 ref1 = vec_ld (7, ref); | |
803 ref += stride; | |
804 prev = vec_ld (0, dest); | |
805 tmp1 = vec_perm (ref0, ref1, perm1); | |
806 tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); | |
807 | |
808 do { | |
/* Even ref row arrives in tmp0: store the pending row as two 32-bit
 * elements (bytes 0-3 and 4-7 at dest), then compute the next row. */
809 ref0 = vec_ld (0, ref); | |
810 ref1 = vec_ld (7, ref); | |
811 ref += stride; | |
812 prev = vec_ld (stride, dest); | |
813 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
814 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
815 dest += stride; | |
816 tmp0 = vec_perm (ref0, ref1, perm0); | |
817 tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); | |
818 | |
/* Odd ref row arrives in tmp1 (realigned with perm1): same step. */
819 ref0 = vec_ld (0, ref); | |
820 ref1 = vec_ld (7, ref); | |
821 ref += stride; | |
822 prev = vec_ld (stride, dest); | |
823 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
824 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
825 dest += stride; | |
826 tmp1 = vec_perm (ref0, ref1, perm1); | |
827 tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); | |
828 } while (--height); | |
829 | |
/* Epilogue: load the last ref row, flush the pending row, then compute and
 * store the final row. */
830 ref0 = vec_ld (0, ref); | |
831 ref1 = vec_ld (7, ref); | |
832 prev = vec_ld (stride, dest); | |
833 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
834 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
835 dest += stride; | |
836 tmp0 = vec_perm (ref0, ref1, perm0); | |
837 tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); | |
838 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
839 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
840 } | |
841 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
/*
 * MC_avg_xy_16_altivec: 16-byte-wide prediction with half-pel interpolation
 * in both directions, averaged into the existing contents of dest.
 *
 * For each output byte, with a,b the horizontally adjacent bytes of one ref
 * row and c,d those of the row below, the code computes
 *     p    = (a + b + c + d + 2) >> 2
 *     dest = vec_avg (dest, p)              (vec_avg = (x + y + 1) >> 1)
 * p is obtained from cascaded vec_avg operations followed by a one-bit
 * correction that removes the double rounding (see the first use below).
 *
 * dest   - output rows, read and written with vec_ld/vec_st (full 16-byte
 *          aligned accesses); presumably 16-byte aligned - confirm with caller.
 * ref    - reference rows, may be misaligned; 17 bytes of height + 1 rows
 *          are consumed.
 * stride - byte distance between consecutive rows of both dest and ref.
 * height - number of output rows; must be even and at least 4, because the
 *          do/while runs at least once and handles two rows per iteration.
 */
842 static void MC_avg_xy_16_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
843 const int stride, int height) |
9857 | 844 { |
845 vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp; | |
846 vector_u8_t ones, prev; | |
847 | |
/* permA selects the 16 bytes starting at ref, permB the 16 bytes starting
 * one byte further right.  Both are reused for every row, which presumes
 * that adding stride does not change ref's offset within a 16-byte block -
 * TODO confirm stride % 16 == 0. */
848 ones = vec_splat_u8 (1); | |
849 permA = vec_lvsl (0, ref); | |
850 permB = vec_add (permA, ones); | |
851 | |
/* Loop trip count: two rows per iteration, minus the pair handled by the
 * prologue/epilogue. */
852 height = (height >> 1) - 1; | |
853 | |
/* Prologue, ref row 0: horizontal rounding average and the xor of the two
 * neighbours (bit 0 of the xor tells whether that average rounded up).
 * The second load is at offset 16 because 17 source bytes are needed. */
854 ref0 = vec_ld (0, ref); | |
855 ref1 = vec_ld (16, ref); | |
856 ref += stride; | |
857 A = vec_perm (ref0, ref1, permA); | |
858 B = vec_perm (ref0, ref1, permB); | |
859 avg0 = vec_avg (A, B); | |
860 xor0 = vec_xor (A, B); | |
861 | |
/* Prologue, ref row 1, then output row 0.  vec_avg (avg0, avg1) alone would
 * round twice; the subtracted term is 1 exactly when bit 0 of (xor0 | xor1)
 * and bit 0 of (avg0 ^ avg1) are both set, i.e. a horizontal average rounded
 * up and the vertical average rounds up again.  The corrected value equals
 * (a + b + c + d + 2) >> 2.  The result is kept in tmp and stored later. */
862 ref0 = vec_ld (0, ref); | |
863 ref1 = vec_ld (16, ref); | |
864 ref += stride; | |
865 prev = vec_ld (0, dest); | |
866 A = vec_perm (ref0, ref1, permA); | |
867 B = vec_perm (ref0, ref1, permB); | |
868 avg1 = vec_avg (A, B); | |
869 xor1 = vec_xor (A, B); | |
870 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), | |
871 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
872 vec_xor (avg0, avg1)))); | |
873 | |
874 do { | |
/* First half: next ref row goes to avg0/xor0; store the pending row at dest
 * and compute the row at dest + stride. */
875 ref0 = vec_ld (0, ref); | |
876 ref1 = vec_ld (16, ref); | |
877 ref += stride; | |
878 prev = vec_ld (stride, dest); | |
879 vec_st (tmp, 0, dest); | |
880 A = vec_perm (ref0, ref1, permA); | |
881 B = vec_perm (ref0, ref1, permB); | |
882 avg0 = vec_avg (A, B); | |
883 xor0 = vec_xor (A, B); | |
884 tmp = vec_avg (prev, | |
885 vec_sub (vec_avg (avg0, avg1), | |
886 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
887 vec_xor (avg0, avg1)))); | |
888 | |
/* Second half: next ref row goes to avg1/xor1; dest advances by two rows
 * once the pending row has been stored. */
889 ref0 = vec_ld (0, ref); | |
890 ref1 = vec_ld (16, ref); | |
891 ref += stride; | |
892 prev = vec_ld (2*stride, dest); | |
893 vec_st (tmp, stride, dest); | |
894 dest += 2*stride; | |
895 A = vec_perm (ref0, ref1, permA); | |
896 B = vec_perm (ref0, ref1, permB); | |
897 avg1 = vec_avg (A, B); | |
898 xor1 = vec_xor (A, B); | |
899 tmp = vec_avg (prev, | |
900 vec_sub (vec_avg (avg0, avg1), | |
901 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
902 vec_xor (avg0, avg1)))); | |
903 } while (--height); | |
904 | |
/* Epilogue: load the last ref row, flush the pending row, then compute and
 * store the final row at dest + stride. */
905 ref0 = vec_ld (0, ref); | |
906 ref1 = vec_ld (16, ref); | |
907 prev = vec_ld (stride, dest); | |
908 vec_st (tmp, 0, dest); | |
909 A = vec_perm (ref0, ref1, permA); | |
910 B = vec_perm (ref0, ref1, permB); | |
911 avg0 = vec_avg (A, B); | |
912 xor0 = vec_xor (A, B); | |
913 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), | |
914 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
915 vec_xor (avg0, avg1)))); | |
916 vec_st (tmp, stride, dest); | |
917 } | |
918 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
/*
 * MC_avg_xy_8_altivec: 8-byte-wide prediction with half-pel interpolation
 * in both directions, averaged into the existing contents of dest.
 *
 * For each output byte, with a,b the horizontally adjacent bytes of one ref
 * row and c,d those of the row below, the code computes
 *     p    = (a + b + c + d + 2) >> 2
 *     dest = vec_avg (dest, p)              (vec_avg = (x + y + 1) >> 1)
 * p is obtained from cascaded vec_avg operations followed by a one-bit
 * correction that removes the double rounding (see the first use below).
 * All arithmetic is done on full 16-byte vectors; only 8 bytes per row are
 * written back, using two 32-bit vec_ste element stores.
 *
 * dest   - output rows; presumably 8-byte aligned so that both word stores
 *          land inside the 16-byte block loaded into prev - confirm.
 * ref    - reference rows, may be misaligned; 9 bytes of height + 1 rows
 *          are consumed.
 * stride - byte distance between consecutive rows of both dest and ref.
 * height - number of output rows; must be even and at least 4, because the
 *          do/while runs at least once and handles two rows per iteration.
 */
919 static void MC_avg_xy_8_altivec (uint8_t * dest, const uint8_t * ref, |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
920 const int stride, int height) |
9857 | 921 { |
922 vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B; | |
923 vector_u8_t avg0, avg1, xor0, xor1, tmp, ones, prev; | |
924 | |
/* Even-row permutes.  lvsl/mergeh/pack yields s..s+7 repeated in both
 * vector halves (s = misalignment of ref), so the 8 source bytes line up
 * with dest whichever half of its 16-byte block dest occupies.  perm0B is
 * the same selection shifted one byte to the right. */
925 ones = vec_splat_u8 (1); | |
926 perm0A = vec_lvsl (0, ref); | |
927 perm0A = vec_mergeh (perm0A, perm0A); | |
928 perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A); | |
929 perm0B = vec_add (perm0A, ones); | |
/* Odd-row permutes, built from ref + stride because the misalignment of odd
 * rows can differ from that of even rows. */
930 perm1A = vec_lvsl (stride, ref); | |
931 perm1A = vec_mergeh (perm1A, perm1A); | |
932 perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A); | |
933 perm1B = vec_add (perm1A, ones); | |
934 | |
/* Loop trip count: two rows per iteration, minus the pair handled by the
 * prologue/epilogue. */
935 height = (height >> 1) - 1; | |
936 | |
/* Prologue, ref row 0: horizontal rounding average and the xor of the two
 * neighbours (bit 0 of the xor tells whether that average rounded up).
 * The second load is at offset 8 because 9 source bytes are needed. */
937 ref0 = vec_ld (0, ref); | |
938 ref1 = vec_ld (8, ref); | |
939 ref += stride; | |
940 A = vec_perm (ref0, ref1, perm0A); | |
941 B = vec_perm (ref0, ref1, perm0B); | |
942 avg0 = vec_avg (A, B); | |
943 xor0 = vec_xor (A, B); | |
944 | |
/* Prologue, ref row 1, then output row 0.  vec_avg (avg0, avg1) alone would
 * round twice; the subtracted term is 1 exactly when bit 0 of (xor0 | xor1)
 * and bit 0 of (avg0 ^ avg1) are both set, i.e. a horizontal average rounded
 * up and the vertical average rounds up again.  The corrected value equals
 * (a + b + c + d + 2) >> 2.  The result is kept in tmp and stored later. */
945 ref0 = vec_ld (0, ref); | |
946 ref1 = vec_ld (8, ref); | |
947 ref += stride; | |
948 prev = vec_ld (0, dest); | |
949 A = vec_perm (ref0, ref1, perm1A); | |
950 B = vec_perm (ref0, ref1, perm1B); | |
951 avg1 = vec_avg (A, B); | |
952 xor1 = vec_xor (A, B); | |
953 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), | |
954 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
955 vec_xor (avg0, avg1)))); | |
956 | |
957 do { | |
/* Even ref row goes to avg0/xor0: store the pending row as two 32-bit
 * elements (bytes 0-3 and 4-7 at dest), then compute the next row. */
958 ref0 = vec_ld (0, ref); | |
959 ref1 = vec_ld (8, ref); | |
960 ref += stride; | |
961 prev = vec_ld (stride, dest); | |
962 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
963 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
964 dest += stride; | |
965 A = vec_perm (ref0, ref1, perm0A); | |
966 B = vec_perm (ref0, ref1, perm0B); | |
967 avg0 = vec_avg (A, B); | |
968 xor0 = vec_xor (A, B); | |
969 tmp = vec_avg (prev, | |
970 vec_sub (vec_avg (avg0, avg1), | |
971 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
972 vec_xor (avg0, avg1)))); | |
973 | |
/* Odd ref row goes to avg1/xor1 (realigned with perm1A/perm1B): same step. */
974 ref0 = vec_ld (0, ref); | |
975 ref1 = vec_ld (8, ref); | |
976 ref += stride; | |
977 prev = vec_ld (stride, dest); | |
978 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
979 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
980 dest += stride; | |
981 A = vec_perm (ref0, ref1, perm1A); | |
982 B = vec_perm (ref0, ref1, perm1B); | |
983 avg1 = vec_avg (A, B); | |
984 xor1 = vec_xor (A, B); | |
985 tmp = vec_avg (prev, | |
986 vec_sub (vec_avg (avg0, avg1), | |
987 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
988 vec_xor (avg0, avg1)))); | |
989 } while (--height); | |
990 | |
/* Epilogue: load the last ref row, flush the pending row, then compute and
 * store the final row. */
991 ref0 = vec_ld (0, ref); | |
992 ref1 = vec_ld (8, ref); | |
993 prev = vec_ld (stride, dest); | |
994 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
995 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
996 dest += stride; | |
997 A = vec_perm (ref0, ref1, perm0A); | |
998 B = vec_perm (ref0, ref1, perm0B); | |
999 avg0 = vec_avg (A, B); | |
1000 xor0 = vec_xor (A, B); | |
1001 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), | |
1002 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
1003 vec_xor (avg0, avg1)))); | |
1004 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
1005 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
1006 } | |
1007 | |
10271
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
/* NOTE(review): MPEG2_MC_EXTERN is defined outside this file; presumably it
 * expands to the exported table of the MC_*_altivec routines defined above -
 * confirm against the macro definition. */
1008 MPEG2_MC_EXTERN (altivec) |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
1009 |
f0e14d641160
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9857
diff
changeset
|
/* Closes the #ifdef ARCH_PPC opened after the config.h include. */
1010 #endif |