Mercurial > mplayer.hg
comparison: libmpeg2/motion_comp_altivec.c @ 9857:89b48bc6c441 (compared with 9856:08496327b7ec)
Importing libmpeg2 from mpeg2dec-0.3.1
author:   arpi
date:     Sun, 06 Apr 2003 16:41:49 +0000
parents:
children: f0e14d641160

1 /* | |
2 * motion_comp_altivec.c | |
3 * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> | |
4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> | |
5 * | |
6 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. | |
7 * See http://libmpeg2.sourceforge.net/ for updates. | |
8 * | |
9 * mpeg2dec is free software; you can redistribute it and/or modify | |
10 * it under the terms of the GNU General Public License as published by | |
11 * the Free Software Foundation; either version 2 of the License, or | |
12 * (at your option) any later version. | |
13 * | |
14 * mpeg2dec is distributed in the hope that it will be useful, | |
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 * GNU General Public License for more details. | |
18 * | |
19 * You should have received a copy of the GNU General Public License | |
20 * along with this program; if not, write to the Free Software | |
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
22 */ | |
23 | |
24 #ifndef __ALTIVEC__ | |
25 | |
26 #include "config.h" | |
27 | |
28 #ifdef ARCH_PPC | |
29 | |
30 #include <inttypes.h> | |
31 | |
32 #include "mpeg2.h" | |
33 #include "mpeg2_internal.h" | |
34 | |
35 /* | |
36 * The asm code is generated with: | |
37 * | |
38 * gcc-2.95 -fvec -D__ALTIVEC__ -O9 -fomit-frame-pointer -mregnames -S | |
39 * motion_comp_altivec.c | |
40 * | |
41 * sed 's/.L/._L/g' motion_comp_altivec.s | | |
42 * awk '{args=""; len=split ($2, arg, ","); | |
43 * for (i=1; i<=len; i++) { a=arg[i]; if (i<len) a=a","; | |
44 * args = args sprintf ("%-6s", a) } | |
45 * printf ("\t\"\t%-16s%-24s\\n\"\n", $1, args) }' | | |
46 * unexpand -a | |
47 */ | |
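
/*
 * Overview of the kernels below (summarized from the plain-C versions in the
 * __ALTIVEC__ branch later in this file):
 *
 *   MC_put_*  write the motion-compensated prediction into dest;
 *   MC_avg_*  additionally average it with the bytes already in dest.
 *   _o_ copies the block; _x_, _y_ and _xy_ apply horizontal, vertical and
 *   2-D half-pel interpolation; _16_ / _8_ is the block width in pixels.
 *
 *   The hand-scheduled asm uses the PowerPC ABI argument registers directly:
 *   %r3 = dest, %r4 = ref, %r5 = stride, %r6 = height.
 *
 * As a minimal scalar sketch (for reference only; MC_put_o_16_ref is a
 * hypothetical name), the simplest kernel is equivalent to:
 *
 *   static void MC_put_o_16_ref (uint8_t * dest, const uint8_t * ref,
 *                                int stride, int height)
 *   {
 *       int x, y;
 *       for (y = 0; y < height; y++) {
 *           for (x = 0; x < 16; x++)
 *               dest[x] = ref[x];    // plain copy of one 16-pixel row
 *           dest += stride;
 *           ref += stride;
 *       }
 *   }
 */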
48 | |
49 static void MC_put_o_16_altivec (uint8_t * dest, const uint8_t * ref, | |
50 int stride, int height) | |
51 { | |
52 asm (" \n" | |
53 " srawi %r6, %r6, 1 \n" | |
54 " li %r9, 15 \n" | |
55 " addi %r6, %r6, -1 \n" | |
56 " lvsl %v12, 0, %r4 \n" | |
57 " mtctr %r6 \n" | |
58 " lvx %v1, 0, %r4 \n" | |
59 " lvx %v0, %r9, %r4 \n" | |
60 " add %r0, %r5, %r5 \n" | |
61 " vperm %v13, %v1, %v0, %v12 \n" | |
62 " add %r4, %r4, %r5 \n" | |
63 "._L6: \n" | |
64 " li %r9, 15 \n" | |
65 " lvx %v1, 0, %r4 \n" | |
66 " lvx %v0, %r9, %r4 \n" | |
67 " stvx %v13, 0, %r3 \n" | |
68 " vperm %v13, %v1, %v0, %v12 \n" | |
69 " add %r4, %r4, %r5 \n" | |
70 " lvx %v1, 0, %r4 \n" | |
71 " lvx %v0, %r9, %r4 \n" | |
72 " stvx %v13, %r5, %r3 \n" | |
73 " vperm %v13, %v1, %v0, %v12 \n" | |
74 " add %r4, %r4, %r5 \n" | |
75 " add %r3, %r3, %r0 \n" | |
76 " bdnz ._L6 \n" | |
77 " lvx %v0, %r9, %r4 \n" | |
78 " lvx %v1, 0, %r4 \n" | |
79 " stvx %v13, 0, %r3 \n" | |
80 " vperm %v13, %v1, %v0, %v12 \n" | |
81 " stvx %v13, %r5, %r3 \n" | |
82 ); | |
83 } | |
84 | |
85 static void MC_put_o_8_altivec (uint8_t * dest, const uint8_t * ref, | |
86 int stride, int height) | |
87 { | |
88 asm (" \n" | |
89 " lvsl %v12, 0, %r4 \n" | |
90 " lvsl %v1, %r5, %r4 \n" | |
91 " vmrghb %v12, %v12, %v12 \n" | |
92 " srawi %r6, %r6, 1 \n" | |
93 " li %r9, 7 \n" | |
94 " vmrghb %v1, %v1, %v1 \n" | |
95 " addi %r6, %r6, -1 \n" | |
96 " vpkuhum %v10, %v12, %v12 \n" | |
97 " lvx %v13, 0, %r4 \n" | |
98 " mtctr %r6 \n" | |
99 " vpkuhum %v11, %v1, %v1 \n" | |
100 " lvx %v0, %r9, %r4 \n" | |
101 " add %r4, %r4, %r5 \n" | |
102 " vperm %v12, %v13, %v0, %v10 \n" | |
103 "._L11: \n" | |
104 " li %r9, 7 \n" | |
105 " lvx %v0, %r9, %r4 \n" | |
106 " lvx %v13, 0, %r4 \n" | |
107 " stvewx %v12, 0, %r3 \n" | |
108 " li %r9, 4 \n" | |
109 " vperm %v1, %v13, %v0, %v11 \n" | |
110 " stvewx %v12, %r9, %r3 \n" | |
111 " add %r4, %r4, %r5 \n" | |
112 " li %r9, 7 \n" | |
113 " lvx %v0, %r9, %r4 \n" | |
114 " lvx %v13, 0, %r4 \n" | |
115 " add %r3, %r3, %r5 \n" | |
116 " stvewx %v1, 0, %r3 \n" | |
117 " vperm %v12, %v13, %v0, %v10 \n" | |
118 " li %r9, 4 \n" | |
119 " stvewx %v1, %r9, %r3 \n" | |
120 " add %r4, %r4, %r5 \n" | |
121 " add %r3, %r3, %r5 \n" | |
122 " bdnz ._L11 \n" | |
123 " li %r9, 7 \n" | |
124 " lvx %v0, %r9, %r4 \n" | |
125 " lvx %v13, 0, %r4 \n" | |
126 " stvewx %v12, 0, %r3 \n" | |
127 " li %r9, 4 \n" | |
128 " vperm %v1, %v13, %v0, %v11 \n" | |
129 " stvewx %v12, %r9, %r3 \n" | |
130 " add %r3, %r3, %r5 \n" | |
131 " stvewx %v1, 0, %r3 \n" | |
132 " stvewx %v1, %r9, %r3 \n" | |
133 ); | |
134 } | |
135 | |
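/*
 * _x_ variants: horizontal half-pel interpolation.  Two permute vectors
 * (lvsl and lvsl + 1) extract the same row shifted by 0 and by 1 byte,
 * and vavgub averages them with rounding: (a + b + 1) >> 1.
 */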
136 static void MC_put_x_16_altivec (uint8_t * dest, const uint8_t * ref, | |
137 int stride, int height) | |
138 { | |
139 asm (" \n" | |
140 " lvsl %v11, 0, %r4 \n" | |
141 " vspltisb %v0, 1 \n" | |
142 " li %r9, 16 \n" | |
143 " lvx %v12, 0, %r4 \n" | |
144 " vaddubm %v10, %v11, %v0 \n" | |
145 " lvx %v13, %r9, %r4 \n" | |
146 " srawi %r6, %r6, 1 \n" | |
147 " addi %r6, %r6, -1 \n" | |
148 " vperm %v1, %v12, %v13, %v10 \n" | |
149 " vperm %v0, %v12, %v13, %v11 \n" | |
150 " mtctr %r6 \n" | |
151 " add %r0, %r5, %r5 \n" | |
152 " add %r4, %r4, %r5 \n" | |
153 " vavgub %v0, %v0, %v1 \n" | |
154 "._L16: \n" | |
155 " li %r9, 16 \n" | |
156 " lvx %v12, 0, %r4 \n" | |
157 " lvx %v13, %r9, %r4 \n" | |
158 " stvx %v0, 0, %r3 \n" | |
159 " vperm %v1, %v12, %v13, %v10 \n" | |
160 " add %r4, %r4, %r5 \n" | |
161 " vperm %v0, %v12, %v13, %v11 \n" | |
162 " lvx %v12, 0, %r4 \n" | |
163 " lvx %v13, %r9, %r4 \n" | |
164 " vavgub %v0, %v0, %v1 \n" | |
165 " stvx %v0, %r5, %r3 \n" | |
166 " vperm %v1, %v12, %v13, %v10 \n" | |
167 " add %r4, %r4, %r5 \n" | |
168 " vperm %v0, %v12, %v13, %v11 \n" | |
169 " add %r3, %r3, %r0 \n" | |
170 " vavgub %v0, %v0, %v1 \n" | |
171 " bdnz ._L16 \n" | |
172 " lvx %v13, %r9, %r4 \n" | |
173 " lvx %v12, 0, %r4 \n" | |
174 " stvx %v0, 0, %r3 \n" | |
175 " vperm %v1, %v12, %v13, %v10 \n" | |
176 " vperm %v0, %v12, %v13, %v11 \n" | |
177 " vavgub %v0, %v0, %v1 \n" | |
178 " stvx %v0, %r5, %r3 \n" | |
179 ); | |
180 } | |
181 | |
182 static void MC_put_x_8_altivec (uint8_t * dest, const uint8_t * ref, | |
183 int stride, int height) | |
184 { | |
185 asm (" \n" | |
186 " lvsl %v0, 0, %r4 \n" | |
187 " vspltisb %v13, 1 \n" | |
188 " lvsl %v10, %r5, %r4 \n" | |
189 " vmrghb %v0, %v0, %v0 \n" | |
190 " li %r9, 8 \n" | |
191 " lvx %v11, 0, %r4 \n" | |
192 " vmrghb %v10, %v10, %v10 \n" | |
193 " vpkuhum %v8, %v0, %v0 \n" | |
194 " lvx %v12, %r9, %r4 \n" | |
195 " srawi %r6, %r6, 1 \n" | |
196 " vpkuhum %v9, %v10, %v10 \n" | |
197 " vaddubm %v7, %v8, %v13 \n" | |
198 " addi %r6, %r6, -1 \n" | |
199 " vperm %v1, %v11, %v12, %v8 \n" | |
200 " mtctr %r6 \n" | |
201 " vaddubm %v13, %v9, %v13 \n" | |
202 " add %r4, %r4, %r5 \n" | |
203 " vperm %v0, %v11, %v12, %v7 \n" | |
204 " vavgub %v0, %v1, %v0 \n" | |
205 "._L21: \n" | |
206 " li %r9, 8 \n" | |
207 " lvx %v12, %r9, %r4 \n" | |
208 " lvx %v11, 0, %r4 \n" | |
209 " stvewx %v0, 0, %r3 \n" | |
210 " li %r9, 4 \n" | |
211 " vperm %v1, %v11, %v12, %v13 \n" | |
212 " stvewx %v0, %r9, %r3 \n" | |
213 " vperm %v0, %v11, %v12, %v9 \n" | |
214 " add %r4, %r4, %r5 \n" | |
215 " li %r9, 8 \n" | |
216 " lvx %v12, %r9, %r4 \n" | |
217 " vavgub %v10, %v0, %v1 \n" | |
218 " lvx %v11, 0, %r4 \n" | |
219 " add %r3, %r3, %r5 \n" | |
220 " stvewx %v10, 0, %r3 \n" | |
221 " vperm %v1, %v11, %v12, %v7 \n" | |
222 " vperm %v0, %v11, %v12, %v8 \n" | |
223 " li %r9, 4 \n" | |
224 " stvewx %v10, %r9, %r3 \n" | |
225 " add %r4, %r4, %r5 \n" | |
226 " vavgub %v0, %v0, %v1 \n" | |
227 " add %r3, %r3, %r5 \n" | |
228 " bdnz ._L21 \n" | |
229 " li %r9, 8 \n" | |
230 " lvx %v12, %r9, %r4 \n" | |
231 " lvx %v11, 0, %r4 \n" | |
232 " stvewx %v0, 0, %r3 \n" | |
233 " li %r9, 4 \n" | |
234 " vperm %v1, %v11, %v12, %v13 \n" | |
235 " stvewx %v0, %r9, %r3 \n" | |
236 " vperm %v0, %v11, %v12, %v9 \n" | |
237 " add %r3, %r3, %r5 \n" | |
238 " vavgub %v10, %v0, %v1 \n" | |
239 " stvewx %v10, 0, %r3 \n" | |
240 " stvewx %v10, %r9, %r3 \n" | |
241 ); | |
242 } | |
243 | |
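/*
 * _y_ variants: vertical half-pel interpolation.  Each output row is the
 * vavgub of two consecutive source rows; the loop keeps the previous row
 * in a register so every source row is loaded only once.
 */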
244 static void MC_put_y_16_altivec (uint8_t * dest, const uint8_t * ref, | |
245 int stride, int height) | |
246 { | |
247 asm (" \n" | |
248 " li %r9, 15 \n" | |
249 " lvsl %v10, 0, %r4 \n" | |
250 " lvx %v13, 0, %r4 \n" | |
251 " lvx %v1, %r9, %r4 \n" | |
252 " add %r4, %r4, %r5 \n" | |
253 " vperm %v12, %v13, %v1, %v10 \n" | |
254 " srawi %r6, %r6, 1 \n" | |
255 " lvx %v13, 0, %r4 \n" | |
256 " lvx %v1, %r9, %r4 \n" | |
257 " addi %r6, %r6, -1 \n" | |
258 " vperm %v11, %v13, %v1, %v10 \n" | |
259 " mtctr %r6 \n" | |
260 " add %r0, %r5, %r5 \n" | |
261 " add %r4, %r4, %r5 \n" | |
262 " vavgub %v0, %v12, %v11 \n" | |
263 "._L26: \n" | |
264 " li %r9, 15 \n" | |
265 " lvx %v13, 0, %r4 \n" | |
266 " lvx %v1, %r9, %r4 \n" | |
267 " stvx %v0, 0, %r3 \n" | |
268 " vperm %v12, %v13, %v1, %v10 \n" | |
269 " add %r4, %r4, %r5 \n" | |
270 " lvx %v13, 0, %r4 \n" | |
271 " lvx %v1, %r9, %r4 \n" | |
272 " vavgub %v0, %v12, %v11 \n" | |
273 " stvx %v0, %r5, %r3 \n" | |
274 " vperm %v11, %v13, %v1, %v10 \n" | |
275 " add %r4, %r4, %r5 \n" | |
276 " add %r3, %r3, %r0 \n" | |
277 " vavgub %v0, %v12, %v11 \n" | |
278 " bdnz ._L26 \n" | |
279 " lvx %v1, %r9, %r4 \n" | |
280 " lvx %v13, 0, %r4 \n" | |
281 " stvx %v0, 0, %r3 \n" | |
282 " vperm %v12, %v13, %v1, %v10 \n" | |
283 " vavgub %v0, %v12, %v11 \n" | |
284 " stvx %v0, %r5, %r3 \n" | |
285 ); | |
286 } | |
287 | |
288 static void MC_put_y_8_altivec (uint8_t * dest, const uint8_t * ref, | |
289 int stride, int height) | |
290 { | |
291 asm (" \n" | |
292 " lvsl %v13, 0, %r4 \n" | |
293 " lvsl %v11, %r5, %r4 \n" | |
294 " vmrghb %v13, %v13, %v13 \n" | |
295 " li %r9, 7 \n" | |
296 " lvx %v12, 0, %r4 \n" | |
297 " vmrghb %v11, %v11, %v11 \n" | |
298 " lvx %v1, %r9, %r4 \n" | |
299 " vpkuhum %v9, %v13, %v13 \n" | |
300 " add %r4, %r4, %r5 \n" | |
301 " vpkuhum %v10, %v11, %v11 \n" | |
302 " vperm %v13, %v12, %v1, %v9 \n" | |
303 " srawi %r6, %r6, 1 \n" | |
304 " lvx %v12, 0, %r4 \n" | |
305 " lvx %v1, %r9, %r4 \n" | |
306 " addi %r6, %r6, -1 \n" | |
307 " vperm %v11, %v12, %v1, %v10 \n" | |
308 " mtctr %r6 \n" | |
309 " add %r4, %r4, %r5 \n" | |
310 " vavgub %v0, %v13, %v11 \n" | |
311 "._L31: \n" | |
312 " li %r9, 7 \n" | |
313 " lvx %v1, %r9, %r4 \n" | |
314 " lvx %v12, 0, %r4 \n" | |
315 " stvewx %v0, 0, %r3 \n" | |
316 " li %r9, 4 \n" | |
317 " vperm %v13, %v12, %v1, %v9 \n" | |
318 " stvewx %v0, %r9, %r3 \n" | |
319 " add %r4, %r4, %r5 \n" | |
320 " vavgub %v0, %v13, %v11 \n" | |
321 " li %r9, 7 \n" | |
322 " lvx %v1, %r9, %r4 \n" | |
323 " lvx %v12, 0, %r4 \n" | |
324 " add %r3, %r3, %r5 \n" | |
325 " stvewx %v0, 0, %r3 \n" | |
326 " vperm %v11, %v12, %v1, %v10 \n" | |
327 " li %r9, 4 \n" | |
328 " stvewx %v0, %r9, %r3 \n" | |
329 " vavgub %v0, %v13, %v11 \n" | |
330 " add %r4, %r4, %r5 \n" | |
331 " add %r3, %r3, %r5 \n" | |
332 " bdnz ._L31 \n" | |
333 " li %r9, 7 \n" | |
334 " lvx %v1, %r9, %r4 \n" | |
335 " lvx %v12, 0, %r4 \n" | |
336 " stvewx %v0, 0, %r3 \n" | |
337 " li %r9, 4 \n" | |
338 " vperm %v13, %v12, %v1, %v9 \n" | |
339 " stvewx %v0, %r9, %r3 \n" | |
340 " add %r3, %r3, %r5 \n" | |
341 " vavgub %v0, %v13, %v11 \n" | |
342 " stvewx %v0, 0, %r3 \n" | |
343 " stvewx %v0, %r9, %r3 \n" | |
344 ); | |
345 } | |
346 | |
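/*
 * _xy_ variants: 2-D half-pel interpolation, i.e. the correctly rounded
 * average of a 2x2 neighbourhood, (a + b + c + d + 2) >> 2.  With only
 * byte averages available (vavgub(x,y) = (x + y + 1) >> 1) this is
 * computed as
 *
 *   avg(avg(a,b), avg(c,d)) - ((((a^b) | (c^d)) & (avg(a,b) ^ avg(c,d))) & 1)
 *
 * where the subtracted bit corrects the rounding bias of chaining two
 * round-up averages; the vor/vxor/vand/vsububm sequence below implements
 * exactly this correction.
 */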
347 static void MC_put_xy_16_altivec (uint8_t * dest, const uint8_t * ref, | |
348 int stride, int height) | |
349 { | |
350 asm (" \n" | |
351 " lvsl %v5, 0, %r4 \n" | |
352 " vspltisb %v3, 1 \n" | |
353 " li %r9, 16 \n" | |
354 " lvx %v1, 0, %r4 \n" | |
355 " vaddubm %v4, %v5, %v3 \n" | |
356 " lvx %v0, %r9, %r4 \n" | |
357 " add %r4, %r4, %r5 \n" | |
358 " vperm %v10, %v1, %v0, %v4 \n" | |
359 " srawi %r6, %r6, 1 \n" | |
360 " vperm %v11, %v1, %v0, %v5 \n" | |
361 " addi %r6, %r6, -1 \n" | |
362 " lvx %v1, 0, %r4 \n" | |
363 " mtctr %r6 \n" | |
364 " lvx %v0, %r9, %r4 \n" | |
365 " vavgub %v9, %v11, %v10 \n" | |
366 " vxor %v8, %v11, %v10 \n" | |
367 " add %r0, %r5, %r5 \n" | |
368 " vperm %v10, %v1, %v0, %v4 \n" | |
369 " add %r4, %r4, %r5 \n" | |
370 " vperm %v11, %v1, %v0, %v5 \n" | |
371 " vxor %v6, %v11, %v10 \n" | |
372 " vavgub %v7, %v11, %v10 \n" | |
373 " vor %v0, %v8, %v6 \n" | |
374 " vxor %v13, %v9, %v7 \n" | |
375 " vand %v0, %v3, %v0 \n" | |
376 " vavgub %v1, %v9, %v7 \n" | |
377 " vand %v0, %v0, %v13 \n" | |
378 " vsububm %v13, %v1, %v0 \n" | |
379 "._L36: \n" | |
380 " li %r9, 16 \n" | |
381 " lvx %v1, 0, %r4 \n" | |
382 " lvx %v0, %r9, %r4 \n" | |
383 " stvx %v13, 0, %r3 \n" | |
384 " vperm %v10, %v1, %v0, %v4 \n" | |
385 " add %r4, %r4, %r5 \n" | |
386 " vperm %v11, %v1, %v0, %v5 \n" | |
387 " lvx %v1, 0, %r4 \n" | |
388 " lvx %v0, %r9, %r4 \n" | |
389 " vavgub %v9, %v11, %v10 \n" | |
390 " vxor %v8, %v11, %v10 \n" | |
391 " add %r4, %r4, %r5 \n" | |
392 " vperm %v10, %v1, %v0, %v4 \n" | |
393 " vavgub %v12, %v9, %v7 \n" | |
394 " vperm %v11, %v1, %v0, %v5 \n" | |
395 " vor %v13, %v8, %v6 \n" | |
396 " vxor %v0, %v9, %v7 \n" | |
397 " vxor %v6, %v11, %v10 \n" | |
398 " vand %v13, %v3, %v13 \n" | |
399 " vavgub %v7, %v11, %v10 \n" | |
400 " vor %v1, %v8, %v6 \n" | |
401 " vand %v13, %v13, %v0 \n" | |
402 " vxor %v0, %v9, %v7 \n" | |
403 " vand %v1, %v3, %v1 \n" | |
404 " vsububm %v13, %v12, %v13 \n" | |
405 " vand %v1, %v1, %v0 \n" | |
406 " stvx %v13, %r5, %r3 \n" | |
407 " vavgub %v0, %v9, %v7 \n" | |
408 " add %r3, %r3, %r0 \n" | |
409 " vsububm %v13, %v0, %v1 \n" | |
410 " bdnz ._L36 \n" | |
411 " lvx %v0, %r9, %r4 \n" | |
412 " lvx %v1, 0, %r4 \n" | |
413 " stvx %v13, 0, %r3 \n" | |
414 " vperm %v10, %v1, %v0, %v4 \n" | |
415 " vperm %v11, %v1, %v0, %v5 \n" | |
416 " vxor %v8, %v11, %v10 \n" | |
417 " vavgub %v9, %v11, %v10 \n" | |
418 " vor %v0, %v8, %v6 \n" | |
419 " vxor %v13, %v9, %v7 \n" | |
420 " vand %v0, %v3, %v0 \n" | |
421 " vavgub %v1, %v9, %v7 \n" | |
422 " vand %v0, %v0, %v13 \n" | |
423 " vsububm %v13, %v1, %v0 \n" | |
424 " stvx %v13, %r5, %r3 \n" | |
425 ); | |
426 } | |
427 | |
428 static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref, | |
429 int stride, int height) | |
430 { | |
431 asm (" \n" | |
432 " lvsl %v4, 0, %r4 \n" | |
433 " vspltisb %v3, 1 \n" | |
434 " lvsl %v5, %r5, %r4 \n" | |
435 " vmrghb %v4, %v4, %v4 \n" | |
436 " li %r9, 8 \n" | |
437 " vmrghb %v5, %v5, %v5 \n" | |
438 " lvx %v1, 0, %r4 \n" | |
439 " vpkuhum %v4, %v4, %v4 \n" | |
440 " lvx %v0, %r9, %r4 \n" | |
441 " vpkuhum %v5, %v5, %v5 \n" | |
442 " add %r4, %r4, %r5 \n" | |
443 " vaddubm %v2, %v4, %v3 \n" | |
444 " vperm %v11, %v1, %v0, %v4 \n" | |
445 " srawi %r6, %r6, 1 \n" | |
446 " vaddubm %v19, %v5, %v3 \n" | |
447 " addi %r6, %r6, -1 \n" | |
448 " vperm %v10, %v1, %v0, %v2 \n" | |
449 " mtctr %r6 \n" | |
450 " lvx %v1, 0, %r4 \n" | |
451 " lvx %v0, %r9, %r4 \n" | |
452 " vavgub %v9, %v11, %v10 \n" | |
453 " vxor %v8, %v11, %v10 \n" | |
454 " add %r4, %r4, %r5 \n" | |
455 " vperm %v10, %v1, %v0, %v19 \n" | |
456 " vperm %v11, %v1, %v0, %v5 \n" | |
457 " vxor %v6, %v11, %v10 \n" | |
458 " vavgub %v7, %v11, %v10 \n" | |
459 " vor %v0, %v8, %v6 \n" | |
460 " vxor %v13, %v9, %v7 \n" | |
461 " vand %v0, %v3, %v0 \n" | |
462 " vavgub %v1, %v9, %v7 \n" | |
463 " vand %v0, %v0, %v13 \n" | |
464 " vsububm %v13, %v1, %v0 \n" | |
465 "._L41: \n" | |
466 " li %r9, 8 \n" | |
467 " lvx %v0, %r9, %r4 \n" | |
468 " lvx %v1, 0, %r4 \n" | |
469 " stvewx %v13, 0, %r3 \n" | |
470 " li %r9, 4 \n" | |
471 " vperm %v10, %v1, %v0, %v2 \n" | |
472 " stvewx %v13, %r9, %r3 \n" | |
473 " vperm %v11, %v1, %v0, %v4 \n" | |
474 " add %r4, %r4, %r5 \n" | |
475 " li %r9, 8 \n" | |
476 " vavgub %v9, %v11, %v10 \n" | |
477 " lvx %v0, %r9, %r4 \n" | |
478 " vxor %v8, %v11, %v10 \n" | |
479 " lvx %v1, 0, %r4 \n" | |
480 " vavgub %v12, %v9, %v7 \n" | |
481 " vor %v13, %v8, %v6 \n" | |
482 " add %r3, %r3, %r5 \n" | |
483 " vperm %v10, %v1, %v0, %v19 \n" | |
484 " li %r9, 4 \n" | |
485 " vperm %v11, %v1, %v0, %v5 \n" | |
486 " vand %v13, %v3, %v13 \n" | |
487 " add %r4, %r4, %r5 \n" | |
488 " vxor %v0, %v9, %v7 \n" | |
489 " vxor %v6, %v11, %v10 \n" | |
490 " vavgub %v7, %v11, %v10 \n" | |
491 " vor %v1, %v8, %v6 \n" | |
492 " vand %v13, %v13, %v0 \n" | |
493 " vxor %v0, %v9, %v7 \n" | |
494 " vand %v1, %v3, %v1 \n" | |
495 " vsububm %v13, %v12, %v13 \n" | |
496 " vand %v1, %v1, %v0 \n" | |
497 " stvewx %v13, 0, %r3 \n" | |
498 " vavgub %v0, %v9, %v7 \n" | |
499 " stvewx %v13, %r9, %r3 \n" | |
500 " add %r3, %r3, %r5 \n" | |
501 " vsububm %v13, %v0, %v1 \n" | |
502 " bdnz ._L41 \n" | |
503 " li %r9, 8 \n" | |
504 " lvx %v0, %r9, %r4 \n" | |
505 " lvx %v1, 0, %r4 \n" | |
506 " stvewx %v13, 0, %r3 \n" | |
507 " vperm %v10, %v1, %v0, %v2 \n" | |
508 " li %r9, 4 \n" | |
509 " vperm %v11, %v1, %v0, %v4 \n" | |
510 " stvewx %v13, %r9, %r3 \n" | |
511 " add %r3, %r3, %r5 \n" | |
512 " vxor %v8, %v11, %v10 \n" | |
513 " vavgub %v9, %v11, %v10 \n" | |
514 " vor %v0, %v8, %v6 \n" | |
515 " vxor %v13, %v9, %v7 \n" | |
516 " vand %v0, %v3, %v0 \n" | |
517 " vavgub %v1, %v9, %v7 \n" | |
518 " vand %v0, %v0, %v13 \n" | |
519 " vsububm %v13, %v1, %v0 \n" | |
520 " stvewx %v13, 0, %r3 \n" | |
521 " stvewx %v13, %r9, %r3 \n" | |
522 ); | |
523 } | |
524 | |
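/*
 * MC_avg_* variants: same interpolation as above, but the prediction is
 * additionally averaged (vavgub) with the pixels already present in dest,
 * as used when two motion-compensated predictions are combined (e.g.
 * bidirectional prediction).
 */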
525 static void MC_avg_o_16_altivec (uint8_t * dest, const uint8_t * ref, | |
526 int stride, int height) | |
527 { | |
528 asm (" \n" | |
529 " li %r9, 15 \n" | |
530 " lvx %v0, %r9, %r4 \n" | |
531 " lvsl %v11, 0, %r4 \n" | |
532 " lvx %v1, 0, %r4 \n" | |
533 " srawi %r6, %r6, 1 \n" | |
534 " addi %r6, %r6, -1 \n" | |
535 " vperm %v0, %v1, %v0, %v11 \n" | |
536 " lvx %v13, 0, %r3 \n" | |
537 " mtctr %r6 \n" | |
538 " add %r9, %r5, %r5 \n" | |
539 " vavgub %v12, %v13, %v0 \n" | |
540 " add %r4, %r4, %r5 \n" | |
541 "._L46: \n" | |
542 " li %r11, 15 \n" | |
543 " lvx %v1, 0, %r4 \n" | |
544 " lvx %v0, %r11, %r4 \n" | |
545 " lvx %v13, %r5, %r3 \n" | |
546 " vperm %v0, %v1, %v0, %v11 \n" | |
547 " stvx %v12, 0, %r3 \n" | |
548 " add %r4, %r4, %r5 \n" | |
549 " vavgub %v12, %v13, %v0 \n" | |
550 " lvx %v1, 0, %r4 \n" | |
551 " lvx %v0, %r11, %r4 \n" | |
552 " lvx %v13, %r9, %r3 \n" | |
553 " vperm %v0, %v1, %v0, %v11 \n" | |
554 " stvx %v12, %r5, %r3 \n" | |
555 " add %r4, %r4, %r5 \n" | |
556 " vavgub %v12, %v13, %v0 \n" | |
557 " add %r3, %r3, %r9 \n" | |
558 " bdnz ._L46 \n" | |
559 " lvx %v0, %r11, %r4 \n" | |
560 " lvx %v1, 0, %r4 \n" | |
561 " lvx %v13, %r5, %r3 \n" | |
562 " vperm %v0, %v1, %v0, %v11 \n" | |
563 " stvx %v12, 0, %r3 \n" | |
564 " vavgub %v12, %v13, %v0 \n" | |
565 " stvx %v12, %r5, %r3 \n" | |
566 ); | |
567 } | |
568 | |
569 static void MC_avg_o_8_altivec (uint8_t * dest, const uint8_t * ref, | |
570 int stride, int height) | |
571 { | |
572 asm (" \n" | |
573 " lvsl %v12, 0, %r4 \n" | |
574 " li %r9, 7 \n" | |
575 " vmrghb %v12, %v12, %v12 \n" | |
576 " lvsl %v1, %r5, %r4 \n" | |
577 " lvx %v13, 0, %r4 \n" | |
578 " vpkuhum %v9, %v12, %v12 \n" | |
579 " lvx %v0, %r9, %r4 \n" | |
580 " srawi %r6, %r6, 1 \n" | |
581 " vmrghb %v1, %v1, %v1 \n" | |
582 " addi %r6, %r6, -1 \n" | |
583 " vperm %v0, %v13, %v0, %v9 \n" | |
584 " lvx %v11, 0, %r3 \n" | |
585 " mtctr %r6 \n" | |
586 " vpkuhum %v10, %v1, %v1 \n" | |
587 " add %r4, %r4, %r5 \n" | |
588 " vavgub %v12, %v11, %v0 \n" | |
589 "._L51: \n" | |
590 " li %r9, 7 \n" | |
591 " lvx %v0, %r9, %r4 \n" | |
592 " lvx %v13, 0, %r4 \n" | |
593 " lvx %v11, %r5, %r3 \n" | |
594 " stvewx %v12, 0, %r3 \n" | |
595 " vperm %v0, %v13, %v0, %v10 \n" | |
596 " li %r9, 4 \n" | |
597 " stvewx %v12, %r9, %r3 \n" | |
598 " vavgub %v1, %v11, %v0 \n" | |
599 " add %r4, %r4, %r5 \n" | |
600 " li %r9, 7 \n" | |
601 " lvx %v0, %r9, %r4 \n" | |
602 " add %r3, %r3, %r5 \n" | |
603 " lvx %v13, 0, %r4 \n" | |
604 " lvx %v11, %r5, %r3 \n" | |
605 " stvewx %v1, 0, %r3 \n" | |
606 " vperm %v0, %v13, %v0, %v9 \n" | |
607 " li %r9, 4 \n" | |
608 " stvewx %v1, %r9, %r3 \n" | |
609 " vavgub %v12, %v11, %v0 \n" | |
610 " add %r4, %r4, %r5 \n" | |
611 " add %r3, %r3, %r5 \n" | |
612 " bdnz ._L51 \n" | |
613 " li %r9, 7 \n" | |
614 " lvx %v0, %r9, %r4 \n" | |
615 " lvx %v13, 0, %r4 \n" | |
616 " lvx %v11, %r5, %r3 \n" | |
617 " stvewx %v12, 0, %r3 \n" | |
618 " vperm %v0, %v13, %v0, %v10 \n" | |
619 " li %r9, 4 \n" | |
620 " stvewx %v12, %r9, %r3 \n" | |
621 " vavgub %v1, %v11, %v0 \n" | |
622 " add %r3, %r3, %r5 \n" | |
623 " stvewx %v1, 0, %r3 \n" | |
624 " stvewx %v1, %r9, %r3 \n" | |
625 ); | |
626 } | |
627 | |
628 static void MC_avg_x_16_altivec (uint8_t * dest, const uint8_t * ref, | |
629 int stride, int height) | |
630 { | |
631 asm (" \n" | |
632 " lvsl %v8, 0, %r4 \n" | |
633 " vspltisb %v0, 1 \n" | |
634 " li %r9, 16 \n" | |
635 " lvx %v12, %r9, %r4 \n" | |
636 " vaddubm %v7, %v8, %v0 \n" | |
637 " lvx %v11, 0, %r4 \n" | |
638 " srawi %r6, %r6, 1 \n" | |
639 " vperm %v1, %v11, %v12, %v7 \n" | |
640 " addi %r6, %r6, -1 \n" | |
641 " vperm %v0, %v11, %v12, %v8 \n" | |
642 " lvx %v9, 0, %r3 \n" | |
643 " mtctr %r6 \n" | |
644 " add %r9, %r5, %r5 \n" | |
645 " vavgub %v0, %v0, %v1 \n" | |
646 " add %r4, %r4, %r5 \n" | |
647 " vavgub %v10, %v9, %v0 \n" | |
648 "._L56: \n" | |
649 " li %r11, 16 \n" | |
650 " lvx %v11, 0, %r4 \n" | |
651 " lvx %v12, %r11, %r4 \n" | |
652 " lvx %v9, %r5, %r3 \n" | |
653 " stvx %v10, 0, %r3 \n" | |
654 " vperm %v0, %v11, %v12, %v7 \n" | |
655 " add %r4, %r4, %r5 \n" | |
656 " vperm %v1, %v11, %v12, %v8 \n" | |
657 " lvx %v11, 0, %r4 \n" | |
658 " lvx %v12, %r11, %r4 \n" | |
659 " vavgub %v1, %v1, %v0 \n" | |
660 " add %r4, %r4, %r5 \n" | |
661 " vperm %v13, %v11, %v12, %v7 \n" | |
662 " vavgub %v10, %v9, %v1 \n" | |
663 " vperm %v0, %v11, %v12, %v8 \n" | |
664 " lvx %v9, %r9, %r3 \n" | |
665 " stvx %v10, %r5, %r3 \n" | |
666 " vavgub %v0, %v0, %v13 \n" | |
667 " add %r3, %r3, %r9 \n" | |
668 " vavgub %v10, %v9, %v0 \n" | |
669 " bdnz ._L56 \n" | |
670 " lvx %v12, %r11, %r4 \n" | |
671 " lvx %v11, 0, %r4 \n" | |
672 " lvx %v9, %r5, %r3 \n" | |
673 " vperm %v1, %v11, %v12, %v7 \n" | |
674 " stvx %v10, 0, %r3 \n" | |
675 " vperm %v0, %v11, %v12, %v8 \n" | |
676 " vavgub %v0, %v0, %v1 \n" | |
677 " vavgub %v10, %v9, %v0 \n" | |
678 " stvx %v10, %r5, %r3 \n" | |
679 ); | |
680 } | |
681 | |
682 static void MC_avg_x_8_altivec (uint8_t * dest, const uint8_t * ref, | |
683 int stride, int height) | |
684 { | |
685 asm (" \n" | |
686 " lvsl %v10, 0, %r4 \n" | |
687 " vspltisb %v13, 1 \n" | |
688 " li %r9, 8 \n" | |
689 " vmrghb %v10, %v10, %v10 \n" | |
690 " lvx %v11, 0, %r4 \n" | |
691 " lvx %v12, %r9, %r4 \n" | |
692 " vpkuhum %v7, %v10, %v10 \n" | |
693 " srawi %r6, %r6, 1 \n" | |
694 " lvsl %v10, %r5, %r4 \n" | |
695 " vaddubm %v6, %v7, %v13 \n" | |
696 " vperm %v0, %v11, %v12, %v7 \n" | |
697 " addi %r6, %r6, -1 \n" | |
698 " vmrghb %v10, %v10, %v10 \n" | |
699 " lvx %v9, 0, %r3 \n" | |
700 " mtctr %r6 \n" | |
701 " vperm %v1, %v11, %v12, %v6 \n" | |
702 " add %r4, %r4, %r5 \n" | |
703 " vpkuhum %v8, %v10, %v10 \n" | |
704 " vavgub %v0, %v0, %v1 \n" | |
705 " vaddubm %v13, %v8, %v13 \n" | |
706 " vavgub %v10, %v9, %v0 \n" | |
707 "._L61: \n" | |
708 " li %r9, 8 \n" | |
709 " lvx %v12, %r9, %r4 \n" | |
710 " lvx %v11, 0, %r4 \n" | |
711 " lvx %v9, %r5, %r3 \n" | |
712 " stvewx %v10, 0, %r3 \n" | |
713 " vperm %v1, %v11, %v12, %v13 \n" | |
714 " vperm %v0, %v11, %v12, %v8 \n" | |
715 " li %r9, 4 \n" | |
716 " stvewx %v10, %r9, %r3 \n" | |
717 " add %r4, %r4, %r5 \n" | |
718 " vavgub %v0, %v0, %v1 \n" | |
719 " li %r9, 8 \n" | |
720 " lvx %v12, %r9, %r4 \n" | |
721 " vavgub %v10, %v9, %v0 \n" | |
722 " lvx %v11, 0, %r4 \n" | |
723 " add %r3, %r3, %r5 \n" | |
724 " vperm %v1, %v11, %v12, %v6 \n" | |
725 " lvx %v9, %r5, %r3 \n" | |
726 " vperm %v0, %v11, %v12, %v7 \n" | |
727 " stvewx %v10, 0, %r3 \n" | |
728 " li %r9, 4 \n" | |
729 " vavgub %v0, %v0, %v1 \n" | |
730 " stvewx %v10, %r9, %r3 \n" | |
731 " add %r4, %r4, %r5 \n" | |
732 " add %r3, %r3, %r5 \n" | |
733 " vavgub %v10, %v9, %v0 \n" | |
734 " bdnz ._L61 \n" | |
735 " li %r9, 8 \n" | |
736 " lvx %v12, %r9, %r4 \n" | |
737 " lvx %v11, 0, %r4 \n" | |
738 " lvx %v9, %r5, %r3 \n" | |
739 " vperm %v1, %v11, %v12, %v13 \n" | |
740 " stvewx %v10, 0, %r3 \n" | |
741 " vperm %v0, %v11, %v12, %v8 \n" | |
742 " li %r9, 4 \n" | |
743 " stvewx %v10, %r9, %r3 \n" | |
744 " vavgub %v0, %v0, %v1 \n" | |
745 " add %r3, %r3, %r5 \n" | |
746 " vavgub %v10, %v9, %v0 \n" | |
747 " stvewx %v10, 0, %r3 \n" | |
748 " stvewx %v10, %r9, %r3 \n" | |
749 ); | |
750 } | |
751 | |
752 static void MC_avg_y_16_altivec (uint8_t * dest, const uint8_t * ref, | |
753 int stride, int height) | |
754 { | |
755 asm (" \n" | |
756 " li %r9, 15 \n" | |
757 " lvx %v1, %r9, %r4 \n" | |
758 " lvsl %v9, 0, %r4 \n" | |
759 " lvx %v13, 0, %r4 \n" | |
760 " add %r4, %r4, %r5 \n" | |
761 " vperm %v11, %v13, %v1, %v9 \n" | |
762 " li %r11, 15 \n" | |
763 " lvx %v13, 0, %r4 \n" | |
764 " lvx %v1, %r11, %r4 \n" | |
765 " srawi %r6, %r6, 1 \n" | |
766 " vperm %v10, %v13, %v1, %v9 \n" | |
767 " addi %r6, %r6, -1 \n" | |
768 " lvx %v12, 0, %r3 \n" | |
769 " mtctr %r6 \n" | |
770 " vavgub %v0, %v11, %v10 \n" | |
771 " add %r9, %r5, %r5 \n" | |
772 " add %r4, %r4, %r5 \n" | |
773 " vavgub %v0, %v12, %v0 \n" | |
774 "._L66: \n" | |
775 " li %r11, 15 \n" | |
776 " lvx %v13, 0, %r4 \n" | |
777 " lvx %v1, %r11, %r4 \n" | |
778 " lvx %v12, %r5, %r3 \n" | |
779 " vperm %v11, %v13, %v1, %v9 \n" | |
780 " stvx %v0, 0, %r3 \n" | |
781 " add %r4, %r4, %r5 \n" | |
782 " vavgub %v0, %v11, %v10 \n" | |
783 " lvx %v13, 0, %r4 \n" | |
784 " lvx %v1, %r11, %r4 \n" | |
785 " vavgub %v0, %v12, %v0 \n" | |
786 " add %r4, %r4, %r5 \n" | |
787 " lvx %v12, %r9, %r3 \n" | |
788 " vperm %v10, %v13, %v1, %v9 \n" | |
789 " stvx %v0, %r5, %r3 \n" | |
790 " vavgub %v0, %v11, %v10 \n" | |
791 " add %r3, %r3, %r9 \n" | |
792 " vavgub %v0, %v12, %v0 \n" | |
793 " bdnz ._L66 \n" | |
794 " lvx %v1, %r11, %r4 \n" | |
795 " lvx %v13, 0, %r4 \n" | |
796 " lvx %v12, %r5, %r3 \n" | |
797 " vperm %v11, %v13, %v1, %v9 \n" | |
798 " stvx %v0, 0, %r3 \n" | |
799 " vavgub %v0, %v11, %v10 \n" | |
800 " vavgub %v0, %v12, %v0 \n" | |
801 " stvx %v0, %r5, %r3 \n" | |
802 ); | |
803 } | |
804 | |
805 static void MC_avg_y_8_altivec (uint8_t * dest, const uint8_t * ref, | |
806 int stride, int height) | |
807 { | |
808 asm (" \n" | |
809 " lvsl %v12, 0, %r4 \n" | |
810 " lvsl %v9, %r5, %r4 \n" | |
811 " vmrghb %v12, %v12, %v12 \n" | |
812 " li %r9, 7 \n" | |
813 " lvx %v11, 0, %r4 \n" | |
814 " vmrghb %v9, %v9, %v9 \n" | |
815 " lvx %v13, %r9, %r4 \n" | |
816 " vpkuhum %v7, %v12, %v12 \n" | |
817 " add %r4, %r4, %r5 \n" | |
818 " vpkuhum %v8, %v9, %v9 \n" | |
819 " vperm %v12, %v11, %v13, %v7 \n" | |
820 " srawi %r6, %r6, 1 \n" | |
821 " lvx %v11, 0, %r4 \n" | |
822 " lvx %v13, %r9, %r4 \n" | |
823 " addi %r6, %r6, -1 \n" | |
824 " vperm %v9, %v11, %v13, %v8 \n" | |
825 " lvx %v10, 0, %r3 \n" | |
826 " mtctr %r6 \n" | |
827 " add %r4, %r4, %r5 \n" | |
828 " vavgub %v0, %v12, %v9 \n" | |
829 " vavgub %v1, %v10, %v0 \n" | |
830 "._L71: \n" | |
831 " li %r9, 7 \n" | |
832 " lvx %v13, %r9, %r4 \n" | |
833 " lvx %v11, 0, %r4 \n" | |
834 " lvx %v10, %r5, %r3 \n" | |
835 " stvewx %v1, 0, %r3 \n" | |
836 " vperm %v12, %v11, %v13, %v7 \n" | |
837 " li %r9, 4 \n" | |
838 " stvewx %v1, %r9, %r3 \n" | |
839 " vavgub %v0, %v12, %v9 \n" | |
840 " add %r4, %r4, %r5 \n" | |
841 " li %r9, 7 \n" | |
842 " vavgub %v1, %v10, %v0 \n" | |
843 " lvx %v13, %r9, %r4 \n" | |
844 " lvx %v11, 0, %r4 \n" | |
845 " add %r3, %r3, %r5 \n" | |
846 " vperm %v9, %v11, %v13, %v8 \n" | |
847 " lvx %v10, %r5, %r3 \n" | |
848 " stvewx %v1, 0, %r3 \n" | |
849 " vavgub %v0, %v12, %v9 \n" | |
850 " li %r9, 4 \n" | |
851 " stvewx %v1, %r9, %r3 \n" | |
852 " add %r4, %r4, %r5 \n" | |
853 " vavgub %v1, %v10, %v0 \n" | |
854 " add %r3, %r3, %r5 \n" | |
855 " bdnz ._L71 \n" | |
856 " li %r9, 7 \n" | |
857 " lvx %v13, %r9, %r4 \n" | |
858 " lvx %v11, 0, %r4 \n" | |
859 " lvx %v10, %r5, %r3 \n" | |
860 " vperm %v12, %v11, %v13, %v7 \n" | |
861 " stvewx %v1, 0, %r3 \n" | |
862 " li %r9, 4 \n" | |
863 " vavgub %v0, %v12, %v9 \n" | |
864 " stvewx %v1, %r9, %r3 \n" | |
865 " add %r3, %r3, %r5 \n" | |
866 " vavgub %v1, %v10, %v0 \n" | |
867 " stvewx %v1, 0, %r3 \n" | |
868 " stvewx %v1, %r9, %r3 \n" | |
869 ); | |
870 } | |
871 | |
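/*
 * MC_avg_xy_*: the rounding-corrected 2x2 interpolation from the _xy_
 * kernels above, followed by a vavgub with the existing contents of dest.
 */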
872 static void MC_avg_xy_16_altivec (uint8_t * dest, const uint8_t * ref, | |
873 int stride, int height) | |
874 { | |
875 asm (" \n" | |
876 " lvsl %v4, 0, %r4 \n" | |
877 " vspltisb %v2, 1 \n" | |
878 " li %r9, 16 \n" | |
879 " lvx %v1, %r9, %r4 \n" | |
880 " vaddubm %v3, %v4, %v2 \n" | |
881 " lvx %v13, 0, %r4 \n" | |
882 " add %r4, %r4, %r5 \n" | |
883 " vperm %v10, %v13, %v1, %v3 \n" | |
884 " li %r11, 16 \n" | |
885 " vperm %v11, %v13, %v1, %v4 \n" | |
886 " srawi %r6, %r6, 1 \n" | |
887 " lvx %v13, 0, %r4 \n" | |
888 " lvx %v1, %r11, %r4 \n" | |
889 " vavgub %v9, %v11, %v10 \n" | |
890 " vxor %v8, %v11, %v10 \n" | |
891 " addi %r6, %r6, -1 \n" | |
892 " vperm %v10, %v13, %v1, %v3 \n" | |
893 " lvx %v6, 0, %r3 \n" | |
894 " mtctr %r6 \n" | |
895 " vperm %v11, %v13, %v1, %v4 \n" | |
896 " add %r9, %r5, %r5 \n" | |
897 " add %r4, %r4, %r5 \n" | |
898 " vxor %v5, %v11, %v10 \n" | |
899 " vavgub %v7, %v11, %v10 \n" | |
900 " vor %v1, %v8, %v5 \n" | |
901 " vxor %v13, %v9, %v7 \n" | |
902 " vand %v1, %v2, %v1 \n" | |
903 " vavgub %v0, %v9, %v7 \n" | |
904 " vand %v1, %v1, %v13 \n" | |
905 " vsububm %v0, %v0, %v1 \n" | |
906 " vavgub %v12, %v6, %v0 \n" | |
907 "._L76: \n" | |
908 " li %r11, 16 \n" | |
909 " lvx %v13, 0, %r4 \n" | |
910 " lvx %v1, %r11, %r4 \n" | |
911 " lvx %v6, %r5, %r3 \n" | |
912 " stvx %v12, 0, %r3 \n" | |
913 " vperm %v10, %v13, %v1, %v3 \n" | |
914 " vperm %v11, %v13, %v1, %v4 \n" | |
915 " add %r4, %r4, %r5 \n" | |
916 " lvx %v13, 0, %r4 \n" | |
917 " lvx %v1, %r11, %r4 \n" | |
918 " vavgub %v9, %v11, %v10 \n" | |
919 " vxor %v8, %v11, %v10 \n" | |
920 " add %r4, %r4, %r5 \n" | |
921 " vperm %v10, %v13, %v1, %v3 \n" | |
922 " vavgub %v12, %v9, %v7 \n" | |
923 " vperm %v11, %v13, %v1, %v4 \n" | |
924 " vor %v0, %v8, %v5 \n" | |
925 " vxor %v13, %v9, %v7 \n" | |
926 " vxor %v5, %v11, %v10 \n" | |
927 " vand %v0, %v2, %v0 \n" | |
928 " vavgub %v7, %v11, %v10 \n" | |
929 " vor %v1, %v8, %v5 \n" | |
930 " vand %v0, %v0, %v13 \n" | |
931 " vand %v1, %v2, %v1 \n" | |
932 " vxor %v13, %v9, %v7 \n" | |
933 " vsububm %v12, %v12, %v0 \n" | |
934 " vand %v1, %v1, %v13 \n" | |
935 " vavgub %v0, %v9, %v7 \n" | |
936 " vavgub %v12, %v6, %v12 \n" | |
937 " lvx %v6, %r9, %r3 \n" | |
938 " vsububm %v0, %v0, %v1 \n" | |
939 " stvx %v12, %r5, %r3 \n" | |
940 " vavgub %v12, %v6, %v0 \n" | |
941 " add %r3, %r3, %r9 \n" | |
942 " bdnz ._L76 \n" | |
943 " lvx %v1, %r11, %r4 \n" | |
944 " lvx %v13, 0, %r4 \n" | |
945 " lvx %v6, %r5, %r3 \n" | |
946 " vperm %v10, %v13, %v1, %v3 \n" | |
947 " stvx %v12, 0, %r3 \n" | |
948 " vperm %v11, %v13, %v1, %v4 \n" | |
949 " vxor %v8, %v11, %v10 \n" | |
950 " vavgub %v9, %v11, %v10 \n" | |
951 " vor %v0, %v8, %v5 \n" | |
952 " vxor %v13, %v9, %v7 \n" | |
953 " vand %v0, %v2, %v0 \n" | |
954 " vavgub %v1, %v9, %v7 \n" | |
955 " vand %v0, %v0, %v13 \n" | |
956 " vsububm %v1, %v1, %v0 \n" | |
957 " vavgub %v12, %v6, %v1 \n" | |
958 " stvx %v12, %r5, %r3 \n" | |
959 ); | |
960 } | |
961 | |
962 static void MC_avg_xy_8_altivec (uint8_t * dest, const uint8_t * ref, | |
963 int stride, int height) | |
964 { | |
965 asm (" \n" | |
966 " lvsl %v2, 0, %r4 \n" | |
967 " vspltisb %v19, 1 \n" | |
968 " lvsl %v3, %r5, %r4 \n" | |
969 " vmrghb %v2, %v2, %v2 \n" | |
970 " li %r9, 8 \n" | |
971 " vmrghb %v3, %v3, %v3 \n" | |
972 " lvx %v9, 0, %r4 \n" | |
973 " vpkuhum %v2, %v2, %v2 \n" | |
974 " lvx %v1, %r9, %r4 \n" | |
975 " vpkuhum %v3, %v3, %v3 \n" | |
976 " add %r4, %r4, %r5 \n" | |
977 " vaddubm %v18, %v2, %v19 \n" | |
978 " vperm %v11, %v9, %v1, %v2 \n" | |
979 " srawi %r6, %r6, 1 \n" | |
980 " vaddubm %v17, %v3, %v19 \n" | |
981 " addi %r6, %r6, -1 \n" | |
982 " vperm %v10, %v9, %v1, %v18 \n" | |
983 " lvx %v4, 0, %r3 \n" | |
984 " mtctr %r6 \n" | |
985 " lvx %v1, %r9, %r4 \n" | |
986 " lvx %v9, 0, %r4 \n" | |
987 " vavgub %v8, %v11, %v10 \n" | |
988 " vxor %v7, %v11, %v10 \n" | |
989 " add %r4, %r4, %r5 \n" | |
990 " vperm %v10, %v9, %v1, %v17 \n" | |
991 " vperm %v11, %v9, %v1, %v3 \n" | |
992 " vxor %v5, %v11, %v10 \n" | |
993 " vavgub %v6, %v11, %v10 \n" | |
994 " vor %v1, %v7, %v5 \n" | |
995 " vxor %v13, %v8, %v6 \n" | |
996 " vand %v1, %v19, %v1 \n" | |
997 " vavgub %v0, %v8, %v6 \n" | |
998 " vand %v1, %v1, %v13 \n" | |
999 " vsububm %v0, %v0, %v1 \n" | |
1000 " vavgub %v13, %v4, %v0 \n" | |
1001 "._L81: \n" | |
1002 " li %r9, 8 \n" | |
1003 " lvx %v1, %r9, %r4 \n" | |
1004 " lvx %v9, 0, %r4 \n" | |
1005 " lvx %v4, %r5, %r3 \n" | |
1006 " stvewx %v13, 0, %r3 \n" | |
1007 " vperm %v10, %v9, %v1, %v18 \n" | |
1008 " vperm %v11, %v9, %v1, %v2 \n" | |
1009 " li %r9, 4 \n" | |
1010 " stvewx %v13, %r9, %r3 \n" | |
1011 " vxor %v7, %v11, %v10 \n" | |
1012 " add %r4, %r4, %r5 \n" | |
1013 " li %r9, 8 \n" | |
1014 " vavgub %v8, %v11, %v10 \n" | |
1015 " lvx %v1, %r9, %r4 \n" | |
1016 " vor %v0, %v7, %v5 \n" | |
1017 " lvx %v9, 0, %r4 \n" | |
1018 " vxor %v12, %v8, %v6 \n" | |
1019 " vand %v0, %v19, %v0 \n" | |
1020 " add %r3, %r3, %r5 \n" | |
1021 " vperm %v10, %v9, %v1, %v17 \n" | |
1022 " vavgub %v13, %v8, %v6 \n" | |
1023 " li %r9, 4 \n" | |
1024 " vperm %v11, %v9, %v1, %v3 \n" | |
1025 " vand %v0, %v0, %v12 \n" | |
1026 " add %r4, %r4, %r5 \n" | |
1027 " vxor %v5, %v11, %v10 \n" | |
1028 " vavgub %v6, %v11, %v10 \n" | |
1029 " vor %v1, %v7, %v5 \n" | |
1030 " vsububm %v13, %v13, %v0 \n" | |
1031 " vxor %v0, %v8, %v6 \n" | |
1032 " vand %v1, %v19, %v1 \n" | |
1033 " vavgub %v13, %v4, %v13 \n" | |
1034 " vand %v1, %v1, %v0 \n" | |
1035 " lvx %v4, %r5, %r3 \n" | |
1036 " vavgub %v0, %v8, %v6 \n" | |
1037 " stvewx %v13, 0, %r3 \n" | |
1038 " stvewx %v13, %r9, %r3 \n" | |
1039 " vsububm %v0, %v0, %v1 \n" | |
1040 " add %r3, %r3, %r5 \n" | |
1041 " vavgub %v13, %v4, %v0 \n" | |
1042 " bdnz ._L81 \n" | |
1043 " li %r9, 8 \n" | |
1044 " lvx %v1, %r9, %r4 \n" | |
1045 " lvx %v9, 0, %r4 \n" | |
1046 " lvx %v4, %r5, %r3 \n" | |
1047 " vperm %v10, %v9, %v1, %v18 \n" | |
1048 " stvewx %v13, 0, %r3 \n" | |
1049 " vperm %v11, %v9, %v1, %v2 \n" | |
1050 " li %r9, 4 \n" | |
1051 " stvewx %v13, %r9, %r3 \n" | |
1052 " vxor %v7, %v11, %v10 \n" | |
1053 " add %r3, %r3, %r5 \n" | |
1054 " vavgub %v8, %v11, %v10 \n" | |
1055 " vor %v0, %v7, %v5 \n" | |
1056 " vxor %v13, %v8, %v6 \n" | |
1057 " vand %v0, %v19, %v0 \n" | |
1058 " vavgub %v1, %v8, %v6 \n" | |
1059 " vand %v0, %v0, %v13 \n" | |
1060 " vsububm %v1, %v1, %v0 \n" | |
1061 " vavgub %v13, %v4, %v1 \n" | |
1062 " stvewx %v13, 0, %r3 \n" | |
1063 " stvewx %v13, %r9, %r3 \n" | |
1064 ); | |
1065 } | |
1066 | |
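/*
 * MPEG2_MC_EXTERN (from mpeg2_internal.h) presumably expands to the
 * mpeg2_mc_altivec table gathering the sixteen MC_{put,avg}_{o,x,y,xy}_{16,8}
 * functions above as the decoder's AltiVec motion-compensation entry points.
 */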
1067 MPEG2_MC_EXTERN (altivec) | |
1068 | |
1069 #endif /* ARCH_PPC */ | |
1070 | |
1071 #else /* __ALTIVEC__ */ | |
1072 | |
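/*
 * Compiler has AltiVec support (__ALTIVEC__ defined): build the C intrinsic
 * versions below, which are the source the hand-written asm branch above was
 * generated from.  The macros that follow just abbreviate the AltiVec vector
 * types.
 */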
1073 #define vector_s16_t vector signed short | |
1074 #define vector_u16_t vector unsigned short | |
1075 #define vector_s8_t vector signed char | |
1076 #define vector_u8_t vector unsigned char | |
1077 #define vector_s32_t vector signed int | |
1078 #define vector_u32_t vector unsigned int | |
1079 | |
1080 void MC_put_o_16_altivec (unsigned char * dest, const unsigned char * ref, | |
1081 const int stride, int height) | |
1082 { | |
1083 vector_u8_t perm, ref0, ref1, tmp; | |
1084 | |
1085 perm = vec_lvsl (0, ref); | |
1086 | |
1087 height = (height >> 1) - 1; | |
1088 | |
1089 ref0 = vec_ld (0, ref); | |
1090 ref1 = vec_ld (15, ref); | |
1091 ref += stride; | |
1092 tmp = vec_perm (ref0, ref1, perm); | |
1093 | |
1094 do { | |
1095 ref0 = vec_ld (0, ref); | |
1096 ref1 = vec_ld (15, ref); | |
1097 ref += stride; | |
1098 vec_st (tmp, 0, dest); | |
1099 tmp = vec_perm (ref0, ref1, perm); | |
1100 | |
1101 ref0 = vec_ld (0, ref); | |
1102 ref1 = vec_ld (15, ref); | |
1103 ref += stride; | |
1104 vec_st (tmp, stride, dest); | |
1105 dest += 2*stride; | |
1106 tmp = vec_perm (ref0, ref1, perm); | |
1107 } while (--height); | |
1108 | |
1109 ref0 = vec_ld (0, ref); | |
1110 ref1 = vec_ld (15, ref); | |
1111 vec_st (tmp, 0, dest); | |
1112 tmp = vec_perm (ref0, ref1, perm); | |
1113 vec_st (tmp, stride, dest); | |
1114 } | |
1115 | |
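/*
 * 8-pixel-wide blocks: the permute built from lvsl/vec_mergeh/vec_pack
 * replicates the eight source pixels into both halves of the vector, so the
 * two 32-bit vec_ste element stores (byte offsets 0 and 4) can write the row
 * for either 8-byte alignment of dest.
 */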
1116 void MC_put_o_8_altivec (unsigned char * dest, const unsigned char * ref, | |
1117 const int stride, int height) | |
1118 { | |
1119 vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1; | |
1120 | |
1121 tmp0 = vec_lvsl (0, ref); | |
1122 tmp0 = vec_mergeh (tmp0, tmp0); | |
1123 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); | |
1124 tmp1 = vec_lvsl (stride, ref); | |
1125 tmp1 = vec_mergeh (tmp1, tmp1); | |
1126 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); | |
1127 | |
1128 height = (height >> 1) - 1; | |
1129 | |
1130 ref0 = vec_ld (0, ref); | |
1131 ref1 = vec_ld (7, ref); | |
1132 ref += stride; | |
1133 tmp0 = vec_perm (ref0, ref1, perm0); | |
1134 | |
1135 do { | |
1136 ref0 = vec_ld (0, ref); | |
1137 ref1 = vec_ld (7, ref); | |
1138 ref += stride; | |
1139 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); | |
1140 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); | |
1141 dest += stride; | |
1142 tmp1 = vec_perm (ref0, ref1, perm1); | |
1143 | |
1144 ref0 = vec_ld (0, ref); | |
1145 ref1 = vec_ld (7, ref); | |
1146 ref += stride; | |
1147 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); | |
1148 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); | |
1149 dest += stride; | |
1150 tmp0 = vec_perm (ref0, ref1, perm0); | |
1151 } while (--height); | |
1152 | |
1153 ref0 = vec_ld (0, ref); | |
1154 ref1 = vec_ld (7, ref); | |
1155 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); | |
1156 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); | |
1157 dest += stride; | |
1158 tmp1 = vec_perm (ref0, ref1, perm1); | |
1159 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); | |
1160 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); | |
1161 } | |
1162 | |
1163 void MC_put_x_16_altivec (unsigned char * dest, const unsigned char * ref, | |
1164 const int stride, int height) | |
1165 { | |
1166 vector_u8_t permA, permB, ref0, ref1, tmp; | |
1167 | |
1168 permA = vec_lvsl (0, ref); | |
1169 permB = vec_add (permA, vec_splat_u8 (1)); | |
1170 | |
1171 height = (height >> 1) - 1; | |
1172 | |
1173 ref0 = vec_ld (0, ref); | |
1174 ref1 = vec_ld (16, ref); | |
1175 ref += stride; | |
1176 tmp = vec_avg (vec_perm (ref0, ref1, permA), | |
1177 vec_perm (ref0, ref1, permB)); | |
1178 | |
1179 do { | |
1180 ref0 = vec_ld (0, ref); | |
1181 ref1 = vec_ld (16, ref); | |
1182 ref += stride; | |
1183 vec_st (tmp, 0, dest); | |
1184 tmp = vec_avg (vec_perm (ref0, ref1, permA), | |
1185 vec_perm (ref0, ref1, permB)); | |
1186 | |
1187 ref0 = vec_ld (0, ref); | |
1188 ref1 = vec_ld (16, ref); | |
1189 ref += stride; | |
1190 vec_st (tmp, stride, dest); | |
1191 dest += 2*stride; | |
1192 tmp = vec_avg (vec_perm (ref0, ref1, permA), | |
1193 vec_perm (ref0, ref1, permB)); | |
1194 } while (--height); | |
1195 | |
1196 ref0 = vec_ld (0, ref); | |
1197 ref1 = vec_ld (16, ref); | |
1198 vec_st (tmp, 0, dest); | |
1199 tmp = vec_avg (vec_perm (ref0, ref1, permA), | |
1200 vec_perm (ref0, ref1, permB)); | |
1201 vec_st (tmp, stride, dest); | |
1202 } | |
1203 | |
1204 void MC_put_x_8_altivec (unsigned char * dest, const unsigned char * ref, | |
1205 const int stride, int height) | |
1206 { | |
1207 vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1; | |
1208 | |
1209 ones = vec_splat_u8 (1); | |
1210 tmp0 = vec_lvsl (0, ref); | |
1211 tmp0 = vec_mergeh (tmp0, tmp0); | |
1212 perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); | |
1213 perm0B = vec_add (perm0A, ones); | |
1214 tmp1 = vec_lvsl (stride, ref); | |
1215 tmp1 = vec_mergeh (tmp1, tmp1); | |
1216 perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); | |
1217 perm1B = vec_add (perm1A, ones); | |
1218 | |
1219 height = (height >> 1) - 1; | |
1220 | |
1221 ref0 = vec_ld (0, ref); | |
1222 ref1 = vec_ld (8, ref); | |
1223 ref += stride; | |
1224 tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A), | |
1225 vec_perm (ref0, ref1, perm0B)); | |
1226 | |
1227 do { | |
1228 ref0 = vec_ld (0, ref); | |
1229 ref1 = vec_ld (8, ref); | |
1230 ref += stride; | |
1231 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); | |
1232 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); | |
1233 dest += stride; | |
1234 tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A), | |
1235 vec_perm (ref0, ref1, perm1B)); | |
1236 | |
1237 ref0 = vec_ld (0, ref); | |
1238 ref1 = vec_ld (8, ref); | |
1239 ref += stride; | |
1240 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); | |
1241 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); | |
1242 dest += stride; | |
1243 tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A), | |
1244 vec_perm (ref0, ref1, perm0B)); | |
1245 } while (--height); | |
1246 | |
1247 ref0 = vec_ld (0, ref); | |
1248 ref1 = vec_ld (8, ref); | |
1249 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); | |
1250 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); | |
1251 dest += stride; | |
1252 tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A), | |
1253 vec_perm (ref0, ref1, perm1B)); | |
1254 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); | |
1255 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); | |
1256 } | |
1257 | |
1258 void MC_put_y_16_altivec (unsigned char * dest, const unsigned char * ref, | |
1259 const int stride, int height) | |
1260 { | |
1261 vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp; | |
1262 | |
1263 perm = vec_lvsl (0, ref); | |
1264 | |
1265 height = (height >> 1) - 1; | |
1266 | |
1267 ref0 = vec_ld (0, ref); | |
1268 ref1 = vec_ld (15, ref); | |
1269 ref += stride; | |
1270 tmp0 = vec_perm (ref0, ref1, perm); | |
1271 ref0 = vec_ld (0, ref); | |
1272 ref1 = vec_ld (15, ref); | |
1273 ref += stride; | |
1274 tmp1 = vec_perm (ref0, ref1, perm); | |
1275 tmp = vec_avg (tmp0, tmp1); | |
1276 | |
1277 do { | |
1278 ref0 = vec_ld (0, ref); | |
1279 ref1 = vec_ld (15, ref); | |
1280 ref += stride; | |
1281 vec_st (tmp, 0, dest); | |
1282 tmp0 = vec_perm (ref0, ref1, perm); | |
1283 tmp = vec_avg (tmp0, tmp1); | |
1284 | |
1285 ref0 = vec_ld (0, ref); | |
1286 ref1 = vec_ld (15, ref); | |
1287 ref += stride; | |
1288 vec_st (tmp, stride, dest); | |
1289 dest += 2*stride; | |
1290 tmp1 = vec_perm (ref0, ref1, perm); | |
1291 tmp = vec_avg (tmp0, tmp1); | |
1292 } while (--height); | |
1293 | |
1294 ref0 = vec_ld (0, ref); | |
1295 ref1 = vec_ld (15, ref); | |
1296 vec_st (tmp, 0, dest); | |
1297 tmp0 = vec_perm (ref0, ref1, perm); | |
1298 tmp = vec_avg (tmp0, tmp1); | |
1299 vec_st (tmp, stride, dest); | |
1300 } | |
1301 | |
1302 void MC_put_y_8_altivec (unsigned char * dest, const unsigned char * ref, | |
1303 const int stride, int height) | |
1304 { | |
1305 vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1; | |
1306 | |
1307 tmp0 = vec_lvsl (0, ref); | |
1308 tmp0 = vec_mergeh (tmp0, tmp0); | |
1309 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); | |
1310 tmp1 = vec_lvsl (stride, ref); | |
1311 tmp1 = vec_mergeh (tmp1, tmp1); | |
1312 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); | |
1313 | |
1314 height = (height >> 1) - 1; | |
1315 | |
1316 ref0 = vec_ld (0, ref); | |
1317 ref1 = vec_ld (7, ref); | |
1318 ref += stride; | |
1319 tmp0 = vec_perm (ref0, ref1, perm0); | |
1320 ref0 = vec_ld (0, ref); | |
1321 ref1 = vec_ld (7, ref); | |
1322 ref += stride; | |
1323 tmp1 = vec_perm (ref0, ref1, perm1); | |
1324 tmp = vec_avg (tmp0, tmp1); | |
1325 | |
1326 do { | |
1327 ref0 = vec_ld (0, ref); | |
1328 ref1 = vec_ld (7, ref); | |
1329 ref += stride; | |
1330 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
1331 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
1332 dest += stride; | |
1333 tmp0 = vec_perm (ref0, ref1, perm0); | |
1334 tmp = vec_avg (tmp0, tmp1); | |
1335 | |
1336 ref0 = vec_ld (0, ref); | |
1337 ref1 = vec_ld (7, ref); | |
1338 ref += stride; | |
1339 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
1340 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
1341 dest += stride; | |
1342 tmp1 = vec_perm (ref0, ref1, perm1); | |
1343 tmp = vec_avg (tmp0, tmp1); | |
1344 } while (--height); | |
1345 | |
1346 ref0 = vec_ld (0, ref); | |
1347 ref1 = vec_ld (7, ref); | |
1348 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
1349 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
1350 dest += stride; | |
1351 tmp0 = vec_perm (ref0, ref1, perm0); | |
1352 tmp = vec_avg (tmp0, tmp1); | |
1353 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
1354 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
1355 } | |
1356 | |
1357 void MC_put_xy_16_altivec (unsigned char * dest, const unsigned char * ref, | |
1358 const int stride, int height) | |
1359 { | |
1360 vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp; | |
1361 vector_u8_t ones; | |
1362 | |
1363 ones = vec_splat_u8 (1); | |
1364 permA = vec_lvsl (0, ref); | |
1365 permB = vec_add (permA, ones); | |
1366 | |
1367 height = (height >> 1) - 1; | |
1368 | |
1369 ref0 = vec_ld (0, ref); | |
1370 ref1 = vec_ld (16, ref); | |
1371 ref += stride; | |
1372 A = vec_perm (ref0, ref1, permA); | |
1373 B = vec_perm (ref0, ref1, permB); | |
1374 avg0 = vec_avg (A, B); | |
1375 xor0 = vec_xor (A, B); | |
1376 | |
1377 ref0 = vec_ld (0, ref); | |
1378 ref1 = vec_ld (16, ref); | |
1379 ref += stride; | |
1380 A = vec_perm (ref0, ref1, permA); | |
1381 B = vec_perm (ref0, ref1, permB); | |
1382 avg1 = vec_avg (A, B); | |
1383 xor1 = vec_xor (A, B); | |
1384 tmp = vec_sub (vec_avg (avg0, avg1), | |
1385 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
1386 vec_xor (avg0, avg1))); | |
1387 | |
1388 do { | |
1389 ref0 = vec_ld (0, ref); | |
1390 ref1 = vec_ld (16, ref); | |
1391 ref += stride; | |
1392 vec_st (tmp, 0, dest); | |
1393 A = vec_perm (ref0, ref1, permA); | |
1394 B = vec_perm (ref0, ref1, permB); | |
1395 avg0 = vec_avg (A, B); | |
1396 xor0 = vec_xor (A, B); | |
1397 tmp = vec_sub (vec_avg (avg0, avg1), | |
1398 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
1399 vec_xor (avg0, avg1))); | |
1400 | |
1401 ref0 = vec_ld (0, ref); | |
1402 ref1 = vec_ld (16, ref); | |
1403 ref += stride; | |
1404 vec_st (tmp, stride, dest); | |
1405 dest += 2*stride; | |
1406 A = vec_perm (ref0, ref1, permA); | |
1407 B = vec_perm (ref0, ref1, permB); | |
1408 avg1 = vec_avg (A, B); | |
1409 xor1 = vec_xor (A, B); | |
1410 tmp = vec_sub (vec_avg (avg0, avg1), | |
1411 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
1412 vec_xor (avg0, avg1))); | |
1413 } while (--height); | |
1414 | |
1415 ref0 = vec_ld (0, ref); | |
1416 ref1 = vec_ld (16, ref); | |
1417 vec_st (tmp, 0, dest); | |
1418 A = vec_perm (ref0, ref1, permA); | |
1419 B = vec_perm (ref0, ref1, permB); | |
1420 avg0 = vec_avg (A, B); | |
1421 xor0 = vec_xor (A, B); | |
1422 tmp = vec_sub (vec_avg (avg0, avg1), | |
1423 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
1424 vec_xor (avg0, avg1))); | |
1425 vec_st (tmp, stride, dest); | |
1426 } | |
1427 | |
1428 void MC_put_xy_8_altivec (unsigned char * dest, const unsigned char * ref, | |
1429 const int stride, int height) | |
1430 { | |
1431 vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B; | |
1432 vector_u8_t avg0, avg1, xor0, xor1, tmp, ones; | |
1433 | |
1434 ones = vec_splat_u8 (1); | |
1435 perm0A = vec_lvsl (0, ref); | |
1436 perm0A = vec_mergeh (perm0A, perm0A); | |
1437 perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A); | |
1438 perm0B = vec_add (perm0A, ones); | |
1439 perm1A = vec_lvsl (stride, ref); | |
1440 perm1A = vec_mergeh (perm1A, perm1A); | |
1441 perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A); | |
1442 perm1B = vec_add (perm1A, ones); | |
1443 | |
1444 height = (height >> 1) - 1; | |
1445 | |
1446 ref0 = vec_ld (0, ref); | |
1447 ref1 = vec_ld (8, ref); | |
1448 ref += stride; | |
1449 A = vec_perm (ref0, ref1, perm0A); | |
1450 B = vec_perm (ref0, ref1, perm0B); | |
1451 avg0 = vec_avg (A, B); | |
1452 xor0 = vec_xor (A, B); | |
1453 | |
1454 ref0 = vec_ld (0, ref); | |
1455 ref1 = vec_ld (8, ref); | |
1456 ref += stride; | |
1457 A = vec_perm (ref0, ref1, perm1A); | |
1458 B = vec_perm (ref0, ref1, perm1B); | |
1459 avg1 = vec_avg (A, B); | |
1460 xor1 = vec_xor (A, B); | |
1461 tmp = vec_sub (vec_avg (avg0, avg1), | |
1462 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
1463 vec_xor (avg0, avg1))); | |
1464 | |
1465 do { | |
1466 ref0 = vec_ld (0, ref); | |
1467 ref1 = vec_ld (8, ref); | |
1468 ref += stride; | |
1469 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
1470 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
1471 dest += stride; | |
1472 A = vec_perm (ref0, ref1, perm0A); | |
1473 B = vec_perm (ref0, ref1, perm0B); | |
1474 avg0 = vec_avg (A, B); | |
1475 xor0 = vec_xor (A, B); | |
1476 tmp = vec_sub (vec_avg (avg0, avg1), | |
1477 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
1478 vec_xor (avg0, avg1))); | |
1479 | |
1480 ref0 = vec_ld (0, ref); | |
1481 ref1 = vec_ld (8, ref); | |
1482 ref += stride; | |
1483 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
1484 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
1485 dest += stride; | |
1486 A = vec_perm (ref0, ref1, perm1A); | |
1487 B = vec_perm (ref0, ref1, perm1B); | |
1488 avg1 = vec_avg (A, B); | |
1489 xor1 = vec_xor (A, B); | |
1490 tmp = vec_sub (vec_avg (avg0, avg1), | |
1491 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
1492 vec_xor (avg0, avg1))); | |
1493 } while (--height); | |
1494 | |
1495 ref0 = vec_ld (0, ref); | |
1496 ref1 = vec_ld (8, ref); | |
1497 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
1498 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
1499 dest += stride; | |
1500 A = vec_perm (ref0, ref1, perm0A); | |
1501 B = vec_perm (ref0, ref1, perm0B); | |
1502 avg0 = vec_avg (A, B); | |
1503 xor0 = vec_xor (A, B); | |
1504 tmp = vec_sub (vec_avg (avg0, avg1), | |
1505 vec_and (vec_and (ones, vec_or (xor0, xor1)), | |
1506 vec_xor (avg0, avg1))); | |
1507 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); | |
1508 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |
1509 } | |
1510 | |
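/*
 * Disabled (#if 0) alternative below: computes the _xy_ interpolation in
 * 16-bit precision (widen with vec_mergeh, add the four samples, +2, >>2,
 * pack) instead of the byte-average trick used above.
 */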
1511 #if 0 | |
1512 void MC_put_xy_8_altivec (unsigned char * dest, const unsigned char * ref, | |
1513 const int stride, int height) | |
1514 { | |
1515 vector_u8_t permA, permB, ref0, ref1, A, B, C, D, tmp, zero, ones; | |
1516 vector_u16_t splat2, temp; | |
1517 | |
1518 ones = vec_splat_u8 (1); | |
1519 permA = vec_lvsl (0, ref); | |
1520 permB = vec_add (permA, ones); | |
1521 | |
1522 zero = vec_splat_u8 (0); | |
1523 splat2 = vec_splat_u16 (2); | |
1524 | |
1525 do { | |
1526 ref0 = vec_ld (0, ref); | |
1527 ref1 = vec_ld (8, ref); | |
1528 ref += stride; | |
1529 A = vec_perm (ref0, ref1, permA); | |
1530 B = vec_perm (ref0, ref1, permB); | |
1531 ref0 = vec_ld (0, ref); | |
1532 ref1 = vec_ld (8, ref); | |
1533 C = vec_perm (ref0, ref1, permA); | |
1534 D = vec_perm (ref0, ref1, permB); | |
1535 | |
1536 temp = vec_add (vec_add ((vector_u16_t)vec_mergeh (zero, A), | |
1537 (vector_u16_t)vec_mergeh (zero, B)), | |
1538 vec_add ((vector_u16_t)vec_mergeh (zero, C), | |
1539 (vector_u16_t)vec_mergeh (zero, D))); | |
1540 temp = vec_sr (vec_add (temp, splat2), splat2); | |
1541 tmp = vec_pack (temp, temp); | |
1542 | |
1543 vec_st (tmp, 0, dest); | |
1544 dest += stride; | |
1545 tmp = vec_avg (vec_perm (ref0, ref1, permA), | |
1546 vec_perm (ref0, ref1, permB)); | |
1547 } while (--height); | |
1548 } | |
1549 #endif | |
1550 | |
1551 void MC_avg_o_16_altivec (unsigned char * dest, const unsigned char * ref, | |
1552 const int stride, int height) | |
1553 { | |
1554 vector_u8_t perm, ref0, ref1, tmp, prev; | |
1555 | |
1556 perm = vec_lvsl (0, ref); | |
1557 | |
1558 height = (height >> 1) - 1; | |
1559 | |
1560 ref0 = vec_ld (0, ref); | |
1561 ref1 = vec_ld (15, ref); | |
1562 ref += stride; | |
1563 prev = vec_ld (0, dest); | |
1564 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); | |
1565 | |
1566 do { | |
1567 ref0 = vec_ld (0, ref); | |
1568 ref1 = vec_ld (15, ref); | |
1569 ref += stride; | |
1570 prev = vec_ld (stride, dest); | |
1571 vec_st (tmp, 0, dest); | |
1572 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); | |
1573 | |
1574 ref0 = vec_ld (0, ref); | |
1575 ref1 = vec_ld (15, ref); | |
1576 ref += stride; | |
1577 prev = vec_ld (2*stride, dest); | |
1578 vec_st (tmp, stride, dest); | |
1579 dest += 2*stride; | |
1580 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); | |
1581 } while (--height); | |
1582 | |
1583 ref0 = vec_ld (0, ref); | |
1584 ref1 = vec_ld (15, ref); | |
1585 prev = vec_ld (stride, dest); | |
1586 vec_st (tmp, 0, dest); | |
1587 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); | |
1588 vec_st (tmp, stride, dest); | |
1589 } | |
1590 | |
1591 void MC_avg_o_8_altivec (unsigned char * dest, const unsigned char * ref, | |
1592 const int stride, int height) | |
1593 { | |
1594 vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1, prev; | |
1595 | |
1596 tmp0 = vec_lvsl (0, ref); | |
1597 tmp0 = vec_mergeh (tmp0, tmp0); | |
1598 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); | |
1599 tmp1 = vec_lvsl (stride, ref); | |
1600 tmp1 = vec_mergeh (tmp1, tmp1); | |
1601 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); | |
1602 | |
1603 height = (height >> 1) - 1; | |
1604 | |
1605 ref0 = vec_ld (0, ref); | |
1606 ref1 = vec_ld (7, ref); | |
1607 ref += stride; | |
1608 prev = vec_ld (0, dest); | |
1609 tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0)); | |
1610 | |
1611 do { | |
1612 ref0 = vec_ld (0, ref); | |
1613 ref1 = vec_ld (7, ref); | |
1614 ref += stride; | |
1615 prev = vec_ld (stride, dest); | |
1616 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); | |
1617 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); | |
1618 dest += stride; | |
1619 tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1)); | |
1620 | |
1621 ref0 = vec_ld (0, ref); | |
1622 ref1 = vec_ld (7, ref); | |
1623 ref += stride; | |
1624 prev = vec_ld (stride, dest); | |
1625 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); | |
1626 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); | |
1627 dest += stride; | |
1628 tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0)); | |
1629 } while (--height); | |
1630 | |
1631 ref0 = vec_ld (0, ref); | |
1632 ref1 = vec_ld (7, ref); | |
1633 prev = vec_ld (stride, dest); | |
1634 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); | |
1635 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); | |
1636 dest += stride; | |
1637 tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1)); | |
1638 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); | |
1639 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); | |
1640 } | |
1641 | |
1642 void MC_avg_x_16_altivec (unsigned char * dest, const unsigned char * ref, | |
1643 const int stride, int height) | |
1644 { | |
1645 vector_u8_t permA, permB, ref0, ref1, tmp, prev; | |
1646 | |
1647 permA = vec_lvsl (0, ref); | |
1648 permB = vec_add (permA, vec_splat_u8 (1)); | |
1649 | |
1650 height = (height >> 1) - 1; | |
1651 | |
1652 ref0 = vec_ld (0, ref); | |
1653 ref1 = vec_ld (16, ref); | |
1654 prev = vec_ld (0, dest); | |
1655 ref += stride; | |
1656 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), | |
1657 vec_perm (ref0, ref1, permB))); | |
1658 | |
1659 do { | |
1660 ref0 = vec_ld (0, ref); | |
1661 ref1 = vec_ld (16, ref); | |
1662 ref += stride; | |
1663 prev = vec_ld (stride, dest); | |
1664 vec_st (tmp, 0, dest); | |
1665 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), | |
1666 vec_perm (ref0, ref1, permB))); | |
1667 | |
1668 ref0 = vec_ld (0, ref); | |
1669 ref1 = vec_ld (16, ref); | |
1670 ref += stride; | |
1671 prev = vec_ld (2*stride, dest); | |
1672 vec_st (tmp, stride, dest); | |
1673 dest += 2*stride; | |
1674 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), | |
1675 vec_perm (ref0, ref1, permB))); | |
1676 } while (--height); | |
1677 | |
1678 ref0 = vec_ld (0, ref); | |
1679 ref1 = vec_ld (16, ref); | |
1680 prev = vec_ld (stride, dest); | |
1681 vec_st (tmp, 0, dest); | |
1682 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), | |
1683 vec_perm (ref0, ref1, permB))); | |
1684 vec_st (tmp, stride, dest); | |
1685 } | |
1686 | |
void MC_avg_x_8_altivec (unsigned char * dest, const unsigned char * ref,
                         const int stride, int height)
{
    vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1;
    vector_u8_t prev;

    ones = vec_splat_u8 (1);
    tmp0 = vec_lvsl (0, ref);
    tmp0 = vec_mergeh (tmp0, tmp0);
    perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
    perm0B = vec_add (perm0A, ones);
    tmp1 = vec_lvsl (stride, ref);
    tmp1 = vec_mergeh (tmp1, tmp1);
    perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
    perm1B = vec_add (perm1A, ones);

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (8, ref);
    prev = vec_ld (0, dest);
    ref += stride;
    tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A),
                                   vec_perm (ref0, ref1, perm0B)));

    do {
        ref0 = vec_ld (0, ref);
        ref1 = vec_ld (8, ref);
        ref += stride;
        prev = vec_ld (stride, dest);
        vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
        vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
        dest += stride;
        tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A),
                                       vec_perm (ref0, ref1, perm1B)));

        ref0 = vec_ld (0, ref);
        ref1 = vec_ld (8, ref);
        ref += stride;
        prev = vec_ld (stride, dest);
        vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
        vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
        dest += stride;
        tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A),
                                       vec_perm (ref0, ref1, perm0B)));
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (8, ref);
    prev = vec_ld (stride, dest);
    vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
    dest += stride;
    tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A),
                                   vec_perm (ref0, ref1, perm1B)));
    vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
}

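/*
 * 16-wide "avg" prediction with vertical half-pel interpolation:
 *     dest[i] = avg (dest[i], avg (ref[i], (ref + stride)[i]))
 * tmp0 and tmp1 hold two consecutive reference rows and swap roles on
 * alternate output rows, so each reference row is loaded and permuted
 * only once.
 */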
void MC_avg_y_16_altivec (unsigned char * dest, const unsigned char * ref,
                          const int stride, int height)
{
    vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp, prev;

    perm = vec_lvsl (0, ref);

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (15, ref);
    ref += stride;
    tmp0 = vec_perm (ref0, ref1, perm);
    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (15, ref);
    ref += stride;
    prev = vec_ld (0, dest);
    tmp1 = vec_perm (ref0, ref1, perm);
    tmp = vec_avg (prev, vec_avg (tmp0, tmp1));

    do {
        ref0 = vec_ld (0, ref);
        ref1 = vec_ld (15, ref);
        ref += stride;
        prev = vec_ld (stride, dest);
        vec_st (tmp, 0, dest);
        tmp0 = vec_perm (ref0, ref1, perm);
        tmp = vec_avg (prev, vec_avg (tmp0, tmp1));

        ref0 = vec_ld (0, ref);
        ref1 = vec_ld (15, ref);
        ref += stride;
        prev = vec_ld (2*stride, dest);
        vec_st (tmp, stride, dest);
        dest += 2*stride;
        tmp1 = vec_perm (ref0, ref1, perm);
        tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (15, ref);
    prev = vec_ld (stride, dest);
    vec_st (tmp, 0, dest);
    tmp0 = vec_perm (ref0, ref1, perm);
    tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
    vec_st (tmp, stride, dest);
}

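/*
 * 8-wide vertical half-pel "avg" prediction.  As in MC_avg_x_8_altivec,
 * the permutes replicate the 8 useful bytes into both vector halves so
 * that the paired vec_ste stores work for either 16-byte alignment of
 * dest, and perm0/perm1 alternate between even and odd reference rows.
 */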
void MC_avg_y_8_altivec (unsigned char * dest, const unsigned char * ref,
                         const int stride, int height)
{
    vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1, prev;

    tmp0 = vec_lvsl (0, ref);
    tmp0 = vec_mergeh (tmp0, tmp0);
    perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
    tmp1 = vec_lvsl (stride, ref);
    tmp1 = vec_mergeh (tmp1, tmp1);
    perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (7, ref);
    ref += stride;
    tmp0 = vec_perm (ref0, ref1, perm0);
    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (7, ref);
    ref += stride;
    prev = vec_ld (0, dest);
    tmp1 = vec_perm (ref0, ref1, perm1);
    tmp = vec_avg (prev, vec_avg (tmp0, tmp1));

    do {
        ref0 = vec_ld (0, ref);
        ref1 = vec_ld (7, ref);
        ref += stride;
        prev = vec_ld (stride, dest);
        vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
        vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
        dest += stride;
        tmp0 = vec_perm (ref0, ref1, perm0);
        tmp = vec_avg (prev, vec_avg (tmp0, tmp1));

        ref0 = vec_ld (0, ref);
        ref1 = vec_ld (7, ref);
        ref += stride;
        prev = vec_ld (stride, dest);
        vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
        vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
        dest += stride;
        tmp1 = vec_perm (ref0, ref1, perm1);
        tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (7, ref);
    prev = vec_ld (stride, dest);
    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
    dest += stride;
    tmp0 = vec_perm (ref0, ref1, perm0);
    tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
}

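/*
 * 16-wide "avg" prediction with both horizontal and vertical half-pel
 * interpolation.  For reference bytes A, B (current row) and C, D (next
 * row) the interpolated value should be (A + B + C + D + 2) >> 2.  Since
 * vec_avg rounds up, avg (avg (A, B), avg (C, D)) can be one larger than
 * that; the per-byte term
 *     ((A ^ B) | (C ^ D)) & 1 & (avg (A, B) ^ avg (C, D))
 * is 1 in exactly those cases, so subtracting it yields the correctly
 * rounded four-point average, which is then averaged with dest.
 */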
void MC_avg_xy_16_altivec (unsigned char * dest, const unsigned char * ref,
                           const int stride, int height)
{
    vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp;
    vector_u8_t ones, prev;

    ones = vec_splat_u8 (1);
    permA = vec_lvsl (0, ref);
    permB = vec_add (permA, ones);

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (16, ref);
    ref += stride;
    A = vec_perm (ref0, ref1, permA);
    B = vec_perm (ref0, ref1, permB);
    avg0 = vec_avg (A, B);
    xor0 = vec_xor (A, B);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (16, ref);
    ref += stride;
    prev = vec_ld (0, dest);
    A = vec_perm (ref0, ref1, permA);
    B = vec_perm (ref0, ref1, permB);
    avg1 = vec_avg (A, B);
    xor1 = vec_xor (A, B);
    tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
                                  vec_and (vec_and (ones, vec_or (xor0, xor1)),
                                           vec_xor (avg0, avg1))));

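    /*
     * avg0/xor0 and avg1/xor1 describe the two most recent horizontally
     * interpolated reference rows; each half-iteration below recomputes
     * one pair and combines it with the other, so every reference row is
     * loaded and interpolated only once.
     */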
    do {
        ref0 = vec_ld (0, ref);
        ref1 = vec_ld (16, ref);
        ref += stride;
        prev = vec_ld (stride, dest);
        vec_st (tmp, 0, dest);
        A = vec_perm (ref0, ref1, permA);
        B = vec_perm (ref0, ref1, permB);
        avg0 = vec_avg (A, B);
        xor0 = vec_xor (A, B);
        tmp = vec_avg (prev,
                       vec_sub (vec_avg (avg0, avg1),
                                vec_and (vec_and (ones, vec_or (xor0, xor1)),
                                         vec_xor (avg0, avg1))));

        ref0 = vec_ld (0, ref);
        ref1 = vec_ld (16, ref);
        ref += stride;
        prev = vec_ld (2*stride, dest);
        vec_st (tmp, stride, dest);
        dest += 2*stride;
        A = vec_perm (ref0, ref1, permA);
        B = vec_perm (ref0, ref1, permB);
        avg1 = vec_avg (A, B);
        xor1 = vec_xor (A, B);
        tmp = vec_avg (prev,
                       vec_sub (vec_avg (avg0, avg1),
                                vec_and (vec_and (ones, vec_or (xor0, xor1)),
                                         vec_xor (avg0, avg1))));
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (16, ref);
    prev = vec_ld (stride, dest);
    vec_st (tmp, 0, dest);
    A = vec_perm (ref0, ref1, permA);
    B = vec_perm (ref0, ref1, permB);
    avg0 = vec_avg (A, B);
    xor0 = vec_xor (A, B);
    tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
                                  vec_and (vec_and (ones, vec_or (xor0, xor1)),
                                           vec_xor (avg0, avg1))));
    vec_st (tmp, stride, dest);
}

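/*
 * 8-wide variant of MC_avg_xy_16_altivec, using the same rounding
 * correction together with the replicated-permute and paired vec_ste
 * store idiom of the other 8-wide routines.
 */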
void MC_avg_xy_8_altivec (unsigned char * dest, const unsigned char * ref,
                          const int stride, int height)
{
    vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B;
    vector_u8_t avg0, avg1, xor0, xor1, tmp, ones, prev;

    ones = vec_splat_u8 (1);
    perm0A = vec_lvsl (0, ref);
    perm0A = vec_mergeh (perm0A, perm0A);
    perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A);
    perm0B = vec_add (perm0A, ones);
    perm1A = vec_lvsl (stride, ref);
    perm1A = vec_mergeh (perm1A, perm1A);
    perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A);
    perm1B = vec_add (perm1A, ones);

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (8, ref);
    ref += stride;
    A = vec_perm (ref0, ref1, perm0A);
    B = vec_perm (ref0, ref1, perm0B);
    avg0 = vec_avg (A, B);
    xor0 = vec_xor (A, B);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (8, ref);
    ref += stride;
    prev = vec_ld (0, dest);
    A = vec_perm (ref0, ref1, perm1A);
    B = vec_perm (ref0, ref1, perm1B);
    avg1 = vec_avg (A, B);
    xor1 = vec_xor (A, B);
    tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
                                  vec_and (vec_and (ones, vec_or (xor0, xor1)),
                                           vec_xor (avg0, avg1))));

    do {
        ref0 = vec_ld (0, ref);
        ref1 = vec_ld (8, ref);
        ref += stride;
        prev = vec_ld (stride, dest);
        vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
        vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
        dest += stride;
        A = vec_perm (ref0, ref1, perm0A);
        B = vec_perm (ref0, ref1, perm0B);
        avg0 = vec_avg (A, B);
        xor0 = vec_xor (A, B);
        tmp = vec_avg (prev,
                       vec_sub (vec_avg (avg0, avg1),
                                vec_and (vec_and (ones, vec_or (xor0, xor1)),
                                         vec_xor (avg0, avg1))));

        ref0 = vec_ld (0, ref);
        ref1 = vec_ld (8, ref);
        ref += stride;
        prev = vec_ld (stride, dest);
        vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
        vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
        dest += stride;
        A = vec_perm (ref0, ref1, perm1A);
        B = vec_perm (ref0, ref1, perm1B);
        avg1 = vec_avg (A, B);
        xor1 = vec_xor (A, B);
        tmp = vec_avg (prev,
                       vec_sub (vec_avg (avg0, avg1),
                                vec_and (vec_and (ones, vec_or (xor0, xor1)),
                                         vec_xor (avg0, avg1))));
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (8, ref);
    prev = vec_ld (stride, dest);
    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
    dest += stride;
    A = vec_perm (ref0, ref1, perm0A);
    B = vec_perm (ref0, ref1, perm0B);
    avg0 = vec_avg (A, B);
    xor0 = vec_xor (A, B);
    tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
                                  vec_and (vec_and (ones, vec_or (xor0, xor1)),
                                           vec_xor (avg0, avg1))));
    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
}

#endif /* __ALTIVEC__ */