comparison armv4l/dsputil_arm_s.S @ 2734:aeea63c97878 libavcodec

Better ARM support for mplayer/ffmpeg, ported from the atty fork. While playing with some new hardware, I found it's running a forked mplayer -- and it looks like they're following the GPL. The maintainer's page is here: http://atty.jp/?Zaurus/mplayer . Unfortunately it's mostly in Japanese, so it's hard to figure out any details. Their code looks quite interesting (at least to those of us w/ ARM CPUs). The patches I've attached are the patches from atty.jp with a couple of modifications by myself: - ported to current CVS - reverted their change of removing SNOW support from ffmpeg - cleaned up their bswap mess - removed DOS-style linebreaks from various files. Patch by (Bernhard Rosenkraenzer: bero, arklinux org)
author michael
date Thu, 26 May 2005 14:32:46 +0000
parents
children ef2149182f1c
comparison
equal deleted inserted replaced
2733:32336384162e 2734:aeea63c97878
1 @
2 @ ARMv4L optimized DSP utils
3 @ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
4 @
5 @ This library is free software; you can redistribute it and/or
6 @ modify it under the terms of the GNU Lesser General Public
7 @ License as published by the Free Software Foundation; either
8 @ version 2 of the License, or (at your option) any later version.
9 @
10 @ This library is distributed in the hope that it will be useful,
11 @ but WITHOUT ANY WARRANTY; without even the implied warranty of
12 @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 @ Lesser General Public License for more details.
14 @
15 @ You should have received a copy of the GNU Lesser General Public
16 @ License along with this library; if not, write to the Free Software
17 @ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 @
19
@ ADJ_ALIGN_QUADWORD_D shift, Rd0..Rd3, Rn0..Rn4
@
@ Treats Rn0..Rn4 as one 160-bit value (Rn0 least significant), shifts it
@ right by \shift bytes and writes the low four words to Rd0..Rd3.
@ The source registers are left untouched.
@
@   shift : byte offset; must be 1, 2 or 3 (0 would yield an invalid lsl #32)
@   Rd0-3 : destination words; must not overlap Rn0..Rn4
.macro ADJ_ALIGN_QUADWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
        @ low part of each result word: upper bytes of the matching source word
        mov     \Rd0, \Rn0, lsr #(8 * \shift)
        mov     \Rd1, \Rn1, lsr #(8 * \shift)
        mov     \Rd2, \Rn2, lsr #(8 * \shift)
        mov     \Rd3, \Rn3, lsr #(8 * \shift)
        @ high part of each result word: lower bytes of the following source word
        orr     \Rd0, \Rd0, \Rn1, lsl #(32 - 8 * \shift)
        orr     \Rd1, \Rd1, \Rn2, lsl #(32 - 8 * \shift)
        orr     \Rd2, \Rd2, \Rn3, lsl #(32 - 8 * \shift)
        orr     \Rd3, \Rd3, \Rn4, lsl #(32 - 8 * \shift)
.endm
@ ADJ_ALIGN_DOUBLEWORD shift, R0, R1, R2
@
@ In-place variant: treats R0..R2 as one 96-bit value (R0 least significant),
@ shifts it right by \shift bytes and leaves the low two words in R0 and R1.
@ R2 is read but not modified.
@
@   shift : byte offset; must be 1, 2 or 3 (0 would yield an invalid lsl #32)
.macro ADJ_ALIGN_DOUBLEWORD shift, R0, R1, R2
        @ R0 must be finished before R1 is shifted, because R0 needs the
        @ original R1.
        mov     \R0, \R0, lsr #(8 * \shift)
        orr     \R0, \R0, \R1, lsl #(32 - 8 * \shift)
        mov     \R1, \R1, lsr #(8 * \shift)
        orr     \R1, \R1, \R2, lsl #(32 - 8 * \shift)
.endm
@ ADJ_ALIGN_DOUBLEWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
@
@ Treats Rsrc0..Rsrc2 as one 96-bit value (Rsrc0 least significant), shifts
@ it right by \shift bytes and writes the low two words to Rdst0 and Rdst1.
@
@   shift : byte offset; must be 1, 2 or 3 (0 would yield an invalid lsl #32)
@
@ The instruction order is significant: Rsrc0 is consumed by the first
@ instruction only, so Rdst1 may be the same register as Rsrc0
@ (RND_XY2_IT relies on this for its align == 3 case).
.macro ADJ_ALIGN_DOUBLEWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
        mov     \Rdst0, \Rsrc0, lsr #(8 * \shift)
        mov     \Rdst1, \Rsrc1, lsr #(8 * \shift)
        orr     \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - 8 * \shift)
        orr     \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - 8 * \shift)
.endm
42
@ RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
@
@ Byte-wise average with rounding up, on two independent 32-bit lanes
@ (four packed bytes each):
@
@       Rd = (Rn | Rm) - (((Rn ^ Rm) & 0xFEFEFEFE) >> 1)
@
@   Rd0/Rd1 : results
@   Rn0/Rn1 : first operands, DESTROYED (left holding Rn | Rm)
@   Rm0/Rm1 : second operands, preserved
@   Rmask   : must contain 0xFEFEFEFE; masking before the shift keeps each
@             byte's low bit from moving into the byte below it
@
@ The two lanes are interleaved instruction by instruction.
.macro RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        eor     \Rd0, \Rn0, \Rm0                @ bits that differ
        eor     \Rd1, \Rn1, \Rm1
        orr     \Rn0, \Rn0, \Rm0                @ Rn := Rn | Rm
        orr     \Rn1, \Rn1, \Rm1
        and     \Rd0, \Rd0, \Rmask              @ drop bit 0 of every byte
        and     \Rd1, \Rd1, \Rmask
        sub     \Rd0, \Rn0, \Rd0, lsr #1        @ (Rn | Rm) - (diff >> 1)
        sub     \Rd1, \Rn1, \Rd1, lsr #1
.endm
56
@ NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
@
@ Byte-wise average with rounding down (truncation), on two independent
@ 32-bit lanes (four packed bytes each):
@
@       Rd = (Rn & Rm) + (((Rn ^ Rm) & 0xFEFEFEFE) >> 1)
@
@ Note the '+': the shared bits (Rn & Rm) are added to half of the differing
@ bits.  The '-' form belongs to the rounding-up variant, which starts from
@ (Rn | Rm) instead.
@
@   Rd0/Rd1 : results
@   Rn0/Rn1 : first operands, DESTROYED (left holding Rn & Rm)
@   Rm0/Rm1 : second operands, preserved
@   Rmask   : must contain 0xFEFEFEFE; masking before the shift keeps each
@             byte's low bit from moving into the byte below it
.macro NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        eor     \Rd0, \Rn0, \Rm0                @ bits that differ
        eor     \Rd1, \Rn1, \Rm1
        and     \Rn0, \Rn0, \Rm0                @ Rn := Rn & Rm
        and     \Rn1, \Rn1, \Rm1
        and     \Rd0, \Rd0, \Rmask              @ drop bit 0 of every byte
        and     \Rd1, \Rd1, \Rmask
        add     \Rd0, \Rn0, \Rd0, lsr #1        @ (Rn & Rm) + (diff >> 1)
        add     \Rd1, \Rn1, \Rd1, lsr #1
.endm
70
71 @ ----------------------------------------------------------------
@ void put_pixels16_arm(uint8_t *block, const uint8_t *pixels,
@                       int line_size, int h)
@
@ Copies h rows of 16 bytes from pixels to block; both pointers advance by
@ line_size after every row.
@
@ In:    r0 = block     (written with stmia, so it must be word aligned)
@        r1 = pixels    (any byte alignment)
@        r2 = line_size
@        r3 = h         (must be >= 1: the loops test the count after a row)
@ Uses:  r4-r11 (saved/restored), r12, flags
@
@ The source pointer is rounded down to a word boundary and one of four
@ loops is selected by its original low two bits.  The selection uses
@ PC-relative branches rather than a table of absolute code addresses, so
@ the function needs no relocations in the text section.
@
@ On ARM, ".align n" means 2^n bytes; 5 gives 32-byte alignment.
        .align 5
        .global put_pixels16_arm
put_pixels16_arm:
        pld     [r1]
        stmfd   sp!, {r4-r11, lr}
        ands    r4, r1, #3              @ r4 = misalignment, Z set when aligned
        bic     r1, r1, #3              @ word-align the source pointer
        bne     7f                      @ misaligned: pick a shifting loop
1:      @ source is word aligned: plain four-word copy
        ldmia   r1, {r4-r7}
        add     r1, r1, r2
        stmia   r0, {r4-r7}
        pld     [r1]
        subs    r3, r3, #1
        add     r0, r0, r2
        bne     1b
        ldmfd   sp!, {r4-r11, pc}
        .align 5
2:      @ source is 1 byte past a word boundary
        ldmia   r1, {r4-r8}             @ five words cover the 16 bytes needed
        add     r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r9-r12}
        add     r0, r0, r2
        bne     2b
        ldmfd   sp!, {r4-r11, pc}
        .align 5
3:      @ source is 2 bytes past a word boundary
        ldmia   r1, {r4-r8}
        add     r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r9-r12}
        add     r0, r0, r2
        bne     3b
        ldmfd   sp!, {r4-r11, pc}
        .align 5
4:      @ source is 3 bytes past a word boundary
        ldmia   r1, {r4-r8}
        add     r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r9-r12}
        add     r0, r0, r2
        bne     4b
        ldmfd   sp!, {r4-r11, pc}
7:      @ r4 is 1, 2 or 3 here
        cmp     r4, #2
        blo     2b                      @ r4 == 1
        beq     3b                      @ r4 == 2
        b       4b                      @ r4 == 3
132
133 @ ----------------------------------------------------------------
@ void put_pixels8_arm(uint8_t *block, const uint8_t *pixels,
@                      int line_size, int h)
@
@ Copies h rows of 8 bytes from pixels to block; both pointers advance by
@ line_size after every row.
@
@ In:    r0 = block     (written with stmia, so it must be word aligned)
@        r1 = pixels    (any byte alignment)
@        r2 = line_size
@        r3 = h         (must be >= 1: the loops test the count after a row)
@ Uses:  r4-r5 (saved/restored), r12, flags
@
@ The source pointer is rounded down to a word boundary and one of four
@ loops is selected by its original low two bits.  The selection uses
@ PC-relative branches rather than a table of absolute code addresses, so
@ the function needs no relocations in the text section.
@
@ On ARM, ".align n" means 2^n bytes; 5 gives 32-byte alignment.
        .align 5
        .global put_pixels8_arm
put_pixels8_arm:
        pld     [r1]
        stmfd   sp!, {r4-r5, lr}
        ands    r4, r1, #3              @ r4 = misalignment, Z set when aligned
        bic     r1, r1, #3              @ word-align the source pointer
        bne     7f                      @ misaligned: pick a shifting loop
1:      @ source is word aligned: plain two-word copy
        ldmia   r1, {r4-r5}
        add     r1, r1, r2
        subs    r3, r3, #1
        pld     [r1]
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bne     1b
        ldmfd   sp!, {r4-r5, pc}
        .align 5
2:      @ source is 1 byte past a word boundary
        ldmia   r1, {r4-r5, r12}        @ three words cover the 8 bytes needed
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bne     2b
        ldmfd   sp!, {r4-r5, pc}
        .align 5
3:      @ source is 2 bytes past a word boundary
        ldmia   r1, {r4-r5, r12}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bne     3b
        ldmfd   sp!, {r4-r5, pc}
        .align 5
4:      @ source is 3 bytes past a word boundary
        ldmia   r1, {r4-r5, r12}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bne     4b
        ldmfd   sp!, {r4-r5, pc}
7:      @ r4 is 1, 2 or 3 here
        cmp     r4, #2
        blo     2b                      @ r4 == 1
        beq     3b                      @ r4 == 2
        b       4b                      @ r4 == 3
194
195 @ ----------------------------------------------------------------
@ void put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels,
@                         int line_size, int h)
@
@ Horizontal half-pel interpolation with rounding up: for each of h rows,
@ every one of the 8 output bytes is the RND_AVG32 average of the source
@ byte and its right-hand neighbour, i.e. (pixels[i] + pixels[i+1] + 1) >> 1.
@
@ In:    r0 = block     (written with stmia, so it must be word aligned)
@        r1 = pixels    (any byte alignment)
@        r2 = line_size
@        r3 = h         (must be >= 1: the loops test the count after a row)
@ Uses:  r4-r10 (saved/restored), r12 = 0xFEFEFEFE, flags
@
@ Each loop loads three aligned words (covering the 9 source bytes needed)
@ and builds the "pixels" and "pixels + 1" views with byte shifts.
@ Dispatch on the source misalignment uses PC-relative branches and the
@ mask is loaded PC-relative, so no absolute addresses are stored in the
@ text section.
@
@ On ARM, ".align n" means 2^n bytes; 5 gives 32-byte alignment.
        .align 5
        .global put_pixels8_x2_arm
put_pixels8_x2_arm:
        pld     [r1]
        stmfd   sp!, {r4-r10, lr}
        ands    r4, r1, #3              @ r4 = misalignment, Z set when aligned
        ldr     r12, 5f                 @ r12 = 0xFEFEFEFE (flags untouched)
        bic     r1, r1, #3              @ word-align the source pointer
        bne     7f
1:      @ misalignment 0: views are the raw words and the words shifted by 1
        ldmia   r1, {r4-r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        pld     [r1]
        RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        subs    r3, r3, #1
        stmia   r0, {r8-r9}
        add     r0, r0, r2
        bne     1b
        ldmfd   sp!, {r4-r10, pc}
        .align 5
2:      @ misalignment 1: views are the words shifted by 1 and by 2
        ldmia   r1, {r4-r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
        pld     [r1]
        RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs    r3, r3, #1
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bne     2b
        ldmfd   sp!, {r4-r10, pc}
        .align 5
3:      @ misalignment 2: views are the words shifted by 2 and by 3
        ldmia   r1, {r4-r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
        pld     [r1]
        RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs    r3, r3, #1
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bne     3b
        ldmfd   sp!, {r4-r10, pc}
        .align 5
4:      @ misalignment 3: "pixels + 1" is exactly the 2nd and 3rd loaded words
        ldmia   r1, {r4-r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
        pld     [r1]
        RND_AVG32 r8, r9, r6, r7, r5, r10, r12
        subs    r3, r3, #1
        stmia   r0, {r8-r9}
        add     r0, r0, r2
        bne     4b
        ldmfd   sp!, {r4-r10, pc}
7:      @ r4 is 1, 2 or 3 here
        cmp     r4, #2
        blo     2b                      @ r4 == 1
        beq     3b                      @ r4 == 2
        b       4b                      @ r4 == 3
5:
        .word   0xFEFEFEFE              @ Rmask for RND_AVG32
264
@ void put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels,
@                                int line_size, int h)
@
@ Horizontal half-pel interpolation with rounding down: for each of h rows,
@ every one of the 8 output bytes is the NO_RND_AVG32 average of the source
@ byte and its right-hand neighbour, i.e. (pixels[i] + pixels[i+1]) >> 1.
@
@ In:    r0 = block     (written with stmia, so it must be word aligned)
@        r1 = pixels    (any byte alignment)
@        r2 = line_size
@        r3 = h         (must be >= 1: the loops test the count after a row)
@ Uses:  r4-r10 (saved/restored), r12 = 0xFEFEFEFE, flags
@
@ Each loop loads three aligned words (covering the 9 source bytes needed)
@ and builds the "pixels" and "pixels + 1" views with byte shifts.
@ Dispatch on the source misalignment uses PC-relative branches and the
@ mask is loaded PC-relative, so no absolute addresses are stored in the
@ text section.
@
@ On ARM, ".align n" means 2^n bytes; 5 gives 32-byte alignment.
        .align 5
        .global put_no_rnd_pixels8_x2_arm
put_no_rnd_pixels8_x2_arm:
        pld     [r1]
        stmfd   sp!, {r4-r10, lr}
        ands    r4, r1, #3              @ r4 = misalignment, Z set when aligned
        ldr     r12, 5f                 @ r12 = 0xFEFEFEFE (flags untouched)
        bic     r1, r1, #3              @ word-align the source pointer
        bne     7f
1:      @ misalignment 0: views are the raw words and the words shifted by 1
        ldmia   r1, {r4-r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        pld     [r1]
        NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        subs    r3, r3, #1
        stmia   r0, {r8-r9}
        add     r0, r0, r2
        bne     1b
        ldmfd   sp!, {r4-r10, pc}
        .align 5
2:      @ misalignment 1: views are the words shifted by 1 and by 2
        ldmia   r1, {r4-r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
        pld     [r1]
        NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs    r3, r3, #1
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bne     2b
        ldmfd   sp!, {r4-r10, pc}
        .align 5
3:      @ misalignment 2: views are the words shifted by 2 and by 3
        ldmia   r1, {r4-r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
        pld     [r1]
        NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs    r3, r3, #1
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bne     3b
        ldmfd   sp!, {r4-r10, pc}
        .align 5
4:      @ misalignment 3: "pixels + 1" is exactly the 2nd and 3rd loaded words
        ldmia   r1, {r4-r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
        pld     [r1]
        NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12
        subs    r3, r3, #1
        stmia   r0, {r8-r9}
        add     r0, r0, r2
        bne     4b
        ldmfd   sp!, {r4-r10, pc}
7:      @ r4 is 1, 2 or 3 here
        cmp     r4, #2
        blo     2b                      @ r4 == 1
        beq     3b                      @ r4 == 2
        b       4b                      @ r4 == 3
5:
        .word   0xFEFEFEFE              @ Rmask for NO_RND_AVG32
333
334
335 @ ----------------------------------------------------------------
@ void put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels,
@                         int line_size, int h)
@
@ Vertical half-pel interpolation with rounding up: output row y is the
@ RND_AVG32 average of source rows y and y + 1 (8 bytes wide), so h + 1
@ source rows are read.
@
@ In:    r0 = block     (written with stmia, so it must be word aligned)
@        r1 = pixels    (any byte alignment)
@        r2 = line_size
@        r3 = h         (must be even and >= 2: it is halved and every loop
@                        iteration emits two rows, testing the count last)
@ Uses:  r4-r11 (saved/restored), r12 = 0xFEFEFEFE, flags
@
@ Each loop keeps the previous source row in registers and alternates
@ between two register sets, so every source row is loaded only once.
@ Dispatch on the source misalignment uses PC-relative branches and the
@ mask is loaded PC-relative, so no absolute addresses are stored in the
@ text section.
@
@ On ARM, ".align n" means 2^n bytes; 5 gives 32-byte alignment.
        .align 5
        .global put_pixels8_y2_arm
put_pixels8_y2_arm:
        pld     [r1]
        stmfd   sp!, {r4-r11, lr}
        ands    r4, r1, #3              @ r4 = misalignment, Z set when aligned
        mov     r3, r3, lsr #1          @ r3 = number of row pairs
        ldr     r12, 5f                 @ r12 = 0xFEFEFEFE (flags untouched)
        bic     r1, r1, #3              @ word-align the source pointer
        bne     7f
1:      @ misalignment 0
        ldmia   r1, {r4-r5}             @ first source row
        add     r1, r1, r2
6:      ldmia   r1, {r6-r7}             @ next row
        add     r1, r1, r2
        pld     [r1]
        RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        ldmia   r1, {r4-r5}             @ row after that (r4/r5 were consumed)
        add     r1, r1, r2
        stmia   r0, {r8-r9}
        add     r0, r0, r2
        pld     [r1]
        RND_AVG32 r8, r9, r6, r7, r4, r5, r12
        subs    r3, r3, #1
        stmia   r0, {r8-r9}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11, pc}
        .align 5
2:      @ misalignment 1
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
6:      ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
        subs    r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11, pc}
        .align 5
3:      @ misalignment 2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
6:      ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
        subs    r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11, pc}
        .align 5
4:      @ misalignment 3
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
6:      ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
        subs    r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11, pc}
7:      @ r4 is 1, 2 or 3 here
        cmp     r4, #2
        blo     2b                      @ r4 == 1
        beq     3b                      @ r4 == 2
        b       4b                      @ r4 == 3
5:
        .word   0xFEFEFEFE              @ Rmask for RND_AVG32
444
@ void put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels,
@                                int line_size, int h)
@
@ Vertical half-pel interpolation with rounding down: output row y is the
@ NO_RND_AVG32 average of source rows y and y + 1 (8 bytes wide), so h + 1
@ source rows are read.
@
@ In:    r0 = block     (written with stmia, so it must be word aligned)
@        r1 = pixels    (any byte alignment)
@        r2 = line_size
@        r3 = h         (must be even and >= 2: it is halved and every loop
@                        iteration emits two rows, testing the count last)
@ Uses:  r4-r11 (saved/restored), r12 = 0xFEFEFEFE, flags
@
@ Each loop keeps the previous source row in registers and alternates
@ between two register sets, so every source row is loaded only once.
@ Dispatch on the source misalignment uses PC-relative branches and the
@ mask is loaded PC-relative, so no absolute addresses are stored in the
@ text section.
@
@ On ARM, ".align n" means 2^n bytes; 5 gives 32-byte alignment.
        .align 5
        .global put_no_rnd_pixels8_y2_arm
put_no_rnd_pixels8_y2_arm:
        pld     [r1]
        stmfd   sp!, {r4-r11, lr}
        ands    r4, r1, #3              @ r4 = misalignment, Z set when aligned
        mov     r3, r3, lsr #1          @ r3 = number of row pairs
        ldr     r12, 5f                 @ r12 = 0xFEFEFEFE (flags untouched)
        bic     r1, r1, #3              @ word-align the source pointer
        bne     7f
1:      @ misalignment 0
        ldmia   r1, {r4-r5}             @ first source row
        add     r1, r1, r2
6:      ldmia   r1, {r6-r7}             @ next row
        add     r1, r1, r2
        pld     [r1]
        NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        ldmia   r1, {r4-r5}             @ row after that (r4/r5 were consumed)
        add     r1, r1, r2
        stmia   r0, {r8-r9}
        add     r0, r0, r2
        pld     [r1]
        NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12
        subs    r3, r3, #1
        stmia   r0, {r8-r9}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11, pc}
        .align 5
2:      @ misalignment 1
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
6:      ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
        subs    r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11, pc}
        .align 5
3:      @ misalignment 2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
6:      ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
        subs    r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11, pc}
        .align 5
4:      @ misalignment 3
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
6:      ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
        subs    r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11, pc}
7:      @ r4 is 1, 2 or 3 here
        cmp     r4, #2
        blo     2b                      @ r4 == 1
        beq     3b                      @ r4 == 2
        b       4b                      @ r4 == 3
5:
        .word   0xFEFEFEFE              @ Rmask for NO_RND_AVG32
552
553 @ ----------------------------------------------------------------
@ RND_XY2_IT align, rnd
@
@ Processes one source row for the xy2 (2x2 average) functions.  With
@ a = the 8 pixels at the source position and b = the 8 pixels one byte to
@ the right, it produces, per packed byte:
@
@       low  = (a & 0x03030303) + (b & 0x03030303)  [+ rounding constant]
@       high = ((a & 0xFCFCFCFC) >> 2) + ((b & 0xFCFCFCFC) >> 2)
@
@   align : source misalignment (0..3) that the word shifts compensate for
@   rnd   : 1 selects rounding constant 0x02020202, otherwise 0x01010101
@
@ In:    r1  = word-aligned address of the current source row
@        r2  = line size
@        r3  = counter; the rounding constant is added only when bit 0 of
@              r3 is clear
@        r12 = base of the constant pool (offsets 0, 16, 20, 24, 28 are used
@              here and in RND_XY2_EXPAND)
@ Out:   r8, r9   = low sums  (words 0 and 1)
@        r10, r11 = high sums (words 0 and 1)
@        r1 advanced by r2
@ Clobbers: r4-r7, r14, flags
.macro RND_XY2_IT align, rnd
        @ Load the three words that cover the 9 source bytes.  The register
        @ choice differs per alignment so that the shifts below can leave
        @ the two views in r4/r5 and r6/r7 without extra moves.
.if \align == 0
        ldmia   r1, {r6-r8}
.elseif \align == 3
        ldmia   r1, {r5-r7}
.else
        ldmia   r1, {r8-r10}
.endif
        add     r1, r1, r2
        pld     [r1]
.if \align == 0
        @ r6/r7 already hold one view; r4/r5 := words shifted by one byte
        ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8
.elseif \align == 1
        ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r8, r9, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r8, r9, r10
.elseif \align == 2
        ADJ_ALIGN_DOUBLEWORD_D 2, r4, r5, r8, r9, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r8, r9, r10
.elseif \align == 3
        @ r6/r7 (2nd and 3rd loaded words) already hold one view;
        @ r5 is both a source and a destination here
        ADJ_ALIGN_DOUBLEWORD_D 3, r4, r5, r5, r6, r7
.endif
        ldr     r14, [r12]              @ 0x03030303
        tst     r3, #1                  @ Z set => add the rounding constant
        and     r8, r4, r14
        and     r9, r5, r14
        and     r10, r6, r14
        and     r11, r7, r14
.if \rnd == 1
        ldreq   r14, [r12, #16]         @ 0x02020202
.else
        ldreq   r14, [r12, #28]         @ 0x01010101
.endif
        add     r8, r8, r10             @ low sums
        add     r9, r9, r11
        addeq   r8, r8, r14             @ optional rounding
        addeq   r9, r9, r14
        ldr     r14, [r12, #20]         @ 0xFCFCFCFC >> 2
        and     r4, r14, r4, lsr #2
        and     r5, r14, r5, lsr #2
        and     r6, r14, r6, lsr #2
        and     r7, r14, r7, lsr #2
        add     r10, r4, r6             @ high sums
        add     r11, r5, r7
.endm
600
@ RND_XY2_EXPAND align, rnd
@
@ Complete row loop and function exit for the xy2 functions.  Each output
@ row combines the RND_XY2_IT partial sums of two consecutive source rows:
@
@       out = high_prev + high_cur + (((low_prev + low_cur) >> 2) & 0x0F0F0F0F)
@
@ In:    r0 = destination, r1 = word-aligned source, r2 = line size,
@        r3 = h (>= 1), r12 = constant pool base,
@        stack = {r4-r11, lr} pushed by the caller of this macro
@ Exit:  pops {r4-r11, pc}, i.e. returns from the enclosing function
@
@ RND_XY2_IT adds the rounding constant only when bit 0 of r3 is clear, and
@ every output row must receive that constant exactly once (from one of its
@ two source rows).  r3 is therefore decremented once after the priming row
@ so that consecutive source rows always see opposite parities; without it
@ the priming row and the first loop row would share a parity and the first
@ output row would get the constant twice or not at all.  As a consequence
@ r3 counts h-1 .. 0 inside the loop and the loop ends when it goes negative.
.macro RND_XY2_EXPAND align, rnd
        RND_XY2_IT \align, \rnd         @ priming row, parity of h
        sub     r3, r3, #1              @ next row gets the opposite parity
6:      stmfd   sp!, {r8-r11}           @ keep previous row's low/high sums
        RND_XY2_IT \align, \rnd
        ldmfd   sp!, {r4-r7}            @ r4,r5 = prev low; r6,r7 = prev high
        add     r4, r4, r8              @ low_prev  + low_cur
        add     r5, r5, r9
        add     r6, r6, r10             @ high_prev + high_cur
        add     r7, r7, r11
        ldr     r14, [r12, #24]         @ 0x0F0F0F0F
        and     r4, r14, r4, lsr #2
        and     r5, r14, r5, lsr #2
        add     r4, r4, r6
        add     r5, r5, r7
        subs    r3, r3, #1
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bge     6b                      @ h rows written once r3 reaches -1
        ldmfd   sp!, {r4-r11, pc}
.endm
621
@ void put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels,
@                          int line_size, int h)
@
@ Half-pel interpolation in both directions: every output byte is derived
@ from the 2x2 source neighbourhood via RND_XY2_EXPAND with rnd = 1
@ (rounding constant 0x02020202).  Reads h + 1 source rows of 9 bytes.
@
@ In:    r0 = block     (written with stmia, so it must be word aligned)
@        r1 = pixels    (any byte alignment)
@        r2 = line_size
@        r3 = h         (>= 1)
@ Uses:  r4-r11 and lr (saved/restored; lr is reused as scratch r14),
@        r12 = constant pool base, flags
@
@ Dispatch on the source misalignment uses PC-relative branches, so no
@ absolute code addresses are stored in the text section.
@
@ On ARM, ".align n" means 2^n bytes; 5 gives 32-byte alignment.
        .align 5
        .global put_pixels8_xy2_arm
put_pixels8_xy2_arm:
        pld     [r1]
        stmfd   sp!, {r4-r11, lr}
        adrl    r12, 5f                 @ constant pool used by the macros
        ands    r4, r1, #3              @ r4 = misalignment, Z set when aligned
        bic     r1, r1, #3              @ word-align the source pointer
        bne     7f
1:
        RND_XY2_EXPAND 0, 1

        .align 5
2:
        RND_XY2_EXPAND 1, 1

        .align 5
3:
        RND_XY2_EXPAND 2, 1

        .align 5
4:
        RND_XY2_EXPAND 3, 1

7:      @ r4 is 1, 2 or 3 here
        cmp     r4, #2
        blo     2b                      @ r4 == 1
        beq     3b                      @ r4 == 2
        b       4b                      @ r4 == 3

        @ Constant pool addressed as [r12, #offset] by RND_XY2_IT and
        @ RND_XY2_EXPAND.  Offsets 4-12 are unused padding that keeps the
        @ remaining constants at the offsets those macros expect.
5:
        .word   0x03030303              @ +0
        .word   0, 0, 0                 @ +4 .. +12 (unused)
        .word   0x02020202              @ +16
        .word   0xFCFCFCFC >> 2         @ +20
        .word   0x0F0F0F0F              @ +24
        .word   0x01010101              @ +28
658
@ void put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels,
@                                 int line_size, int h)
@
@ Half-pel interpolation in both directions, "no rounding" flavour: every
@ output byte is derived from the 2x2 source neighbourhood via
@ RND_XY2_EXPAND with rnd = 0 (constant 0x01010101 instead of 0x02020202).
@ Reads h + 1 source rows of 9 bytes.
@
@ In:    r0 = block     (written with stmia, so it must be word aligned)
@        r1 = pixels    (any byte alignment)
@        r2 = line_size
@        r3 = h         (>= 1)
@ Uses:  r4-r11 and lr (saved/restored; lr is reused as scratch r14),
@        r12 = constant pool base, flags
@
@ Dispatch on the source misalignment uses PC-relative branches, so no
@ absolute code addresses are stored in the text section.
@
@ On ARM, ".align n" means 2^n bytes; 5 gives 32-byte alignment.
        .align 5
        .global put_no_rnd_pixels8_xy2_arm
put_no_rnd_pixels8_xy2_arm:
        pld     [r1]
        stmfd   sp!, {r4-r11, lr}
        adrl    r12, 5f                 @ constant pool used by the macros
        ands    r4, r1, #3              @ r4 = misalignment, Z set when aligned
        bic     r1, r1, #3              @ word-align the source pointer
        bne     7f
1:
        RND_XY2_EXPAND 0, 0

        .align 5
2:
        RND_XY2_EXPAND 1, 0

        .align 5
3:
        RND_XY2_EXPAND 2, 0

        .align 5
4:
        RND_XY2_EXPAND 3, 0

7:      @ r4 is 1, 2 or 3 here
        cmp     r4, #2
        blo     2b                      @ r4 == 1
        beq     3b                      @ r4 == 2
        b       4b                      @ r4 == 3

        @ Constant pool addressed as [r12, #offset] by RND_XY2_IT and
        @ RND_XY2_EXPAND.  Offsets 4-12 are unused padding that keeps the
        @ remaining constants at the offsets those macros expect.
5:
        .word   0x03030303              @ +0
        .word   0, 0, 0                 @ +4 .. +12 (unused)
        .word   0x02020202              @ +16
        .word   0xFCFCFCFC >> 2         @ +20
        .word   0x0F0F0F0F              @ +24
        .word   0x01010101              @ +28