Mercurial > libavcodec.hg
annotate arm/dsputil_arm.S @ 10893:2aafcafbe1f0 libavcodec
Replace cabac checks in inline functions from h264.h with constants.
No benchmark because it's just replacing variables with literal constants
(so no risk for slowdown outside gcc silliness) and i need sleep.
author | michael |
---|---|
date | Sat, 16 Jan 2010 05:41:33 +0000 |
parents | a6ff6fb10ff5 |
children | 361a5fcb4393 |
rev | line source |
---|---|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1 @ |
8359 | 2 @ ARMv4 optimized DSP utils |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
3 @ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp> |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
4 @ |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
5 @ This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
6 @ |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
7 @ FFmpeg is free software; you can redistribute it and/or |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
8 @ modify it under the terms of the GNU Lesser General Public |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
9 @ License as published by the Free Software Foundation; either |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
10 @ version 2.1 of the License, or (at your option) any later version. |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
11 @ |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
12 @ FFmpeg is distributed in the hope that it will be useful, |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
13 @ but WITHOUT ANY WARRANTY; without even the implied warranty of |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
14 @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
15 @ Lesser General Public License for more details. |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
16 @ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
17 @ You should have received a copy of the GNU Lesser General Public |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
18 @ License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2967
diff
changeset
|
19 @ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
20 @ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
21 |
6528
28d0e7dac45d
pld instruction are used in libavcodec/armv4l/dsputil_arm_s.S which can be used
diego
parents:
5641
diff
changeset
|
22 #include "config.h" |
8069 | 23 #include "asm.S" |
6528
28d0e7dac45d
pld instruction are used in libavcodec/armv4l/dsputil_arm_s.S which can be used
diego
parents:
5641
diff
changeset
|
24 |
8070 | 25 preserve8 |
26 | |
8590 | 27 #if !HAVE_PLD |
6528
28d0e7dac45d
pld instruction are used in libavcodec/armv4l/dsputil_arm_s.S which can be used
diego
parents:
5641
diff
changeset
|
28 .macro pld reg |
28d0e7dac45d
pld instruction are used in libavcodec/armv4l/dsputil_arm_s.S which can be used
diego
parents:
5641
diff
changeset
|
29 .endm |
28d0e7dac45d
pld instruction are used in libavcodec/armv4l/dsputil_arm_s.S which can be used
diego
parents:
5641
diff
changeset
|
30 #endif |
28d0e7dac45d
pld instruction are used in libavcodec/armv4l/dsputil_arm_s.S which can be used
diego
parents:
5641
diff
changeset
|
31 |
8590 | 32 #if HAVE_ARMV5TE |
8070 | 33 function ff_prefetch_arm, export=1 |
10355 | 34 subs r2, r2, #1 |
35 pld [r0] | |
36 add r0, r0, r1 | |
37 bne ff_prefetch_arm | |
38 bx lr | |
8070 | 39 .endfunc |
40 #endif | |
41 | |
10357 | 42 .macro ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4 |
10355 | 43 mov \Rd0, \Rn0, lsr #(\shift * 8) |
44 mov \Rd1, \Rn1, lsr #(\shift * 8) | |
45 mov \Rd2, \Rn2, lsr #(\shift * 8) | |
46 mov \Rd3, \Rn3, lsr #(\shift * 8) | |
47 orr \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8) | |
48 orr \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8) | |
49 orr \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8) | |
50 orr \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8) | |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
51 .endm |
10357 | 52 .macro ALIGN_DWORD shift, R0, R1, R2 |
10355 | 53 mov \R0, \R0, lsr #(\shift * 8) |
54 orr \R0, \R0, \R1, lsl #(32 - \shift * 8) | |
55 mov \R1, \R1, lsr #(\shift * 8) | |
56 orr \R1, \R1, \R2, lsl #(32 - \shift * 8) | |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
57 .endm |
10357 | 58 .macro ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2 |
10355 | 59 mov \Rdst0, \Rsrc0, lsr #(\shift * 8) |
60 mov \Rdst1, \Rsrc1, lsr #(\shift * 8) | |
61 orr \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8)) | |
62 orr \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8)) | |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
63 .endm |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
64 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
65 .macro RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
66 @ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
67 @ Rmask = 0xFEFEFEFE |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
68 @ Rn = destroyed |
10355 | 69 eor \Rd0, \Rn0, \Rm0 |
70 eor \Rd1, \Rn1, \Rm1 | |
71 orr \Rn0, \Rn0, \Rm0 | |
72 orr \Rn1, \Rn1, \Rm1 | |
73 and \Rd0, \Rd0, \Rmask | |
74 and \Rd1, \Rd1, \Rmask | |
75 sub \Rd0, \Rn0, \Rd0, lsr #1 | |
76 sub \Rd1, \Rn1, \Rd1, lsr #1 | |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
77 .endm |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
78 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
79 .macro NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
80 @ Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
81 @ Rmask = 0xFEFEFEFE |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
82 @ Rn = destroyed |
10355 | 83 eor \Rd0, \Rn0, \Rm0 |
84 eor \Rd1, \Rn1, \Rm1 | |
85 and \Rn0, \Rn0, \Rm0 | |
86 and \Rn1, \Rn1, \Rm1 | |
87 and \Rd0, \Rd0, \Rmask | |
88 and \Rd1, \Rd1, \Rmask | |
89 add \Rd0, \Rn0, \Rd0, lsr #1 | |
90 add \Rd1, \Rn1, \Rd1, lsr #1 | |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
91 .endm |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
92 |
8682 | 93 .macro JMP_ALIGN tmp, reg |
10355 | 94 ands \tmp, \reg, #3 |
95 bic \reg, \reg, #3 | |
96 beq 1f | |
97 subs \tmp, \tmp, #1 | |
98 beq 2f | |
99 subs \tmp, \tmp, #1 | |
100 beq 3f | |
8680 | 101 b 4f |
102 .endm | |
103 | |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
104 @ ---------------------------------------------------------------- |
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
105 .align 5 |
10363 | 106 function ff_put_pixels16_arm, export=1 |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
107 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
108 @ block = word aligned, pixels = unaligned |
10355 | 109 pld [r1] |
10356 | 110 push {r4-r11, lr} |
10355 | 111 JMP_ALIGN r5, r1 |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
112 1: |
10356 | 113 ldm r1, {r4-r7} |
10355 | 114 add r1, r1, r2 |
10356 | 115 stm r0, {r4-r7} |
10355 | 116 pld [r1] |
117 subs r3, r3, #1 | |
118 add r0, r0, r2 | |
119 bne 1b | |
10356 | 120 pop {r4-r11, pc} |
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
121 .align 5 |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
122 2: |
10356 | 123 ldm r1, {r4-r8} |
10355 | 124 add r1, r1, r2 |
10357 | 125 ALIGN_QWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8 |
10355 | 126 pld [r1] |
127 subs r3, r3, #1 | |
10356 | 128 stm r0, {r9-r12} |
10355 | 129 add r0, r0, r2 |
130 bne 2b | |
10356 | 131 pop {r4-r11, pc} |
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
132 .align 5 |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
133 3: |
10356 | 134 ldm r1, {r4-r8} |
10355 | 135 add r1, r1, r2 |
10357 | 136 ALIGN_QWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8 |
10355 | 137 pld [r1] |
138 subs r3, r3, #1 | |
10356 | 139 stm r0, {r9-r12} |
10355 | 140 add r0, r0, r2 |
141 bne 3b | |
10356 | 142 pop {r4-r11, pc} |
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
143 .align 5 |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
144 4: |
10356 | 145 ldm r1, {r4-r8} |
10355 | 146 add r1, r1, r2 |
10357 | 147 ALIGN_QWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8 |
10355 | 148 pld [r1] |
149 subs r3, r3, #1 | |
10356 | 150 stm r0, {r9-r12} |
10355 | 151 add r0, r0, r2 |
152 bne 4b | |
10356 | 153 pop {r4-r11,pc} |
8069 | 154 .endfunc |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
155 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
156 @ ---------------------------------------------------------------- |
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
157 .align 5 |
10363 | 158 function ff_put_pixels8_arm, export=1 |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
159 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
160 @ block = word aligned, pixels = unaligned |
10355 | 161 pld [r1] |
10356 | 162 push {r4-r5,lr} |
10355 | 163 JMP_ALIGN r5, r1 |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
164 1: |
10356 | 165 ldm r1, {r4-r5} |
10355 | 166 add r1, r1, r2 |
167 subs r3, r3, #1 | |
168 pld [r1] | |
10356 | 169 stm r0, {r4-r5} |
10355 | 170 add r0, r0, r2 |
171 bne 1b | |
10356 | 172 pop {r4-r5,pc} |
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
173 .align 5 |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
174 2: |
10356 | 175 ldm r1, {r4-r5, r12} |
10355 | 176 add r1, r1, r2 |
10357 | 177 ALIGN_DWORD 1, r4, r5, r12 |
10355 | 178 pld [r1] |
179 subs r3, r3, #1 | |
10356 | 180 stm r0, {r4-r5} |
10355 | 181 add r0, r0, r2 |
182 bne 2b | |
10356 | 183 pop {r4-r5,pc} |
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
184 .align 5 |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
185 3: |
10356 | 186 ldm r1, {r4-r5, r12} |
10355 | 187 add r1, r1, r2 |
10357 | 188 ALIGN_DWORD 2, r4, r5, r12 |
10355 | 189 pld [r1] |
190 subs r3, r3, #1 | |
10356 | 191 stm r0, {r4-r5} |
10355 | 192 add r0, r0, r2 |
193 bne 3b | |
10356 | 194 pop {r4-r5,pc} |
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
195 .align 5 |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
196 4: |
10356 | 197 ldm r1, {r4-r5, r12} |
10355 | 198 add r1, r1, r2 |
10357 | 199 ALIGN_DWORD 3, r4, r5, r12 |
10355 | 200 pld [r1] |
201 subs r3, r3, #1 | |
10356 | 202 stm r0, {r4-r5} |
10355 | 203 add r0, r0, r2 |
204 bne 4b | |
10356 | 205 pop {r4-r5,pc} |
8069 | 206 .endfunc |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
207 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
208 @ ---------------------------------------------------------------- |
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
209 .align 5 |
10363 | 210 function ff_put_pixels8_x2_arm, export=1 |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
211 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
212 @ block = word aligned, pixels = unaligned |
10355 | 213 pld [r1] |
10356 | 214 push {r4-r10,lr} |
10355 | 215 ldr r12, =0xfefefefe |
216 JMP_ALIGN r5, r1 | |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
217 1: |
10356 | 218 ldm r1, {r4-r5, r10} |
10355 | 219 add r1, r1, r2 |
10357 | 220 ALIGN_DWORD_D 1, r6, r7, r4, r5, r10 |
10355 | 221 pld [r1] |
222 RND_AVG32 r8, r9, r4, r5, r6, r7, r12 | |
223 subs r3, r3, #1 | |
10356 | 224 stm r0, {r8-r9} |
10355 | 225 add r0, r0, r2 |
226 bne 1b | |
10356 | 227 pop {r4-r10,pc} |
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
228 .align 5 |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
229 2: |
10356 | 230 ldm r1, {r4-r5, r10} |
10355 | 231 add r1, r1, r2 |
10357 | 232 ALIGN_DWORD_D 1, r6, r7, r4, r5, r10 |
233 ALIGN_DWORD_D 2, r8, r9, r4, r5, r10 | |
10355 | 234 pld [r1] |
235 RND_AVG32 r4, r5, r6, r7, r8, r9, r12 | |
236 subs r3, r3, #1 | |
10356 | 237 stm r0, {r4-r5} |
10355 | 238 add r0, r0, r2 |
239 bne 2b | |
10356 | 240 pop {r4-r10,pc} |
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
241 .align 5 |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
242 3: |
10356 | 243 ldm r1, {r4-r5, r10} |
10355 | 244 add r1, r1, r2 |
10357 | 245 ALIGN_DWORD_D 2, r6, r7, r4, r5, r10 |
246 ALIGN_DWORD_D 3, r8, r9, r4, r5, r10 | |
10355 | 247 pld [r1] |
248 RND_AVG32 r4, r5, r6, r7, r8, r9, r12 | |
249 subs r3, r3, #1 | |
10356 | 250 stm r0, {r4-r5} |
10355 | 251 add r0, r0, r2 |
252 bne 3b | |
10356 | 253 pop {r4-r10,pc} |
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
254 .align 5 |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
255 4: |
10356 | 256 ldm r1, {r4-r5, r10} |
10355 | 257 add r1, r1, r2 |
10357 | 258 ALIGN_DWORD_D 3, r6, r7, r4, r5, r10 |
10355 | 259 pld [r1] |
260 RND_AVG32 r8, r9, r6, r7, r5, r10, r12 | |
261 subs r3, r3, #1 | |
10356 | 262 stm r0, {r8-r9} |
10355 | 263 add r0, r0, r2 |
264 bne 4b | |
10356 | 265 pop {r4-r10,pc} |
8069 | 266 .endfunc |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
267 |
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
268 .align 5 |
10363 | 269 function ff_put_no_rnd_pixels8_x2_arm, export=1 |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
270 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
271 @ block = word aligned, pixels = unaligned |
10355 | 272 pld [r1] |
10356 | 273 push {r4-r10,lr} |
10355 | 274 ldr r12, =0xfefefefe |
275 JMP_ALIGN r5, r1 | |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
276 1: |
10356 | 277 ldm r1, {r4-r5, r10} |
10355 | 278 add r1, r1, r2 |
10357 | 279 ALIGN_DWORD_D 1, r6, r7, r4, r5, r10 |
10355 | 280 pld [r1] |
281 NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12 | |
282 subs r3, r3, #1 | |
10356 | 283 stm r0, {r8-r9} |
10355 | 284 add r0, r0, r2 |
285 bne 1b | |
10356 | 286 pop {r4-r10,pc} |
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
287 .align 5 |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
288 2: |
10356 | 289 ldm r1, {r4-r5, r10} |
10355 | 290 add r1, r1, r2 |
10357 | 291 ALIGN_DWORD_D 1, r6, r7, r4, r5, r10 |
292 ALIGN_DWORD_D 2, r8, r9, r4, r5, r10 | |
10355 | 293 pld [r1] |
294 NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12 | |
295 subs r3, r3, #1 | |
10356 | 296 stm r0, {r4-r5} |
10355 | 297 add r0, r0, r2 |
298 bne 2b | |
10356 | 299 pop {r4-r10,pc} |
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
300 .align 5 |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
301 3: |
10356 | 302 ldm r1, {r4-r5, r10} |
10355 | 303 add r1, r1, r2 |
10357 | 304 ALIGN_DWORD_D 2, r6, r7, r4, r5, r10 |
305 ALIGN_DWORD_D 3, r8, r9, r4, r5, r10 | |
10355 | 306 pld [r1] |
307 NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12 | |
308 subs r3, r3, #1 | |
10356 | 309 stm r0, {r4-r5} |
10355 | 310 add r0, r0, r2 |
311 bne 3b | |
10356 | 312 pop {r4-r10,pc} |
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
313 .align 5 |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
314 4: |
10356 | 315 ldm r1, {r4-r5, r10} |
10355 | 316 add r1, r1, r2 |
10357 | 317 ALIGN_DWORD_D 3, r6, r7, r4, r5, r10 |
10355 | 318 pld [r1] |
319 NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12 | |
320 subs r3, r3, #1 | |
10356 | 321 stm r0, {r8-r9} |
10355 | 322 add r0, r0, r2 |
323 bne 4b | |
10356 | 324 pop {r4-r10,pc} |
8069 | 325 .endfunc |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
326 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
327 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
328 @ ---------------------------------------------------------------- |
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
329 .align 5 |
10363 | 330 function ff_put_pixels8_y2_arm, export=1 |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
331 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
332 @ block = word aligned, pixels = unaligned |
10355 | 333 pld [r1] |
10356 | 334 push {r4-r11,lr} |
10355 | 335 mov r3, r3, lsr #1 |
336 ldr r12, =0xfefefefe | |
337 JMP_ALIGN r5, r1 | |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
338 1: |
10356 | 339 ldm r1, {r4-r5} |
10355 | 340 add r1, r1, r2 |
10356 | 341 6: ldm r1, {r6-r7} |
10355 | 342 add r1, r1, r2 |
343 pld [r1] | |
344 RND_AVG32 r8, r9, r4, r5, r6, r7, r12 | |
10356 | 345 ldm r1, {r4-r5} |
10355 | 346 add r1, r1, r2 |
10356 | 347 stm r0, {r8-r9} |
10355 | 348 add r0, r0, r2 |
349 pld [r1] | |
350 RND_AVG32 r8, r9, r6, r7, r4, r5, r12 | |
351 subs r3, r3, #1 | |
10356 | 352 stm r0, {r8-r9} |
10355 | 353 add r0, r0, r2 |
354 bne 6b | |
10356 | 355 pop {r4-r11,pc} |
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
356 .align 5 |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
357 2: |
10356 | 358 ldm r1, {r4-r6} |
10355 | 359 add r1, r1, r2 |
360 pld [r1] | |
10357 | 361 ALIGN_DWORD 1, r4, r5, r6 |
10356 | 362 6: ldm r1, {r7-r9} |
10355 | 363 add r1, r1, r2 |
364 pld [r1] | |
10357 | 365 ALIGN_DWORD 1, r7, r8, r9 |
10355 | 366 RND_AVG32 r10, r11, r4, r5, r7, r8, r12 |
10356 | 367 stm r0, {r10-r11} |
10355 | 368 add r0, r0, r2 |
10356 | 369 ldm r1, {r4-r6} |
10355 | 370 add r1, r1, r2 |
371 pld [r1] | |
10357 | 372 ALIGN_DWORD 1, r4, r5, r6 |
10355 | 373 subs r3, r3, #1 |
374 RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | |
10356 | 375 stm r0, {r10-r11} |
10355 | 376 add r0, r0, r2 |
377 bne 6b | |
10356 | 378 pop {r4-r11,pc} |
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
379 .align 5 |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
380 3: |
10356 | 381 ldm r1, {r4-r6} |
10355 | 382 add r1, r1, r2 |
383 pld [r1] | |
10357 | 384 ALIGN_DWORD 2, r4, r5, r6 |
10356 | 385 6: ldm r1, {r7-r9} |
10355 | 386 add r1, r1, r2 |
387 pld [r1] | |
10357 | 388 ALIGN_DWORD 2, r7, r8, r9 |
10355 | 389 RND_AVG32 r10, r11, r4, r5, r7, r8, r12 |
10356 | 390 stm r0, {r10-r11} |
10355 | 391 add r0, r0, r2 |
10356 | 392 ldm r1, {r4-r6} |
10355 | 393 add r1, r1, r2 |
394 pld [r1] | |
10357 | 395 ALIGN_DWORD 2, r4, r5, r6 |
10355 | 396 subs r3, r3, #1 |
397 RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | |
10356 | 398 stm r0, {r10-r11} |
10355 | 399 add r0, r0, r2 |
400 bne 6b | |
10356 | 401 pop {r4-r11,pc} |
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
402 .align 5 |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
403 4: |
10356 | 404 ldm r1, {r4-r6} |
10355 | 405 add r1, r1, r2 |
406 pld [r1] | |
10357 | 407 ALIGN_DWORD 3, r4, r5, r6 |
10356 | 408 6: ldm r1, {r7-r9} |
10355 | 409 add r1, r1, r2 |
410 pld [r1] | |
10357 | 411 ALIGN_DWORD 3, r7, r8, r9 |
10355 | 412 RND_AVG32 r10, r11, r4, r5, r7, r8, r12 |
10356 | 413 stm r0, {r10-r11} |
10355 | 414 add r0, r0, r2 |
10356 | 415 ldm r1, {r4-r6} |
10355 | 416 add r1, r1, r2 |
417 pld [r1] | |
10357 | 418 ALIGN_DWORD 3, r4, r5, r6 |
10355 | 419 subs r3, r3, #1 |
420 RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | |
10356 | 421 stm r0, {r10-r11} |
10355 | 422 add r0, r0, r2 |
423 bne 6b | |
10356 | 424 pop {r4-r11,pc} |
8069 | 425 .endfunc |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
426 |
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
427 .align 5 |
10363 | 428 function ff_put_no_rnd_pixels8_y2_arm, export=1 |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
429 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
430 @ block = word aligned, pixels = unaligned |
10355 | 431 pld [r1] |
10356 | 432 push {r4-r11,lr} |
10355 | 433 mov r3, r3, lsr #1 |
434 ldr r12, =0xfefefefe | |
435 JMP_ALIGN r5, r1 | |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
436 1: |
10356 | 437 ldm r1, {r4-r5} |
10355 | 438 add r1, r1, r2 |
10356 | 439 6: ldm r1, {r6-r7} |
10355 | 440 add r1, r1, r2 |
441 pld [r1] | |
442 NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12 | |
10356 | 443 ldm r1, {r4-r5} |
10355 | 444 add r1, r1, r2 |
10356 | 445 stm r0, {r8-r9} |
10355 | 446 add r0, r0, r2 |
447 pld [r1] | |
448 NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12 | |
449 subs r3, r3, #1 | |
10356 | 450 stm r0, {r8-r9} |
10355 | 451 add r0, r0, r2 |
452 bne 6b | |
10356 | 453 pop {r4-r11,pc} |
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
454 .align 5 |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
455 2: |
10356 | 456 ldm r1, {r4-r6} |
10355 | 457 add r1, r1, r2 |
458 pld [r1] | |
10357 | 459 ALIGN_DWORD 1, r4, r5, r6 |
10356 | 460 6: ldm r1, {r7-r9} |
10355 | 461 add r1, r1, r2 |
462 pld [r1] | |
10357 | 463 ALIGN_DWORD 1, r7, r8, r9 |
10355 | 464 NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 |
10356 | 465 stm r0, {r10-r11} |
10355 | 466 add r0, r0, r2 |
10356 | 467 ldm r1, {r4-r6} |
10355 | 468 add r1, r1, r2 |
469 pld [r1] | |
10357 | 470 ALIGN_DWORD 1, r4, r5, r6 |
10355 | 471 subs r3, r3, #1 |
472 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | |
10356 | 473 stm r0, {r10-r11} |
10355 | 474 add r0, r0, r2 |
475 bne 6b | |
10356 | 476 pop {r4-r11,pc} |
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
477 .align 5 |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
478 3: |
10356 | 479 ldm r1, {r4-r6} |
10355 | 480 add r1, r1, r2 |
481 pld [r1] | |
10357 | 482 ALIGN_DWORD 2, r4, r5, r6 |
10356 | 483 6: ldm r1, {r7-r9} |
10355 | 484 add r1, r1, r2 |
485 pld [r1] | |
10357 | 486 ALIGN_DWORD 2, r7, r8, r9 |
10355 | 487 NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 |
10356 | 488 stm r0, {r10-r11} |
10355 | 489 add r0, r0, r2 |
10356 | 490 ldm r1, {r4-r6} |
10355 | 491 add r1, r1, r2 |
492 pld [r1] | |
10357 | 493 ALIGN_DWORD 2, r4, r5, r6 |
10355 | 494 subs r3, r3, #1 |
495 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | |
10356 | 496 stm r0, {r10-r11} |
10355 | 497 add r0, r0, r2 |
498 bne 6b | |
10356 | 499 pop {r4-r11,pc} |
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
500 .align 5 |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
501 4: |
10356 | 502 ldm r1, {r4-r6} |
10355 | 503 add r1, r1, r2 |
504 pld [r1] | |
10357 | 505 ALIGN_DWORD 3, r4, r5, r6 |
10356 | 506 6: ldm r1, {r7-r9} |
10355 | 507 add r1, r1, r2 |
508 pld [r1] | |
10357 | 509 ALIGN_DWORD 3, r7, r8, r9 |
10355 | 510 NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 |
10356 | 511 stm r0, {r10-r11} |
10355 | 512 add r0, r0, r2 |
10356 | 513 ldm r1, {r4-r6} |
10355 | 514 add r1, r1, r2 |
515 pld [r1] | |
10357 | 516 ALIGN_DWORD 3, r4, r5, r6 |
10355 | 517 subs r3, r3, #1 |
518 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | |
10356 | 519 stm r0, {r10-r11} |
10355 | 520 add r0, r0, r2 |
521 bne 6b | |
10356 | 522 pop {r4-r11,pc} |
8069 | 523 .endfunc |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
524 |
8679 | 525 .ltorg |
526 | |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
527 @ ---------------------------------------------------------------- |
8679 | 528 .macro RND_XY2_IT align, rnd |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
529 @ l1= (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
530 @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
531 .if \align == 0 |
10356 | 532 ldm r1, {r6-r8} |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
533 .elseif \align == 3 |
10356 | 534 ldm r1, {r5-r7} |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
535 .else |
10356 | 536 ldm r1, {r8-r10} |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
537 .endif |
10355 | 538 add r1, r1, r2 |
539 pld [r1] | |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
540 .if \align == 0 |
10357 | 541 ALIGN_DWORD_D 1, r4, r5, r6, r7, r8 |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
542 .elseif \align == 1 |
10357 | 543 ALIGN_DWORD_D 1, r4, r5, r8, r9, r10 |
544 ALIGN_DWORD_D 2, r6, r7, r8, r9, r10 | |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
545 .elseif \align == 2 |
10357 | 546 ALIGN_DWORD_D 2, r4, r5, r8, r9, r10 |
547 ALIGN_DWORD_D 3, r6, r7, r8, r9, r10 | |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
548 .elseif \align == 3 |
10357 | 549 ALIGN_DWORD_D 3, r4, r5, r5, r6, r7 |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
550 .endif |
10355 | 551 ldr r14, =0x03030303 |
552 tst r3, #1 | |
553 and r8, r4, r14 | |
554 and r9, r5, r14 | |
555 and r10, r6, r14 | |
556 and r11, r7, r14 | |
557 andeq r14, r14, r14, \rnd #1 | |
558 add r8, r8, r10 | |
559 add r9, r9, r11 | |
560 ldr r12, =0xfcfcfcfc >> 2 | |
561 addeq r8, r8, r14 | |
562 addeq r9, r9, r14 | |
563 and r4, r12, r4, lsr #2 | |
564 and r5, r12, r5, lsr #2 | |
565 and r6, r12, r6, lsr #2 | |
566 and r7, r12, r7, lsr #2 | |
567 add r10, r4, r6 | |
568 add r11, r5, r7 | |
569 subs r3, r3, #1 | |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
570 .endm |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
571 |
@ RND_XY2_EXPAND align, rnd
@ Complete row loop built on RND_XY2_IT; both arguments are handed to
@ RND_XY2_IT unchanged. Each RND_XY2_IT expansion leaves four partial
@ sums in r8-r11 and finishes with a subs on r3. The first expansion
@ primes r8-r11; every pass through label 6 then saves those sums,
@ produces a fresh set, merges old and new into r4-r5 and stores the
@ two words at [r0]. The macro ends by popping r4-r11 and pc, so it
@ returns from the enclosing function and never falls through.
8679 | 572 .macro RND_XY2_EXPAND align, rnd |
10355 | 573 RND_XY2_IT \align, \rnd |
@ Keep the previous r8-r11 on the stack while RND_XY2_IT overwrites them.
10356 | 574 6: push {r8-r11} |
10355 | 575 RND_XY2_IT \align, \rnd |
@ r4-r7 = the saved (previous) r8-r11; r8-r11 = the values just computed.
10356 | 576 pop {r4-r7} |
10355 | 577 add r4, r4, r8 |
578 add r5, r5, r9 | |
579 ldr r14, =0x0f0f0f0f | |
580 add r6, r6, r10 | |
581 add r7, r7, r11 | |
@ The merged r8/r9 sums are shifted right by 2 and masked to the low
@ nibble of every byte, then added to the merged r10/r11 sums.
582 and r4, r14, r4, lsr #2 | |
583 and r5, r14, r5, lsr #2 | |
584 add r4, r4, r6 | |
585 add r5, r5, r7 | |
@ Store the two result words and advance the destination by r2.
10356 | 586 stm r0, {r4-r5} |
10355 | 587 add r0, r0, r2 |
@ None of the instructions since the subs inside RND_XY2_IT updates the
@ flags, so bge still tests the result of that decrement of r3.
588 bge 6b | |
@ Restore r4-r11 and return by loading pc from the stack.
10356 | 589 pop {r4-r11,pc} |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
590 .endm |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
591 |
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
592 .align 5 |
@ ff_put_pixels8_xy2_arm: one RND_XY2_EXPAND loop per source alignment
@ (0 to 3), each expanded with rnd = lsl. With lsl, the andeq step in
@ RND_XY2_IT turns 0x03030303 into 0x02020202 (a bias of 2 per byte).
@ The expanded code stores through r0, loads through r1, steps both
@ pointers by r2 and counts rows down in r3.
10363 | 593 function ff_put_pixels8_xy2_arm, export=1 |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
594 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
595 @ block = word aligned, pixels = unaligned |
10355 | 596 pld [r1] |
@ Saved here, restored by the pop of r4-r11 and pc that ends every
@ RND_XY2_EXPAND expansion below.
10356 | 597 push {r4-r11,lr} @ R14 is also called LR |
@ NOTE(review): JMP_ALIGN is defined outside this excerpt; presumably it
@ branches to one of the local labels 1 to 4 below according to the
@ alignment of r1, using r5 as scratch. Confirm against its definition.
10355 | 598 JMP_ALIGN r5, r1 |
@ Each expansion ends by returning, so the variants do not fall
@ through into one another.
10371 | 599 1: RND_XY2_EXPAND 0, lsl |
600 .align 5 | |
601 2: RND_XY2_EXPAND 1, lsl | |
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
602 .align 5 |
10371 | 603 3: RND_XY2_EXPAND 2, lsl |
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
604 .align 5 |
10371 | 605 4: RND_XY2_EXPAND 3, lsl |
8069 | 606 .endfunc |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
607 |
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
608 .align 5 |
@ ff_put_no_rnd_pixels8_xy2_arm: same structure as ff_put_pixels8_xy2_arm
@ but every RND_XY2_EXPAND is expanded with rnd = lsr. With lsr, the
@ andeq step in RND_XY2_IT turns 0x03030303 into 0x01010101 (a bias of
@ 1 per byte instead of 2).
@ The expanded code stores through r0, loads through r1, steps both
@ pointers by r2 and counts rows down in r3.
10363 | 609 function ff_put_no_rnd_pixels8_xy2_arm, export=1 |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
610 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
611 @ block = word aligned, pixels = unaligned |
10355 | 612 pld [r1] |
@ Saved here, restored by the pop of r4-r11 and pc that ends every
@ RND_XY2_EXPAND expansion below.
10356 | 613 push {r4-r11,lr} |
@ NOTE(review): JMP_ALIGN is defined outside this excerpt; presumably it
@ branches to one of the local labels 1 to 4 below according to the
@ alignment of r1, using r5 as scratch. Confirm against its definition.
10355 | 614 JMP_ALIGN r5, r1 |
@ Each expansion ends by returning, so the variants do not fall
@ through into one another.
10371 | 615 1: RND_XY2_EXPAND 0, lsr |
616 .align 5 | |
617 2: RND_XY2_EXPAND 1, lsr | |
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
618 .align 5 |
10371 | 619 3: RND_XY2_EXPAND 2, lsr |
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
620 .align 5 |
10371 | 621 4: RND_XY2_EXPAND 3, lsr |
8069 | 622 .endfunc |
8072 | 623 |
8678
6c256fc075e3
ARM: change alignment of loops in put_pixels*_arm to 32
mru
parents:
8590
diff
changeset
|
624 .align 5 |
10374 | 625 @ void ff_add_pixels_clamped_arm(int16_t *block, uint8_t *dest, int stride) |
@ Adds eight rows of eight signed 16-bit values read through r0 to the
@ bytes at r1 and writes the clamped results back to r1.
@ Registers: r0 = block pointer (advanced by 16 bytes per row),
@ r1 = dest pointer (advanced by r2 per row), r2 = stride,
@ r10 = row counter, r4 = current dest word, r5/r7 = coefficients,
@ r6/r8 = sums, r9 = packed output word.
@ Clamp idiom used for every byte: after the add, the coefficient is
@ inverted with mvn; if bit 8 of the sum is set, the sum is replaced
@ by the top byte of the inverted coefficient, which is 0xFF for a
@ non-negative coefficient and 0x00 for a negative one.
@ NOTE(review): only bit 8 of the sum is tested, so this appears to
@ assume every sum lies in the range -256 to 511. Confirm that callers
@ guarantee the coefficient range.
@ NOTE(review): dest is read and written with word ldr/str and byte 0
@ is taken from the low bits, so word alignment of dest and
@ little-endian byte order are presumably required. Confirm.
626 function ff_add_pixels_clamped_arm, export=1 | |
8072 | 627 push {r4-r10} |
628 mov r10, #8 | |
629 1: | |
630 ldr r4, [r1] /* load dest */ | |
631 /* block[0] and block[1]*/ | |
632 ldrsh r5, [r0] | |
633 ldrsh r7, [r0, #2] | |
634 and r6, r4, #0xFF | |
635 and r8, r4, #0xFF00 | |
636 add r6, r5, r6 | |
637 add r8, r7, r8, lsr #8 | |
/* invert the coefficients so their top byte can serve as the clamp value */
638 mvn r5, r5 | |
639 mvn r7, r7 | |
/* bit 8 set means the sum left 0..255: substitute 0x00 or 0xFF */
640 tst r6, #0x100 | |
641 movne r6, r5, lsr #24 | |
642 tst r8, #0x100 | |
643 movne r8, r7, lsr #24 | |
/* start packing the output word: byte 0, then byte 1 */
644 mov r9, r6 | |
645 ldrsh r5, [r0, #4] /* moved from [A] */ | |
10355 | 646 orr r9, r9, r8, lsl #8 |
8072 | 647 /* block[2] and block[3] */ |
648 /* [A] */ | |
649 ldrsh r7, [r0, #6] | |
650 and r6, r4, #0xFF0000 | |
651 and r8, r4, #0xFF000000 | |
10355 | 652 add r6, r5, r6, lsr #16 |
653 add r8, r7, r8, lsr #24 | |
8072 | 654 mvn r5, r5 |
655 mvn r7, r7 | |
656 tst r6, #0x100 | |
657 movne r6, r5, lsr #24 | |
658 tst r8, #0x100 | |
659 movne r8, r7, lsr #24 | |
10355 | 660 orr r9, r9, r6, lsl #16 |
/* the second dest word is fetched early, before the first one is stored */
8072 | 661 ldr r4, [r1, #4] /* moved from [B] */ |
10355 | 662 orr r9, r9, r8, lsl #24 |
8072 | 663 /* store dest */ |
664 ldrsh r5, [r0, #8] /* moved from [C] */ | |
665 str r9, [r1] | |
666 | |
667 /* load dest */ | |
668 /* [B] */ | |
669 /* block[4] and block[5] */ | |
670 /* [C] */ | |
671 ldrsh r7, [r0, #10] | |
672 and r6, r4, #0xFF | |
673 and r8, r4, #0xFF00 | |
674 add r6, r5, r6 | |
10355 | 675 add r8, r7, r8, lsr #8 |
8072 | 676 mvn r5, r5 |
677 mvn r7, r7 | |
678 tst r6, #0x100 | |
679 movne r6, r5, lsr #24 | |
680 tst r8, #0x100 | |
681 movne r8, r7, lsr #24 | |
682 mov r9, r6 | |
683 ldrsh r5, [r0, #12] /* moved from [D] */ | |
10355 | 684 orr r9, r9, r8, lsl #8 |
8072 | 685 /* block[6] and block[7] */ |
686 /* [D] */ | |
687 ldrsh r7, [r0, #14] | |
688 and r6, r4, #0xFF0000 | |
689 and r8, r4, #0xFF000000 | |
10355 | 690 add r6, r5, r6, lsr #16 |
691 add r8, r7, r8, lsr #24 | |
8072 | 692 mvn r5, r5 |
693 mvn r7, r7 | |
694 tst r6, #0x100 | |
695 movne r6, r5, lsr #24 | |
696 tst r8, #0x100 | |
697 movne r8, r7, lsr #24 | |
10355 | 698 orr r9, r9, r6, lsl #16 |
/* step to the next row of eight coefficients (8 * 2 bytes) */
8072 | 699 add r0, r0, #16 /* moved from [E] */ |
10355 | 700 orr r9, r9, r8, lsl #24 |
/* the flags set here are consumed by the bne below; the str and add in between leave them untouched */
8072 | 701 subs r10, r10, #1 /* moved from [F] */ |
702 /* store dest */ | |
703 str r9, [r1, #4] | |
704 | |
705 /* [E] */ | |
706 /* [F] */ | |
707 add r1, r1, r2 | |
708 bne 1b | |
709 | |
710 pop {r4-r10} | |
711 bx lr | |
712 .endfunc |