Mercurial > libavcodec.hg
comparison arm/dsputil_arm_s.S @ 10355:47245bb9e85f libavcodec
ARM: whitespace cosmetics
author | mru |
---|---|
date | Sat, 03 Oct 2009 18:22:49 +0000 |
parents | 5698999894ec |
children | f8d0701ff445 |
comparison
equal
deleted
inserted
replaced
10354:24a069f83049 | 10355:47245bb9e85f |
---|---|
29 .endm | 29 .endm |
30 #endif | 30 #endif |
31 | 31 |
#if HAVE_ARMV5TE
@ ff_prefetch_arm
@ In:  r0 = start address, r1 = stride (bytes), r2 = line count
@ Issues a PLD cache-preload hint for each of r2 lines, r1 bytes apart.
@ PLD is an ARMv5TE instruction, hence the guard.
function ff_prefetch_arm, export=1
        subs    r2, r2, #1              @ count--
        pld     [r0]                    @ preload hint for current line
        add     r0, r0, r1              @ advance to next line
        bne     ff_prefetch_arm         @ loop by branching to function head
        bx      lr
.endfunc
#endif
41 | 41 |
@ ADJ_ALIGN_QUADWORD_D shift, Rd0-Rd3, Rn0-Rn4
@ Build four aligned words Rd0..Rd3 holding the 16 bytes that start at
@ byte offset \shift (1..3) inside the five consecutive words Rn0..Rn4
@ (little-endian: take the high bytes of one input, low bytes of the
@ next).  Source registers are not written; destinations must not
@ alias a source that is still needed.
.macro  ADJ_ALIGN_QUADWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
        mov     \Rd0, \Rn0, lsr #(\shift * 8)
        mov     \Rd1, \Rn1, lsr #(\shift * 8)
        mov     \Rd2, \Rn2, lsr #(\shift * 8)
        mov     \Rd3, \Rn3, lsr #(\shift * 8)
        orr     \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
        orr     \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
        orr     \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
        orr     \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
.endm
@ ADJ_ALIGN_DOUBLEWORD shift, R0, R1, R2
@ In-place variant: replace R0/R1 with the 8 bytes starting at byte
@ offset \shift (1..3) inside the three consecutive words R0,R1,R2
@ (little-endian).  R2 is read only.
.macro  ADJ_ALIGN_DOUBLEWORD shift, R0, R1, R2
        mov     \R0, \R0, lsr #(\shift * 8)
        orr     \R0, \R0, \R1, lsl #(32 - \shift * 8)
        mov     \R1, \R1, lsr #(\shift * 8)
        orr     \R1, \R1, \R2, lsl #(32 - \shift * 8)
.endm
@ ADJ_ALIGN_DOUBLEWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
@ Same extraction as ADJ_ALIGN_DOUBLEWORD but with separate
@ destination registers; the three source words are preserved.
.macro  ADJ_ALIGN_DOUBLEWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
        mov     \Rdst0, \Rsrc0, lsr #(\shift * 8)
        mov     \Rdst1, \Rsrc1, lsr #(\shift * 8)
        orr     \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
        orr     \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
.endm
64 | 64 |
@ RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
@ Per-byte rounding average of two 8-byte groups:
@   Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
@ which equals (a + b + 1) >> 1 for each byte lane.
@ Rmask must hold 0xFEFEFEFE; Rn0/Rn1 are destroyed.
.macro  RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        eor     \Rd0, \Rn0, \Rm0        @ Rd = Rn ^ Rm
        eor     \Rd1, \Rn1, \Rm1
        orr     \Rn0, \Rn0, \Rm0        @ Rn = Rn | Rm
        orr     \Rn1, \Rn1, \Rm1
        and     \Rd0, \Rd0, \Rmask      @ clear per-byte LSBs before shift
        and     \Rd1, \Rd1, \Rmask
        sub     \Rd0, \Rn0, \Rd0, lsr #1
        sub     \Rd1, \Rn1, \Rd1, lsr #1
.endm
78 | 78 |
@ NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
@ Per-byte truncating (no-round) average of two 8-byte groups:
@   Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1)
@ which equals (a + b) >> 1 for each byte lane.
@ Rmask must hold 0xFEFEFEFE; Rn0/Rn1 are destroyed.
@ (The original header comment said "-"; the code correctly adds.)
.macro  NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        eor     \Rd0, \Rn0, \Rm0        @ Rd = Rn ^ Rm
        eor     \Rd1, \Rn1, \Rm1
        and     \Rn0, \Rn0, \Rm0        @ Rn = Rn & Rm
        and     \Rn1, \Rn1, \Rm1
        and     \Rd0, \Rd0, \Rmask      @ clear per-byte LSBs before shift
        and     \Rd1, \Rd1, \Rmask
        add     \Rd0, \Rn0, \Rd0, lsr #1
        add     \Rd1, \Rn1, \Rd1, lsr #1
.endm
92 | 92 |
@ JMP_ALIGN tmp, reg
@ Word-align \reg downward and dispatch on its former low two bits
@ (byte misalignment) to local labels that each expansion site must
@ provide:  1f = offset 0, 2f = offset 1, 3f = offset 2, 4f = offset 3.
@ \tmp is destroyed; flags are clobbered.
.macro  JMP_ALIGN tmp, reg
        ands    \tmp, \reg, #3          @ tmp = reg & 3, sets Z
        bic     \reg, \reg, #3          @ reg &= ~3
        beq     1f                      @ offset 0
        subs    \tmp, \tmp, #1
        beq     2f                      @ offset 1
        subs    \tmp, \tmp, #1
        beq     3f                      @ offset 2
        b       4f                      @ offset 3
.endm
103 | 103 |
104 @ ---------------------------------------------------------------- | 104 @ ---------------------------------------------------------------- |
.align 5
@ void put_pixels16_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ Copy h rows of 16 bytes from pixels to block, advancing both by
@ line_size per row.  block is word-aligned; pixels may be unaligned
@ (JMP_ALIGN picks one of four unrolled copy loops per misalignment).
function put_pixels16_arm, export=1
        pld     [r1]
        stmfd   sp!, {r4-r11, lr}       @ R14 is also called LR
        JMP_ALIGN r5, r1                @ dispatch on pixels & 3
1:                                      @ pixels word-aligned
        ldmia   r1, {r4-r7}             @ load 16 bytes
        add     r1, r1, r2
        stmia   r0, {r4-r7}             @ store 16 bytes
        pld     [r1]
        subs    r3, r3, #1              @ h--
        add     r0, r0, r2
        bne     1b
        ldmfd   sp!, {r4-r11, pc}
        .align 5
2:                                      @ misaligned by 1 byte
        ldmia   r1, {r4-r8}             @ load 17..20 bytes (5 words)
        add     r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r9-r12}
        add     r0, r0, r2
        bne     2b
        ldmfd   sp!, {r4-r11, pc}
        .align 5
3:                                      @ misaligned by 2 bytes
        ldmia   r1, {r4-r8}
        add     r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r9-r12}
        add     r0, r0, r2
        bne     3b
        ldmfd   sp!, {r4-r11, pc}
        .align 5
4:                                      @ misaligned by 3 bytes
        ldmia   r1, {r4-r8}
        add     r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r9-r12}
        add     r0, r0, r2
        bne     4b
        ldmfd   sp!, {r4-r11,pc}
.endfunc
155 | 155 |
156 @ ---------------------------------------------------------------- | 156 @ ---------------------------------------------------------------- |
.align 5
@ void put_pixels8_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ Copy h rows of 8 bytes from pixels to block, advancing both by
@ line_size per row.  block is word-aligned; pixels may be unaligned.
function put_pixels8_arm, export=1
        pld     [r1]
        stmfd   sp!, {r4-r5,lr}         @ R14 is also called LR
        JMP_ALIGN r5, r1                @ dispatch on pixels & 3
1:                                      @ pixels word-aligned
        ldmia   r1, {r4-r5}             @ load 8 bytes
        add     r1, r1, r2
        subs    r3, r3, #1              @ h--
        pld     [r1]
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bne     1b
        ldmfd   sp!, {r4-r5,pc}
        .align 5
2:                                      @ misaligned by 1 byte
        ldmia   r1, {r4-r5, r12}        @ load 3 words covering the row
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bne     2b
        ldmfd   sp!, {r4-r5,pc}
        .align 5
3:                                      @ misaligned by 2 bytes
        ldmia   r1, {r4-r5, r12}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bne     3b
        ldmfd   sp!, {r4-r5,pc}
        .align 5
4:                                      @ misaligned by 3 bytes
        ldmia   r1, {r4-r5, r12}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bne     4b
        ldmfd   sp!, {r4-r5,pc}
.endfunc
207 | 207 |
208 @ ---------------------------------------------------------------- | 208 @ ---------------------------------------------------------------- |
.align 5
@ void put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ Half-pel horizontal interpolation: each output byte is the rounding
@ average of pixels[i] and pixels[i+1].  block is word-aligned;
@ pixels may be unaligned.  r12 holds the 0xFEFEFEFE mask for
@ RND_AVG32 throughout.
function put_pixels8_x2_arm, export=1
        pld     [r1]
        stmfd   sp!, {r4-r10,lr}        @ R14 is also called LR
        ldr     r12, =0xfefefefe        @ byte-average mask
        JMP_ALIGN r5, r1                @ dispatch on pixels & 3
1:                                      @ offset 0: avg(shift 0, shift 1)
        ldmia   r1, {r4-r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        pld     [r1]
        RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        subs    r3, r3, #1
        stmia   r0, {r8-r9}
        add     r0, r0, r2
        bne     1b
        ldmfd   sp!, {r4-r10,pc}
        .align 5
2:                                      @ offset 1: avg(shift 1, shift 2)
        ldmia   r1, {r4-r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
        pld     [r1]
        RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs    r3, r3, #1
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bne     2b
        ldmfd   sp!, {r4-r10,pc}
        .align 5
3:                                      @ offset 2: avg(shift 2, shift 3)
        ldmia   r1, {r4-r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
        pld     [r1]
        RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs    r3, r3, #1
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bne     3b
        ldmfd   sp!, {r4-r10,pc}
        .align 5
4:                                      @ offset 3: avg(shift 3, shift 4 = r5,r10)
        ldmia   r1, {r4-r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
        pld     [r1]
        RND_AVG32 r8, r9, r6, r7, r5, r10, r12
        subs    r3, r3, #1
        stmia   r0, {r8-r9}
        add     r0, r0, r2
        bne     4b
        ldmfd   sp!, {r4-r10,pc}        @@ update PC with LR content.
.endfunc
267 | 267 |
.align 5
@ void put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ Same as put_pixels8_x2_arm but with truncating averages
@ (NO_RND_AVG32, i.e. (a + b) >> 1 instead of (a + b + 1) >> 1).
function put_no_rnd_pixels8_x2_arm, export=1
        pld     [r1]
        stmfd   sp!, {r4-r10,lr}        @ R14 is also called LR
        ldr     r12, =0xfefefefe        @ byte-average mask
        JMP_ALIGN r5, r1                @ dispatch on pixels & 3
1:                                      @ offset 0: avg(shift 0, shift 1)
        ldmia   r1, {r4-r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        pld     [r1]
        NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        subs    r3, r3, #1
        stmia   r0, {r8-r9}
        add     r0, r0, r2
        bne     1b
        ldmfd   sp!, {r4-r10,pc}
        .align 5
2:                                      @ offset 1: avg(shift 1, shift 2)
        ldmia   r1, {r4-r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
        pld     [r1]
        NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs    r3, r3, #1
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bne     2b
        ldmfd   sp!, {r4-r10,pc}
        .align 5
3:                                      @ offset 2: avg(shift 2, shift 3)
        ldmia   r1, {r4-r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
        pld     [r1]
        NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs    r3, r3, #1
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bne     3b
        ldmfd   sp!, {r4-r10,pc}
        .align 5
4:                                      @ offset 3: avg(shift 3, shift 4 = r5,r10)
        ldmia   r1, {r4-r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
        pld     [r1]
        NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12
        subs    r3, r3, #1
        stmia   r0, {r8-r9}
        add     r0, r0, r2
        bne     4b
        ldmfd   sp!, {r4-r10,pc}        @@ update PC with LR content.
.endfunc
326 | 326 |
327 | 327 |
328 @ ---------------------------------------------------------------- | 328 @ ---------------------------------------------------------------- |
.align 5
@ void put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ Half-pel vertical interpolation: each output row is the rounding
@ byte-average of two consecutive source rows.  The loop is unrolled
@ two output rows per iteration (hence r3 = h >> 1), reusing the last
@ loaded row as the first operand of the next average.
@ block is word-aligned; pixels may be unaligned.
function put_pixels8_y2_arm, export=1
        pld     [r1]
        stmfd   sp!, {r4-r11,lr}        @ R14 is also called LR
        mov     r3, r3, lsr #1          @ two rows per iteration
        ldr     r12, =0xfefefefe        @ byte-average mask
        JMP_ALIGN r5, r1                @ dispatch on pixels & 3
1:                                      @ pixels word-aligned
        ldmia   r1, {r4-r5}             @ prime: row 0 in r4-r5
        add     r1, r1, r2
6:      ldmia   r1, {r6-r7}             @ next row
        add     r1, r1, r2
        pld     [r1]
        RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        ldmia   r1, {r4-r5}             @ row after that
        add     r1, r1, r2
        stmia   r0, {r8-r9}
        add     r0, r0, r2
        pld     [r1]
        RND_AVG32 r8, r9, r6, r7, r4, r5, r12
        subs    r3, r3, #1
        stmia   r0, {r8-r9}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11,pc}
        .align 5
2:                                      @ misaligned by 1 byte
        ldmia   r1, {r4-r6}             @ prime row (3 words)
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
6:      ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
        subs    r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11,pc}
        .align 5
3:                                      @ misaligned by 2 bytes
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
6:      ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
        subs    r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11,pc}
        .align 5
4:                                      @ misaligned by 3 bytes
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
6:      ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
        subs    r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11,pc}
.endfunc
426 | 426 |
.align 5
@ void put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ Same as put_pixels8_y2_arm but with truncating averages
@ (NO_RND_AVG32, i.e. (a + b) >> 1 instead of (a + b + 1) >> 1).
function put_no_rnd_pixels8_y2_arm, export=1
        pld     [r1]
        stmfd   sp!, {r4-r11,lr}        @ R14 is also called LR
        mov     r3, r3, lsr #1          @ two rows per iteration
        ldr     r12, =0xfefefefe        @ byte-average mask
        JMP_ALIGN r5, r1                @ dispatch on pixels & 3
1:                                      @ pixels word-aligned
        ldmia   r1, {r4-r5}             @ prime: row 0 in r4-r5
        add     r1, r1, r2
6:      ldmia   r1, {r6-r7}             @ next row
        add     r1, r1, r2
        pld     [r1]
        NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        ldmia   r1, {r4-r5}             @ row after that
        add     r1, r1, r2
        stmia   r0, {r8-r9}
        add     r0, r0, r2
        pld     [r1]
        NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12
        subs    r3, r3, #1
        stmia   r0, {r8-r9}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11,pc}
        .align 5
2:                                      @ misaligned by 1 byte
        ldmia   r1, {r4-r6}             @ prime row (3 words)
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
6:      ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
        subs    r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11,pc}
        .align 5
3:                                      @ misaligned by 2 bytes
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
6:      ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
        subs    r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11,pc}
        .align 5
4:                                      @ misaligned by 3 bytes
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
6:      ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
        subs    r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11,pc}
.endfunc
524 | 524 |
525 .ltorg | 525 .ltorg |
526 | 526 |
527 @ ---------------------------------------------------------------- | 527 @ ---------------------------------------------------------------- |
@ RND_XY2_IT align, rnd
@ One row of the xy2 (diagonal half-pel) kernel.  Loads a row at
@ misalignment \align, extracts the two horizontally adjacent 8-byte
@ groups a and b, and computes per the classic split:
@   l1 = (a & 0x03030303) + (b & 0x03030303) (+ rounding bias)
@   h1 = ((a & 0xFCFCFCFC) >> 2) + ((b & 0xFCFCFCFC) >> 2)
@ Outputs: r8/r9 = low-bits sums (l1), r10/r11 = high-bits sums (h1).
@ \rnd selects the bias via "andeq r14, r14, r14, \rnd #1":
@   lsl -> 0x03030303 & 0x06060606 = 0x02020202 (rounding)
@   lsr -> 0x03030303 & 0x01818181 = 0x01010101 (no-round)
@ applied only when bit 0 of r3 is clear (every other row, so the
@ bias is added once per two accumulated rows).
@ Decrements r3 and sets flags for the caller's loop (bge).
@ Clobbers r4-r12, r14; advances r1 by r2 and prefetches the next row.
.macro RND_XY2_IT align, rnd
        @ l1= (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)
        @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
.if \align == 0
        ldmia   r1, {r6-r8}             @ registers chosen per align so the
.elseif \align == 3
        ldmia   r1, {r5-r7}             @ extraction below can run in place
.else
        ldmia   r1, {r8-r10}
.endif
        add     r1, r1, r2
        pld     [r1]
.if \align == 0
        ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8    @ a = shift 0 (r6,r7), b = shift 1
.elseif \align == 1
        ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r8, r9, r10   @ a = shift 1
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r8, r9, r10   @ b = shift 2
.elseif \align == 2
        ADJ_ALIGN_DOUBLEWORD_D 2, r4, r5, r8, r9, r10   @ a = shift 2
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r8, r9, r10   @ b = shift 3
.elseif \align == 3
        ADJ_ALIGN_DOUBLEWORD_D 3, r4, r5, r5, r6, r7    @ a = shift 3; b = shift 4 (r6,r7)
.endif
        ldr     r14, =0x03030303        @ low-2-bits mask
        tst     r3, #1                  @ EQ on even r3 -> apply bias
        and     r8,  r4,  r14
        and     r9,  r5,  r14
        and     r10, r6,  r14
        and     r11, r7,  r14
        andeq   r14, r14, r14, \rnd #1  @ r14 = rounding bias (see header)
        add     r8,  r8,  r10           @ l1 lo word
        add     r9,  r9,  r11           @ l1 hi word
        ldr     r12, =0xfcfcfcfc >> 2
        addeq   r8,  r8,  r14
        addeq   r9,  r9,  r14
        and     r4,  r12, r4,  lsr #2
        and     r5,  r12, r5,  lsr #2
        and     r6,  r12, r6,  lsr #2
        and     r7,  r12, r7,  lsr #2
        add     r10, r4,  r6            @ h1 lo word
        add     r11, r5,  r7            @ h1 hi word
        subs    r3, r3, #1              @ row counter; caller loops on bge
.endm
571 | 571 |
@ RND_XY2_EXPAND align, rnd
@ Full xy2 loop body for one alignment case.  Runs RND_XY2_IT once to
@ prime the previous row, then per iteration: saves the previous row's
@ partial sums on the stack, computes the current row's, combines
@ the two rows' low-bit sums (>> 2, masked with 0x0F0F0F0F) with the
@ high-bit sums, and stores 8 output bytes.  Loops while r3 >= 0
@ (flags set by the subs inside RND_XY2_IT), then restores r4-r11
@ and returns.  Falls through into the ldmfd return — the expansion
@ site must place this at the end of its code path.
.macro RND_XY2_EXPAND align, rnd
        RND_XY2_IT \align, \rnd         @ prime: previous row in r8-r11
6:      stmfd   sp!, {r8-r11}           @ spill previous row's l1/h1
        RND_XY2_IT \align, \rnd         @ current row -> r8-r11
        ldmfd   sp!, {r4-r7}            @ reload previous row into r4-r7
        add     r4, r4, r8              @ sum the two rows' l1 parts
        add     r5, r5, r9
        ldr     r14, =0x0f0f0f0f
        add     r6, r6, r10             @ sum the two rows' h1 parts
        add     r7, r7, r11
        and     r4, r14, r4, lsr #2     @ (l1 sums) >> 2, per-byte masked
        and     r5, r14, r5, lsr #2
        add     r4, r4, r6              @ final bytes = h1 sums + carry part
        add     r5, r5, r7
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bge     6b                      @ loop while r3 >= 0
        ldmfd   sp!, {r4-r11,pc}
.endm
591 | 591 |
.align 5
@ void put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ Diagonal half-pel interpolation with rounding: each output byte is
@ (a + b + c + d + 2) >> 2 of the 2x2 source neighbourhood (the "lsl"
@ argument selects the 0x02020202 bias in RND_XY2_IT).
@ block is word-aligned; pixels may be unaligned.
function put_pixels8_xy2_arm, export=1
        pld     [r1]
        stmfd   sp!, {r4-r11,lr}        @ R14 is also called LR
        JMP_ALIGN r5, r1                @ dispatch on pixels & 3
1:                                      @ offset 0
        RND_XY2_EXPAND 0, lsl           @ each expansion ends in the return

        .align 5
2:                                      @ offset 1
        RND_XY2_EXPAND 1, lsl

        .align 5
3:                                      @ offset 2
        RND_XY2_EXPAND 2, lsl

        .align 5
4:                                      @ offset 3
        RND_XY2_EXPAND 3, lsl
.endfunc
614 | 614 |
.align 5
@ void put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ Diagonal half-pel interpolation without rounding: the "lsr" argument
@ selects the 0x01010101 bias in RND_XY2_IT, giving
@ (a + b + c + d + 1) >> 2 per output byte.
@ block is word-aligned; pixels may be unaligned.
function put_no_rnd_pixels8_xy2_arm, export=1
        pld     [r1]
        stmfd   sp!, {r4-r11,lr}        @ R14 is also called LR
        JMP_ALIGN r5, r1                @ dispatch on pixels & 3
1:                                      @ offset 0
        RND_XY2_EXPAND 0, lsr           @ each expansion ends in the return

        .align 5
2:                                      @ offset 1
        RND_XY2_EXPAND 1, lsr

        .align 5
3:                                      @ offset 2
        RND_XY2_EXPAND 2, lsr

        .align 5
4:                                      @ offset 3
        RND_XY2_EXPAND 3, lsr
.endfunc
637 | 637 |
638 .align 5 | 638 .align 5 |
639 @ void ff_add_pixels_clamped_ARM(int16_t *block, uint8_t *dest, int stride) | 639 @ void ff_add_pixels_clamped_ARM(int16_t *block, uint8_t *dest, int stride) |
640 function ff_add_pixels_clamped_ARM, export=1 | 640 function ff_add_pixels_clamped_ARM, export=1 |
655 movne r6, r5, lsr #24 | 655 movne r6, r5, lsr #24 |
656 tst r8, #0x100 | 656 tst r8, #0x100 |
657 movne r8, r7, lsr #24 | 657 movne r8, r7, lsr #24 |
658 mov r9, r6 | 658 mov r9, r6 |
659 ldrsh r5, [r0, #4] /* moved form [A] */ | 659 ldrsh r5, [r0, #4] /* moved form [A] */ |
660 orr r9, r9, r8, lsl #8 | 660 orr r9, r9, r8, lsl #8 |
661 /* block[2] and block[3] */ | 661 /* block[2] and block[3] */ |
662 /* [A] */ | 662 /* [A] */ |
663 ldrsh r7, [r0, #6] | 663 ldrsh r7, [r0, #6] |
664 and r6, r4, #0xFF0000 | 664 and r6, r4, #0xFF0000 |
665 and r8, r4, #0xFF000000 | 665 and r8, r4, #0xFF000000 |
666 add r6, r5, r6, lsr #16 | 666 add r6, r5, r6, lsr #16 |
667 add r8, r7, r8, lsr #24 | 667 add r8, r7, r8, lsr #24 |
668 mvn r5, r5 | 668 mvn r5, r5 |
669 mvn r7, r7 | 669 mvn r7, r7 |
670 tst r6, #0x100 | 670 tst r6, #0x100 |
671 movne r6, r5, lsr #24 | 671 movne r6, r5, lsr #24 |
672 tst r8, #0x100 | 672 tst r8, #0x100 |
673 movne r8, r7, lsr #24 | 673 movne r8, r7, lsr #24 |
674 orr r9, r9, r6, lsl #16 | 674 orr r9, r9, r6, lsl #16 |
675 ldr r4, [r1, #4] /* moved form [B] */ | 675 ldr r4, [r1, #4] /* moved form [B] */ |
676 orr r9, r9, r8, lsl #24 | 676 orr r9, r9, r8, lsl #24 |
677 /* store dest */ | 677 /* store dest */ |
678 ldrsh r5, [r0, #8] /* moved form [C] */ | 678 ldrsh r5, [r0, #8] /* moved form [C] */ |
679 str r9, [r1] | 679 str r9, [r1] |
680 | 680 |
681 /* load dest */ | 681 /* load dest */ |
684 /* [C] */ | 684 /* [C] */ |
685 ldrsh r7, [r0, #10] | 685 ldrsh r7, [r0, #10] |
686 and r6, r4, #0xFF | 686 and r6, r4, #0xFF |
687 and r8, r4, #0xFF00 | 687 and r8, r4, #0xFF00 |
688 add r6, r5, r6 | 688 add r6, r5, r6 |
689 add r8, r7, r8, lsr #8 | 689 add r8, r7, r8, lsr #8 |
690 mvn r5, r5 | 690 mvn r5, r5 |
691 mvn r7, r7 | 691 mvn r7, r7 |
692 tst r6, #0x100 | 692 tst r6, #0x100 |
693 movne r6, r5, lsr #24 | 693 movne r6, r5, lsr #24 |
694 tst r8, #0x100 | 694 tst r8, #0x100 |
695 movne r8, r7, lsr #24 | 695 movne r8, r7, lsr #24 |
696 mov r9, r6 | 696 mov r9, r6 |
697 ldrsh r5, [r0, #12] /* moved from [D] */ | 697 ldrsh r5, [r0, #12] /* moved from [D] */ |
698 orr r9, r9, r8, lsl #8 | 698 orr r9, r9, r8, lsl #8 |
699 /* block[6] and block[7] */ | 699 /* block[6] and block[7] */ |
700 /* [D] */ | 700 /* [D] */ |
701 ldrsh r7, [r0, #14] | 701 ldrsh r7, [r0, #14] |
702 and r6, r4, #0xFF0000 | 702 and r6, r4, #0xFF0000 |
703 and r8, r4, #0xFF000000 | 703 and r8, r4, #0xFF000000 |
704 add r6, r5, r6, lsr #16 | 704 add r6, r5, r6, lsr #16 |
705 add r8, r7, r8, lsr #24 | 705 add r8, r7, r8, lsr #24 |
706 mvn r5, r5 | 706 mvn r5, r5 |
707 mvn r7, r7 | 707 mvn r7, r7 |
708 tst r6, #0x100 | 708 tst r6, #0x100 |
709 movne r6, r5, lsr #24 | 709 movne r6, r5, lsr #24 |
710 tst r8, #0x100 | 710 tst r8, #0x100 |
711 movne r8, r7, lsr #24 | 711 movne r8, r7, lsr #24 |
712 orr r9, r9, r6, lsl #16 | 712 orr r9, r9, r6, lsl #16 |
713 add r0, r0, #16 /* moved from [E] */ | 713 add r0, r0, #16 /* moved from [E] */ |
714 orr r9, r9, r8, lsl #24 | 714 orr r9, r9, r8, lsl #24 |
715 subs r10, r10, #1 /* moved from [F] */ | 715 subs r10, r10, #1 /* moved from [F] */ |
716 /* store dest */ | 716 /* store dest */ |
717 str r9, [r1, #4] | 717 str r9, [r1, #4] |
718 | 718 |
719 /* [E] */ | 719 /* [E] */ |