Mercurial > libavcodec.hg
comparison arm/dsputil_arm_s.S @ 10356:f8d0701ff445 libavcodec
ARM: update ldm/stm instructions to modern syntax
author | mru |
---|---|
date | Sat, 03 Oct 2009 18:22:52 +0000 |
parents | 47245bb9e85f |
children | ada571517e88 |
comparison
equal
deleted
inserted
replaced
10355:47245bb9e85f | 10356:f8d0701ff445 |
---|---|
105 .align 5 | 105 .align 5 |
106 function put_pixels16_arm, export=1 | 106 function put_pixels16_arm, export=1 |
107 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | 107 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
108 @ block = word aligned, pixles = unaligned | 108 @ block = word aligned, pixles = unaligned |
109 pld [r1] | 109 pld [r1] |
110 stmfd sp!, {r4-r11, lr} @ R14 is also called LR | 110 push {r4-r11, lr} |
111 JMP_ALIGN r5, r1 | 111 JMP_ALIGN r5, r1 |
112 1: | 112 1: |
113 ldmia r1, {r4-r7} | 113 ldm r1, {r4-r7} |
114 add r1, r1, r2 | 114 add r1, r1, r2 |
115 stmia r0, {r4-r7} | 115 stm r0, {r4-r7} |
116 pld [r1] | 116 pld [r1] |
117 subs r3, r3, #1 | 117 subs r3, r3, #1 |
118 add r0, r0, r2 | 118 add r0, r0, r2 |
119 bne 1b | 119 bne 1b |
120 ldmfd sp!, {r4-r11, pc} | 120 pop {r4-r11, pc} |
121 .align 5 | 121 .align 5 |
122 2: | 122 2: |
123 ldmia r1, {r4-r8} | 123 ldm r1, {r4-r8} |
124 add r1, r1, r2 | 124 add r1, r1, r2 |
125 ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8 | 125 ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8 |
126 pld [r1] | 126 pld [r1] |
127 subs r3, r3, #1 | 127 subs r3, r3, #1 |
128 stmia r0, {r9-r12} | 128 stm r0, {r9-r12} |
129 add r0, r0, r2 | 129 add r0, r0, r2 |
130 bne 2b | 130 bne 2b |
131 ldmfd sp!, {r4-r11, pc} | 131 pop {r4-r11, pc} |
132 .align 5 | 132 .align 5 |
133 3: | 133 3: |
134 ldmia r1, {r4-r8} | 134 ldm r1, {r4-r8} |
135 add r1, r1, r2 | 135 add r1, r1, r2 |
136 ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8 | 136 ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8 |
137 pld [r1] | 137 pld [r1] |
138 subs r3, r3, #1 | 138 subs r3, r3, #1 |
139 stmia r0, {r9-r12} | 139 stm r0, {r9-r12} |
140 add r0, r0, r2 | 140 add r0, r0, r2 |
141 bne 3b | 141 bne 3b |
142 ldmfd sp!, {r4-r11, pc} | 142 pop {r4-r11, pc} |
143 .align 5 | 143 .align 5 |
144 4: | 144 4: |
145 ldmia r1, {r4-r8} | 145 ldm r1, {r4-r8} |
146 add r1, r1, r2 | 146 add r1, r1, r2 |
147 ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8 | 147 ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8 |
148 pld [r1] | 148 pld [r1] |
149 subs r3, r3, #1 | 149 subs r3, r3, #1 |
150 stmia r0, {r9-r12} | 150 stm r0, {r9-r12} |
151 add r0, r0, r2 | 151 add r0, r0, r2 |
152 bne 4b | 152 bne 4b |
153 ldmfd sp!, {r4-r11,pc} | 153 pop {r4-r11,pc} |
154 .endfunc | 154 .endfunc |
155 | 155 |
156 @ ---------------------------------------------------------------- | 156 @ ---------------------------------------------------------------- |
157 .align 5 | 157 .align 5 |
158 function put_pixels8_arm, export=1 | 158 function put_pixels8_arm, export=1 |
159 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | 159 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
160 @ block = word aligned, pixles = unaligned | 160 @ block = word aligned, pixles = unaligned |
161 pld [r1] | 161 pld [r1] |
162 stmfd sp!, {r4-r5,lr} @ R14 is also called LR | 162 push {r4-r5,lr} |
163 JMP_ALIGN r5, r1 | 163 JMP_ALIGN r5, r1 |
164 1: | 164 1: |
165 ldmia r1, {r4-r5} | 165 ldm r1, {r4-r5} |
166 add r1, r1, r2 | 166 add r1, r1, r2 |
167 subs r3, r3, #1 | 167 subs r3, r3, #1 |
168 pld [r1] | 168 pld [r1] |
169 stmia r0, {r4-r5} | 169 stm r0, {r4-r5} |
170 add r0, r0, r2 | 170 add r0, r0, r2 |
171 bne 1b | 171 bne 1b |
172 ldmfd sp!, {r4-r5,pc} | 172 pop {r4-r5,pc} |
173 .align 5 | 173 .align 5 |
174 2: | 174 2: |
175 ldmia r1, {r4-r5, r12} | 175 ldm r1, {r4-r5, r12} |
176 add r1, r1, r2 | 176 add r1, r1, r2 |
177 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12 | 177 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12 |
178 pld [r1] | 178 pld [r1] |
179 subs r3, r3, #1 | 179 subs r3, r3, #1 |
180 stmia r0, {r4-r5} | 180 stm r0, {r4-r5} |
181 add r0, r0, r2 | 181 add r0, r0, r2 |
182 bne 2b | 182 bne 2b |
183 ldmfd sp!, {r4-r5,pc} | 183 pop {r4-r5,pc} |
184 .align 5 | 184 .align 5 |
185 3: | 185 3: |
186 ldmia r1, {r4-r5, r12} | 186 ldm r1, {r4-r5, r12} |
187 add r1, r1, r2 | 187 add r1, r1, r2 |
188 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12 | 188 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12 |
189 pld [r1] | 189 pld [r1] |
190 subs r3, r3, #1 | 190 subs r3, r3, #1 |
191 stmia r0, {r4-r5} | 191 stm r0, {r4-r5} |
192 add r0, r0, r2 | 192 add r0, r0, r2 |
193 bne 3b | 193 bne 3b |
194 ldmfd sp!, {r4-r5,pc} | 194 pop {r4-r5,pc} |
195 .align 5 | 195 .align 5 |
196 4: | 196 4: |
197 ldmia r1, {r4-r5, r12} | 197 ldm r1, {r4-r5, r12} |
198 add r1, r1, r2 | 198 add r1, r1, r2 |
199 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12 | 199 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12 |
200 pld [r1] | 200 pld [r1] |
201 subs r3, r3, #1 | 201 subs r3, r3, #1 |
202 stmia r0, {r4-r5} | 202 stm r0, {r4-r5} |
203 add r0, r0, r2 | 203 add r0, r0, r2 |
204 bne 4b | 204 bne 4b |
205 ldmfd sp!, {r4-r5,pc} | 205 pop {r4-r5,pc} |
206 .endfunc | 206 .endfunc |
207 | 207 |
208 @ ---------------------------------------------------------------- | 208 @ ---------------------------------------------------------------- |
209 .align 5 | 209 .align 5 |
210 function put_pixels8_x2_arm, export=1 | 210 function put_pixels8_x2_arm, export=1 |
211 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | 211 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
212 @ block = word aligned, pixles = unaligned | 212 @ block = word aligned, pixles = unaligned |
213 pld [r1] | 213 pld [r1] |
214 stmfd sp!, {r4-r10,lr} @ R14 is also called LR | 214 push {r4-r10,lr} |
215 ldr r12, =0xfefefefe | 215 ldr r12, =0xfefefefe |
216 JMP_ALIGN r5, r1 | 216 JMP_ALIGN r5, r1 |
217 1: | 217 1: |
218 ldmia r1, {r4-r5, r10} | 218 ldm r1, {r4-r5, r10} |
219 add r1, r1, r2 | 219 add r1, r1, r2 |
220 ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 | 220 ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 |
221 pld [r1] | 221 pld [r1] |
222 RND_AVG32 r8, r9, r4, r5, r6, r7, r12 | 222 RND_AVG32 r8, r9, r4, r5, r6, r7, r12 |
223 subs r3, r3, #1 | 223 subs r3, r3, #1 |
224 stmia r0, {r8-r9} | 224 stm r0, {r8-r9} |
225 add r0, r0, r2 | 225 add r0, r0, r2 |
226 bne 1b | 226 bne 1b |
227 ldmfd sp!, {r4-r10,pc} | 227 pop {r4-r10,pc} |
228 .align 5 | 228 .align 5 |
229 2: | 229 2: |
230 ldmia r1, {r4-r5, r10} | 230 ldm r1, {r4-r5, r10} |
231 add r1, r1, r2 | 231 add r1, r1, r2 |
232 ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 | 232 ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 |
233 ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10 | 233 ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10 |
234 pld [r1] | 234 pld [r1] |
235 RND_AVG32 r4, r5, r6, r7, r8, r9, r12 | 235 RND_AVG32 r4, r5, r6, r7, r8, r9, r12 |
236 subs r3, r3, #1 | 236 subs r3, r3, #1 |
237 stmia r0, {r4-r5} | 237 stm r0, {r4-r5} |
238 add r0, r0, r2 | 238 add r0, r0, r2 |
239 bne 2b | 239 bne 2b |
240 ldmfd sp!, {r4-r10,pc} | 240 pop {r4-r10,pc} |
241 .align 5 | 241 .align 5 |
242 3: | 242 3: |
243 ldmia r1, {r4-r5, r10} | 243 ldm r1, {r4-r5, r10} |
244 add r1, r1, r2 | 244 add r1, r1, r2 |
245 ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10 | 245 ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10 |
246 ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10 | 246 ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10 |
247 pld [r1] | 247 pld [r1] |
248 RND_AVG32 r4, r5, r6, r7, r8, r9, r12 | 248 RND_AVG32 r4, r5, r6, r7, r8, r9, r12 |
249 subs r3, r3, #1 | 249 subs r3, r3, #1 |
250 stmia r0, {r4-r5} | 250 stm r0, {r4-r5} |
251 add r0, r0, r2 | 251 add r0, r0, r2 |
252 bne 3b | 252 bne 3b |
253 ldmfd sp!, {r4-r10,pc} | 253 pop {r4-r10,pc} |
254 .align 5 | 254 .align 5 |
255 4: | 255 4: |
256 ldmia r1, {r4-r5, r10} | 256 ldm r1, {r4-r5, r10} |
257 add r1, r1, r2 | 257 add r1, r1, r2 |
258 ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10 | 258 ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10 |
259 pld [r1] | 259 pld [r1] |
260 RND_AVG32 r8, r9, r6, r7, r5, r10, r12 | 260 RND_AVG32 r8, r9, r6, r7, r5, r10, r12 |
261 subs r3, r3, #1 | 261 subs r3, r3, #1 |
262 stmia r0, {r8-r9} | 262 stm r0, {r8-r9} |
263 add r0, r0, r2 | 263 add r0, r0, r2 |
264 bne 4b | 264 bne 4b |
265 ldmfd sp!, {r4-r10,pc} @@ update PC with LR content. | 265 pop {r4-r10,pc} |
266 .endfunc | 266 .endfunc |
267 | 267 |
268 .align 5 | 268 .align 5 |
269 function put_no_rnd_pixels8_x2_arm, export=1 | 269 function put_no_rnd_pixels8_x2_arm, export=1 |
270 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | 270 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
271 @ block = word aligned, pixles = unaligned | 271 @ block = word aligned, pixles = unaligned |
272 pld [r1] | 272 pld [r1] |
273 stmfd sp!, {r4-r10,lr} @ R14 is also called LR | 273 push {r4-r10,lr} |
274 ldr r12, =0xfefefefe | 274 ldr r12, =0xfefefefe |
275 JMP_ALIGN r5, r1 | 275 JMP_ALIGN r5, r1 |
276 1: | 276 1: |
277 ldmia r1, {r4-r5, r10} | 277 ldm r1, {r4-r5, r10} |
278 add r1, r1, r2 | 278 add r1, r1, r2 |
279 ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 | 279 ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 |
280 pld [r1] | 280 pld [r1] |
281 NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12 | 281 NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12 |
282 subs r3, r3, #1 | 282 subs r3, r3, #1 |
283 stmia r0, {r8-r9} | 283 stm r0, {r8-r9} |
284 add r0, r0, r2 | 284 add r0, r0, r2 |
285 bne 1b | 285 bne 1b |
286 ldmfd sp!, {r4-r10,pc} | 286 pop {r4-r10,pc} |
287 .align 5 | 287 .align 5 |
288 2: | 288 2: |
289 ldmia r1, {r4-r5, r10} | 289 ldm r1, {r4-r5, r10} |
290 add r1, r1, r2 | 290 add r1, r1, r2 |
291 ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 | 291 ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 |
292 ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10 | 292 ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10 |
293 pld [r1] | 293 pld [r1] |
294 NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12 | 294 NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12 |
295 subs r3, r3, #1 | 295 subs r3, r3, #1 |
296 stmia r0, {r4-r5} | 296 stm r0, {r4-r5} |
297 add r0, r0, r2 | 297 add r0, r0, r2 |
298 bne 2b | 298 bne 2b |
299 ldmfd sp!, {r4-r10,pc} | 299 pop {r4-r10,pc} |
300 .align 5 | 300 .align 5 |
301 3: | 301 3: |
302 ldmia r1, {r4-r5, r10} | 302 ldm r1, {r4-r5, r10} |
303 add r1, r1, r2 | 303 add r1, r1, r2 |
304 ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10 | 304 ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10 |
305 ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10 | 305 ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10 |
306 pld [r1] | 306 pld [r1] |
307 NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12 | 307 NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12 |
308 subs r3, r3, #1 | 308 subs r3, r3, #1 |
309 stmia r0, {r4-r5} | 309 stm r0, {r4-r5} |
310 add r0, r0, r2 | 310 add r0, r0, r2 |
311 bne 3b | 311 bne 3b |
312 ldmfd sp!, {r4-r10,pc} | 312 pop {r4-r10,pc} |
313 .align 5 | 313 .align 5 |
314 4: | 314 4: |
315 ldmia r1, {r4-r5, r10} | 315 ldm r1, {r4-r5, r10} |
316 add r1, r1, r2 | 316 add r1, r1, r2 |
317 ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10 | 317 ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10 |
318 pld [r1] | 318 pld [r1] |
319 NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12 | 319 NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12 |
320 subs r3, r3, #1 | 320 subs r3, r3, #1 |
321 stmia r0, {r8-r9} | 321 stm r0, {r8-r9} |
322 add r0, r0, r2 | 322 add r0, r0, r2 |
323 bne 4b | 323 bne 4b |
324 ldmfd sp!, {r4-r10,pc} @@ update PC with LR content. | 324 pop {r4-r10,pc} |
325 .endfunc | 325 .endfunc |
326 | 326 |
327 | 327 |
328 @ ---------------------------------------------------------------- | 328 @ ---------------------------------------------------------------- |
329 .align 5 | 329 .align 5 |
330 function put_pixels8_y2_arm, export=1 | 330 function put_pixels8_y2_arm, export=1 |
331 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | 331 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
332 @ block = word aligned, pixles = unaligned | 332 @ block = word aligned, pixles = unaligned |
333 pld [r1] | 333 pld [r1] |
334 stmfd sp!, {r4-r11,lr} @ R14 is also called LR | 334 push {r4-r11,lr} |
335 mov r3, r3, lsr #1 | 335 mov r3, r3, lsr #1 |
336 ldr r12, =0xfefefefe | 336 ldr r12, =0xfefefefe |
337 JMP_ALIGN r5, r1 | 337 JMP_ALIGN r5, r1 |
338 1: | 338 1: |
339 ldmia r1, {r4-r5} | 339 ldm r1, {r4-r5} |
340 add r1, r1, r2 | 340 add r1, r1, r2 |
341 6: ldmia r1, {r6-r7} | 341 6: ldm r1, {r6-r7} |
342 add r1, r1, r2 | 342 add r1, r1, r2 |
343 pld [r1] | 343 pld [r1] |
344 RND_AVG32 r8, r9, r4, r5, r6, r7, r12 | 344 RND_AVG32 r8, r9, r4, r5, r6, r7, r12 |
345 ldmia r1, {r4-r5} | 345 ldm r1, {r4-r5} |
346 add r1, r1, r2 | 346 add r1, r1, r2 |
347 stmia r0, {r8-r9} | 347 stm r0, {r8-r9} |
348 add r0, r0, r2 | 348 add r0, r0, r2 |
349 pld [r1] | 349 pld [r1] |
350 RND_AVG32 r8, r9, r6, r7, r4, r5, r12 | 350 RND_AVG32 r8, r9, r6, r7, r4, r5, r12 |
351 subs r3, r3, #1 | 351 subs r3, r3, #1 |
352 stmia r0, {r8-r9} | 352 stm r0, {r8-r9} |
353 add r0, r0, r2 | 353 add r0, r0, r2 |
354 bne 6b | 354 bne 6b |
355 ldmfd sp!, {r4-r11,pc} | 355 pop {r4-r11,pc} |
356 .align 5 | 356 .align 5 |
357 2: | 357 2: |
358 ldmia r1, {r4-r6} | 358 ldm r1, {r4-r6} |
359 add r1, r1, r2 | 359 add r1, r1, r2 |
360 pld [r1] | 360 pld [r1] |
361 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 | 361 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 |
362 6: ldmia r1, {r7-r9} | 362 6: ldm r1, {r7-r9} |
363 add r1, r1, r2 | 363 add r1, r1, r2 |
364 pld [r1] | 364 pld [r1] |
365 ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9 | 365 ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9 |
366 RND_AVG32 r10, r11, r4, r5, r7, r8, r12 | 366 RND_AVG32 r10, r11, r4, r5, r7, r8, r12 |
367 stmia r0, {r10-r11} | 367 stm r0, {r10-r11} |
368 add r0, r0, r2 | 368 add r0, r0, r2 |
369 ldmia r1, {r4-r6} | 369 ldm r1, {r4-r6} |
370 add r1, r1, r2 | 370 add r1, r1, r2 |
371 pld [r1] | 371 pld [r1] |
372 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 | 372 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 |
373 subs r3, r3, #1 | 373 subs r3, r3, #1 |
374 RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | 374 RND_AVG32 r10, r11, r7, r8, r4, r5, r12 |
375 stmia r0, {r10-r11} | 375 stm r0, {r10-r11} |
376 add r0, r0, r2 | 376 add r0, r0, r2 |
377 bne 6b | 377 bne 6b |
378 ldmfd sp!, {r4-r11,pc} | 378 pop {r4-r11,pc} |
379 .align 5 | 379 .align 5 |
380 3: | 380 3: |
381 ldmia r1, {r4-r6} | 381 ldm r1, {r4-r6} |
382 add r1, r1, r2 | 382 add r1, r1, r2 |
383 pld [r1] | 383 pld [r1] |
384 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 | 384 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 |
385 6: ldmia r1, {r7-r9} | 385 6: ldm r1, {r7-r9} |
386 add r1, r1, r2 | 386 add r1, r1, r2 |
387 pld [r1] | 387 pld [r1] |
388 ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9 | 388 ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9 |
389 RND_AVG32 r10, r11, r4, r5, r7, r8, r12 | 389 RND_AVG32 r10, r11, r4, r5, r7, r8, r12 |
390 stmia r0, {r10-r11} | 390 stm r0, {r10-r11} |
391 add r0, r0, r2 | 391 add r0, r0, r2 |
392 ldmia r1, {r4-r6} | 392 ldm r1, {r4-r6} |
393 add r1, r1, r2 | 393 add r1, r1, r2 |
394 pld [r1] | 394 pld [r1] |
395 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 | 395 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 |
396 subs r3, r3, #1 | 396 subs r3, r3, #1 |
397 RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | 397 RND_AVG32 r10, r11, r7, r8, r4, r5, r12 |
398 stmia r0, {r10-r11} | 398 stm r0, {r10-r11} |
399 add r0, r0, r2 | 399 add r0, r0, r2 |
400 bne 6b | 400 bne 6b |
401 ldmfd sp!, {r4-r11,pc} | 401 pop {r4-r11,pc} |
402 .align 5 | 402 .align 5 |
403 4: | 403 4: |
404 ldmia r1, {r4-r6} | 404 ldm r1, {r4-r6} |
405 add r1, r1, r2 | 405 add r1, r1, r2 |
406 pld [r1] | 406 pld [r1] |
407 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 | 407 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 |
408 6: ldmia r1, {r7-r9} | 408 6: ldm r1, {r7-r9} |
409 add r1, r1, r2 | 409 add r1, r1, r2 |
410 pld [r1] | 410 pld [r1] |
411 ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9 | 411 ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9 |
412 RND_AVG32 r10, r11, r4, r5, r7, r8, r12 | 412 RND_AVG32 r10, r11, r4, r5, r7, r8, r12 |
413 stmia r0, {r10-r11} | 413 stm r0, {r10-r11} |
414 add r0, r0, r2 | 414 add r0, r0, r2 |
415 ldmia r1, {r4-r6} | 415 ldm r1, {r4-r6} |
416 add r1, r1, r2 | 416 add r1, r1, r2 |
417 pld [r1] | 417 pld [r1] |
418 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 | 418 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 |
419 subs r3, r3, #1 | 419 subs r3, r3, #1 |
420 RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | 420 RND_AVG32 r10, r11, r7, r8, r4, r5, r12 |
421 stmia r0, {r10-r11} | 421 stm r0, {r10-r11} |
422 add r0, r0, r2 | 422 add r0, r0, r2 |
423 bne 6b | 423 bne 6b |
424 ldmfd sp!, {r4-r11,pc} | 424 pop {r4-r11,pc} |
425 .endfunc | 425 .endfunc |
426 | 426 |
427 .align 5 | 427 .align 5 |
428 function put_no_rnd_pixels8_y2_arm, export=1 | 428 function put_no_rnd_pixels8_y2_arm, export=1 |
429 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | 429 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
430 @ block = word aligned, pixles = unaligned | 430 @ block = word aligned, pixles = unaligned |
431 pld [r1] | 431 pld [r1] |
432 stmfd sp!, {r4-r11,lr} @ R14 is also called LR | 432 push {r4-r11,lr} |
433 mov r3, r3, lsr #1 | 433 mov r3, r3, lsr #1 |
434 ldr r12, =0xfefefefe | 434 ldr r12, =0xfefefefe |
435 JMP_ALIGN r5, r1 | 435 JMP_ALIGN r5, r1 |
436 1: | 436 1: |
437 ldmia r1, {r4-r5} | 437 ldm r1, {r4-r5} |
438 add r1, r1, r2 | 438 add r1, r1, r2 |
439 6: ldmia r1, {r6-r7} | 439 6: ldm r1, {r6-r7} |
440 add r1, r1, r2 | 440 add r1, r1, r2 |
441 pld [r1] | 441 pld [r1] |
442 NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12 | 442 NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12 |
443 ldmia r1, {r4-r5} | 443 ldm r1, {r4-r5} |
444 add r1, r1, r2 | 444 add r1, r1, r2 |
445 stmia r0, {r8-r9} | 445 stm r0, {r8-r9} |
446 add r0, r0, r2 | 446 add r0, r0, r2 |
447 pld [r1] | 447 pld [r1] |
448 NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12 | 448 NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12 |
449 subs r3, r3, #1 | 449 subs r3, r3, #1 |
450 stmia r0, {r8-r9} | 450 stm r0, {r8-r9} |
451 add r0, r0, r2 | 451 add r0, r0, r2 |
452 bne 6b | 452 bne 6b |
453 ldmfd sp!, {r4-r11,pc} | 453 pop {r4-r11,pc} |
454 .align 5 | 454 .align 5 |
455 2: | 455 2: |
456 ldmia r1, {r4-r6} | 456 ldm r1, {r4-r6} |
457 add r1, r1, r2 | 457 add r1, r1, r2 |
458 pld [r1] | 458 pld [r1] |
459 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 | 459 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 |
460 6: ldmia r1, {r7-r9} | 460 6: ldm r1, {r7-r9} |
461 add r1, r1, r2 | 461 add r1, r1, r2 |
462 pld [r1] | 462 pld [r1] |
463 ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9 | 463 ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9 |
464 NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 | 464 NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 |
465 stmia r0, {r10-r11} | 465 stm r0, {r10-r11} |
466 add r0, r0, r2 | 466 add r0, r0, r2 |
467 ldmia r1, {r4-r6} | 467 ldm r1, {r4-r6} |
468 add r1, r1, r2 | 468 add r1, r1, r2 |
469 pld [r1] | 469 pld [r1] |
470 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 | 470 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 |
471 subs r3, r3, #1 | 471 subs r3, r3, #1 |
472 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | 472 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 |
473 stmia r0, {r10-r11} | 473 stm r0, {r10-r11} |
474 add r0, r0, r2 | 474 add r0, r0, r2 |
475 bne 6b | 475 bne 6b |
476 ldmfd sp!, {r4-r11,pc} | 476 pop {r4-r11,pc} |
477 .align 5 | 477 .align 5 |
478 3: | 478 3: |
479 ldmia r1, {r4-r6} | 479 ldm r1, {r4-r6} |
480 add r1, r1, r2 | 480 add r1, r1, r2 |
481 pld [r1] | 481 pld [r1] |
482 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 | 482 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 |
483 6: ldmia r1, {r7-r9} | 483 6: ldm r1, {r7-r9} |
484 add r1, r1, r2 | 484 add r1, r1, r2 |
485 pld [r1] | 485 pld [r1] |
486 ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9 | 486 ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9 |
487 NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 | 487 NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 |
488 stmia r0, {r10-r11} | 488 stm r0, {r10-r11} |
489 add r0, r0, r2 | 489 add r0, r0, r2 |
490 ldmia r1, {r4-r6} | 490 ldm r1, {r4-r6} |
491 add r1, r1, r2 | 491 add r1, r1, r2 |
492 pld [r1] | 492 pld [r1] |
493 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 | 493 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 |
494 subs r3, r3, #1 | 494 subs r3, r3, #1 |
495 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | 495 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 |
496 stmia r0, {r10-r11} | 496 stm r0, {r10-r11} |
497 add r0, r0, r2 | 497 add r0, r0, r2 |
498 bne 6b | 498 bne 6b |
499 ldmfd sp!, {r4-r11,pc} | 499 pop {r4-r11,pc} |
500 .align 5 | 500 .align 5 |
501 4: | 501 4: |
502 ldmia r1, {r4-r6} | 502 ldm r1, {r4-r6} |
503 add r1, r1, r2 | 503 add r1, r1, r2 |
504 pld [r1] | 504 pld [r1] |
505 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 | 505 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 |
506 6: ldmia r1, {r7-r9} | 506 6: ldm r1, {r7-r9} |
507 add r1, r1, r2 | 507 add r1, r1, r2 |
508 pld [r1] | 508 pld [r1] |
509 ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9 | 509 ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9 |
510 NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 | 510 NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 |
511 stmia r0, {r10-r11} | 511 stm r0, {r10-r11} |
512 add r0, r0, r2 | 512 add r0, r0, r2 |
513 ldmia r1, {r4-r6} | 513 ldm r1, {r4-r6} |
514 add r1, r1, r2 | 514 add r1, r1, r2 |
515 pld [r1] | 515 pld [r1] |
516 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 | 516 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 |
517 subs r3, r3, #1 | 517 subs r3, r3, #1 |
518 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | 518 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 |
519 stmia r0, {r10-r11} | 519 stm r0, {r10-r11} |
520 add r0, r0, r2 | 520 add r0, r0, r2 |
521 bne 6b | 521 bne 6b |
522 ldmfd sp!, {r4-r11,pc} | 522 pop {r4-r11,pc} |
523 .endfunc | 523 .endfunc |
524 | 524 |
525 .ltorg | 525 .ltorg |
526 | 526 |
527 @ ---------------------------------------------------------------- | 527 @ ---------------------------------------------------------------- |
528 .macro RND_XY2_IT align, rnd | 528 .macro RND_XY2_IT align, rnd |
529 @ l1= (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202) | 529 @ l1= (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202) |
530 @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2) | 530 @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2) |
531 .if \align == 0 | 531 .if \align == 0 |
532 ldmia r1, {r6-r8} | 532 ldm r1, {r6-r8} |
533 .elseif \align == 3 | 533 .elseif \align == 3 |
534 ldmia r1, {r5-r7} | 534 ldm r1, {r5-r7} |
535 .else | 535 .else |
536 ldmia r1, {r8-r10} | 536 ldm r1, {r8-r10} |
537 .endif | 537 .endif |
538 add r1, r1, r2 | 538 add r1, r1, r2 |
539 pld [r1] | 539 pld [r1] |
540 .if \align == 0 | 540 .if \align == 0 |
541 ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8 | 541 ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8 |
569 subs r3, r3, #1 | 569 subs r3, r3, #1 |
570 .endm | 570 .endm |
571 | 571 |
572 .macro RND_XY2_EXPAND align, rnd | 572 .macro RND_XY2_EXPAND align, rnd |
573 RND_XY2_IT \align, \rnd | 573 RND_XY2_IT \align, \rnd |
574 6: stmfd sp!, {r8-r11} | 574 6: push {r8-r11} |
575 RND_XY2_IT \align, \rnd | 575 RND_XY2_IT \align, \rnd |
576 ldmfd sp!, {r4-r7} | 576 pop {r4-r7} |
577 add r4, r4, r8 | 577 add r4, r4, r8 |
578 add r5, r5, r9 | 578 add r5, r5, r9 |
579 ldr r14, =0x0f0f0f0f | 579 ldr r14, =0x0f0f0f0f |
580 add r6, r6, r10 | 580 add r6, r6, r10 |
581 add r7, r7, r11 | 581 add r7, r7, r11 |
582 and r4, r14, r4, lsr #2 | 582 and r4, r14, r4, lsr #2 |
583 and r5, r14, r5, lsr #2 | 583 and r5, r14, r5, lsr #2 |
584 add r4, r4, r6 | 584 add r4, r4, r6 |
585 add r5, r5, r7 | 585 add r5, r5, r7 |
586 stmia r0, {r4-r5} | 586 stm r0, {r4-r5} |
587 add r0, r0, r2 | 587 add r0, r0, r2 |
588 bge 6b | 588 bge 6b |
589 ldmfd sp!, {r4-r11,pc} | 589 pop {r4-r11,pc} |
590 .endm | 590 .endm |
591 | 591 |
592 .align 5 | 592 .align 5 |
593 function put_pixels8_xy2_arm, export=1 | 593 function put_pixels8_xy2_arm, export=1 |
594 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | 594 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
595 @ block = word aligned, pixles = unaligned | 595 @ block = word aligned, pixles = unaligned |
596 pld [r1] | 596 pld [r1] |
597 stmfd sp!, {r4-r11,lr} @ R14 is also called LR | 597 push {r4-r11,lr} @ R14 is also called LR |
598 JMP_ALIGN r5, r1 | 598 JMP_ALIGN r5, r1 |
599 1: | 599 1: |
600 RND_XY2_EXPAND 0, lsl | 600 RND_XY2_EXPAND 0, lsl |
601 | 601 |
602 .align 5 | 602 .align 5 |
615 .align 5 | 615 .align 5 |
616 function put_no_rnd_pixels8_xy2_arm, export=1 | 616 function put_no_rnd_pixels8_xy2_arm, export=1 |
617 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | 617 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
618 @ block = word aligned, pixles = unaligned | 618 @ block = word aligned, pixles = unaligned |
619 pld [r1] | 619 pld [r1] |
620 stmfd sp!, {r4-r11,lr} @ R14 is also called LR | 620 push {r4-r11,lr} |
621 JMP_ALIGN r5, r1 | 621 JMP_ALIGN r5, r1 |
622 1: | 622 1: |
623 RND_XY2_EXPAND 0, lsr | 623 RND_XY2_EXPAND 0, lsr |
624 | 624 |
625 .align 5 | 625 .align 5 |