Mercurial > libavcodec.hg
comparison arm/dsputil_arm_s.S @ 8678:6c256fc075e3 libavcodec
ARM: change alignment of loops in put_pixels*_arm to 32
author | mru |
---|---|
date | Tue, 27 Jan 2009 16:06:41 +0000 |
parents | 7a463923ecd1 |
children | 9c1ea156d893 |
comparison
equal
deleted
inserted
replaced
8677:3c484b73ca73 | 8678:6c256fc075e3 |
---|---|
89 add \Rd0, \Rn0, \Rd0, lsr #1 | 89 add \Rd0, \Rn0, \Rd0, lsr #1 |
90 add \Rd1, \Rn1, \Rd1, lsr #1 | 90 add \Rd1, \Rn1, \Rd1, lsr #1 |
91 .endm | 91 .endm |
92 | 92 |
93 @ ---------------------------------------------------------------- | 93 @ ---------------------------------------------------------------- |
94 .align 8 | 94 .align 5 |
95 function put_pixels16_arm, export=1 | 95 function put_pixels16_arm, export=1 |
96 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | 96 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
97 @ block = word aligned, pixles = unaligned | 97 @ block = word aligned, pixles = unaligned |
98 pld [r1] | 98 pld [r1] |
99 stmfd sp!, {r4-r11, lr} @ R14 is also called LR | 99 stmfd sp!, {r4-r11, lr} @ R14 is also called LR |
109 pld [r1] | 109 pld [r1] |
110 subs r3, r3, #1 | 110 subs r3, r3, #1 |
111 add r0, r0, r2 | 111 add r0, r0, r2 |
112 bne 1b | 112 bne 1b |
113 ldmfd sp!, {r4-r11, pc} | 113 ldmfd sp!, {r4-r11, pc} |
114 .align 8 | 114 .align 5 |
115 2: | 115 2: |
116 ldmia r1, {r4-r8} | 116 ldmia r1, {r4-r8} |
117 add r1, r1, r2 | 117 add r1, r1, r2 |
118 ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8 | 118 ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8 |
119 pld [r1] | 119 pld [r1] |
120 subs r3, r3, #1 | 120 subs r3, r3, #1 |
121 stmia r0, {r9-r12} | 121 stmia r0, {r9-r12} |
122 add r0, r0, r2 | 122 add r0, r0, r2 |
123 bne 2b | 123 bne 2b |
124 ldmfd sp!, {r4-r11, pc} | 124 ldmfd sp!, {r4-r11, pc} |
125 .align 8 | 125 .align 5 |
126 3: | 126 3: |
127 ldmia r1, {r4-r8} | 127 ldmia r1, {r4-r8} |
128 add r1, r1, r2 | 128 add r1, r1, r2 |
129 ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8 | 129 ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8 |
130 pld [r1] | 130 pld [r1] |
131 subs r3, r3, #1 | 131 subs r3, r3, #1 |
132 stmia r0, {r9-r12} | 132 stmia r0, {r9-r12} |
133 add r0, r0, r2 | 133 add r0, r0, r2 |
134 bne 3b | 134 bne 3b |
135 ldmfd sp!, {r4-r11, pc} | 135 ldmfd sp!, {r4-r11, pc} |
136 .align 8 | 136 .align 5 |
137 4: | 137 4: |
138 ldmia r1, {r4-r8} | 138 ldmia r1, {r4-r8} |
139 add r1, r1, r2 | 139 add r1, r1, r2 |
140 ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8 | 140 ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8 |
141 pld [r1] | 141 pld [r1] |
142 subs r3, r3, #1 | 142 subs r3, r3, #1 |
143 stmia r0, {r9-r12} | 143 stmia r0, {r9-r12} |
144 add r0, r0, r2 | 144 add r0, r0, r2 |
145 bne 4b | 145 bne 4b |
146 ldmfd sp!, {r4-r11,pc} | 146 ldmfd sp!, {r4-r11,pc} |
147 .align 8 | |
148 5: | 147 5: |
149 .word 1b | 148 .word 1b |
150 .word 2b | 149 .word 2b |
151 .word 3b | 150 .word 3b |
152 .word 4b | 151 .word 4b |
153 .endfunc | 152 .endfunc |
154 | 153 |
155 @ ---------------------------------------------------------------- | 154 @ ---------------------------------------------------------------- |
156 .align 8 | 155 .align 5 |
157 function put_pixels8_arm, export=1 | 156 function put_pixels8_arm, export=1 |
158 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | 157 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
159 @ block = word aligned, pixles = unaligned | 158 @ block = word aligned, pixles = unaligned |
160 pld [r1] | 159 pld [r1] |
161 stmfd sp!, {r4-r5,lr} @ R14 is also called LR | 160 stmfd sp!, {r4-r5,lr} @ R14 is also called LR |
171 pld [r1] | 170 pld [r1] |
172 stmia r0, {r4-r5} | 171 stmia r0, {r4-r5} |
173 add r0, r0, r2 | 172 add r0, r0, r2 |
174 bne 1b | 173 bne 1b |
175 ldmfd sp!, {r4-r5,pc} | 174 ldmfd sp!, {r4-r5,pc} |
176 .align 8 | 175 .align 5 |
177 2: | 176 2: |
178 ldmia r1, {r4-r5, r12} | 177 ldmia r1, {r4-r5, r12} |
179 add r1, r1, r2 | 178 add r1, r1, r2 |
180 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12 | 179 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12 |
181 pld [r1] | 180 pld [r1] |
182 subs r3, r3, #1 | 181 subs r3, r3, #1 |
183 stmia r0, {r4-r5} | 182 stmia r0, {r4-r5} |
184 add r0, r0, r2 | 183 add r0, r0, r2 |
185 bne 2b | 184 bne 2b |
186 ldmfd sp!, {r4-r5,pc} | 185 ldmfd sp!, {r4-r5,pc} |
187 .align 8 | 186 .align 5 |
188 3: | 187 3: |
189 ldmia r1, {r4-r5, r12} | 188 ldmia r1, {r4-r5, r12} |
190 add r1, r1, r2 | 189 add r1, r1, r2 |
191 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12 | 190 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12 |
192 pld [r1] | 191 pld [r1] |
193 subs r3, r3, #1 | 192 subs r3, r3, #1 |
194 stmia r0, {r4-r5} | 193 stmia r0, {r4-r5} |
195 add r0, r0, r2 | 194 add r0, r0, r2 |
196 bne 3b | 195 bne 3b |
197 ldmfd sp!, {r4-r5,pc} | 196 ldmfd sp!, {r4-r5,pc} |
198 .align 8 | 197 .align 5 |
199 4: | 198 4: |
200 ldmia r1, {r4-r5, r12} | 199 ldmia r1, {r4-r5, r12} |
201 add r1, r1, r2 | 200 add r1, r1, r2 |
202 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12 | 201 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12 |
203 pld [r1] | 202 pld [r1] |
204 subs r3, r3, #1 | 203 subs r3, r3, #1 |
205 stmia r0, {r4-r5} | 204 stmia r0, {r4-r5} |
206 add r0, r0, r2 | 205 add r0, r0, r2 |
207 bne 4b | 206 bne 4b |
208 ldmfd sp!, {r4-r5,pc} | 207 ldmfd sp!, {r4-r5,pc} |
209 .align 8 | |
210 5: | 208 5: |
211 .word 1b | 209 .word 1b |
212 .word 2b | 210 .word 2b |
213 .word 3b | 211 .word 3b |
214 .word 4b | 212 .word 4b |
215 .endfunc | 213 .endfunc |
216 | 214 |
217 @ ---------------------------------------------------------------- | 215 @ ---------------------------------------------------------------- |
218 .align 8 | 216 .align 5 |
219 function put_pixels8_x2_arm, export=1 | 217 function put_pixels8_x2_arm, export=1 |
220 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | 218 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
221 @ block = word aligned, pixles = unaligned | 219 @ block = word aligned, pixles = unaligned |
222 pld [r1] | 220 pld [r1] |
223 stmfd sp!, {r4-r10,lr} @ R14 is also called LR | 221 stmfd sp!, {r4-r10,lr} @ R14 is also called LR |
236 subs r3, r3, #1 | 234 subs r3, r3, #1 |
237 stmia r0, {r8-r9} | 235 stmia r0, {r8-r9} |
238 add r0, r0, r2 | 236 add r0, r0, r2 |
239 bne 1b | 237 bne 1b |
240 ldmfd sp!, {r4-r10,pc} | 238 ldmfd sp!, {r4-r10,pc} |
241 .align 8 | 239 .align 5 |
242 2: | 240 2: |
243 ldmia r1, {r4-r5, r10} | 241 ldmia r1, {r4-r5, r10} |
244 add r1, r1, r2 | 242 add r1, r1, r2 |
245 ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 | 243 ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 |
246 ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10 | 244 ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10 |
249 subs r3, r3, #1 | 247 subs r3, r3, #1 |
250 stmia r0, {r4-r5} | 248 stmia r0, {r4-r5} |
251 add r0, r0, r2 | 249 add r0, r0, r2 |
252 bne 2b | 250 bne 2b |
253 ldmfd sp!, {r4-r10,pc} | 251 ldmfd sp!, {r4-r10,pc} |
254 .align 8 | 252 .align 5 |
255 3: | 253 3: |
256 ldmia r1, {r4-r5, r10} | 254 ldmia r1, {r4-r5, r10} |
257 add r1, r1, r2 | 255 add r1, r1, r2 |
258 ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10 | 256 ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10 |
259 ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10 | 257 ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10 |
262 subs r3, r3, #1 | 260 subs r3, r3, #1 |
263 stmia r0, {r4-r5} | 261 stmia r0, {r4-r5} |
264 add r0, r0, r2 | 262 add r0, r0, r2 |
265 bne 3b | 263 bne 3b |
266 ldmfd sp!, {r4-r10,pc} | 264 ldmfd sp!, {r4-r10,pc} |
267 .align 8 | 265 .align 5 |
268 4: | 266 4: |
269 ldmia r1, {r4-r5, r10} | 267 ldmia r1, {r4-r5, r10} |
270 add r1, r1, r2 | 268 add r1, r1, r2 |
271 ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10 | 269 ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10 |
272 pld [r1] | 270 pld [r1] |
274 subs r3, r3, #1 | 272 subs r3, r3, #1 |
275 stmia r0, {r8-r9} | 273 stmia r0, {r8-r9} |
276 add r0, r0, r2 | 274 add r0, r0, r2 |
277 bne 4b | 275 bne 4b |
278 ldmfd sp!, {r4-r10,pc} @@ update PC with LR content. | 276 ldmfd sp!, {r4-r10,pc} @@ update PC with LR content. |
279 .align 8 | |
280 5: | 277 5: |
281 .word 0xFEFEFEFE | 278 .word 0xFEFEFEFE |
282 .word 2b | 279 .word 2b |
283 .word 3b | 280 .word 3b |
284 .word 4b | 281 .word 4b |
285 .endfunc | 282 .endfunc |
286 | 283 |
287 .align 8 | 284 .align 5 |
288 function put_no_rnd_pixels8_x2_arm, export=1 | 285 function put_no_rnd_pixels8_x2_arm, export=1 |
289 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | 286 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
290 @ block = word aligned, pixles = unaligned | 287 @ block = word aligned, pixles = unaligned |
291 pld [r1] | 288 pld [r1] |
292 stmfd sp!, {r4-r10,lr} @ R14 is also called LR | 289 stmfd sp!, {r4-r10,lr} @ R14 is also called LR |
305 subs r3, r3, #1 | 302 subs r3, r3, #1 |
306 stmia r0, {r8-r9} | 303 stmia r0, {r8-r9} |
307 add r0, r0, r2 | 304 add r0, r0, r2 |
308 bne 1b | 305 bne 1b |
309 ldmfd sp!, {r4-r10,pc} | 306 ldmfd sp!, {r4-r10,pc} |
310 .align 8 | 307 .align 5 |
311 2: | 308 2: |
312 ldmia r1, {r4-r5, r10} | 309 ldmia r1, {r4-r5, r10} |
313 add r1, r1, r2 | 310 add r1, r1, r2 |
314 ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 | 311 ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 |
315 ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10 | 312 ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10 |
318 subs r3, r3, #1 | 315 subs r3, r3, #1 |
319 stmia r0, {r4-r5} | 316 stmia r0, {r4-r5} |
320 add r0, r0, r2 | 317 add r0, r0, r2 |
321 bne 2b | 318 bne 2b |
322 ldmfd sp!, {r4-r10,pc} | 319 ldmfd sp!, {r4-r10,pc} |
323 .align 8 | 320 .align 5 |
324 3: | 321 3: |
325 ldmia r1, {r4-r5, r10} | 322 ldmia r1, {r4-r5, r10} |
326 add r1, r1, r2 | 323 add r1, r1, r2 |
327 ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10 | 324 ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10 |
328 ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10 | 325 ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10 |
331 subs r3, r3, #1 | 328 subs r3, r3, #1 |
332 stmia r0, {r4-r5} | 329 stmia r0, {r4-r5} |
333 add r0, r0, r2 | 330 add r0, r0, r2 |
334 bne 3b | 331 bne 3b |
335 ldmfd sp!, {r4-r10,pc} | 332 ldmfd sp!, {r4-r10,pc} |
336 .align 8 | 333 .align 5 |
337 4: | 334 4: |
338 ldmia r1, {r4-r5, r10} | 335 ldmia r1, {r4-r5, r10} |
339 add r1, r1, r2 | 336 add r1, r1, r2 |
340 ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10 | 337 ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10 |
341 pld [r1] | 338 pld [r1] |
343 subs r3, r3, #1 | 340 subs r3, r3, #1 |
344 stmia r0, {r8-r9} | 341 stmia r0, {r8-r9} |
345 add r0, r0, r2 | 342 add r0, r0, r2 |
346 bne 4b | 343 bne 4b |
347 ldmfd sp!, {r4-r10,pc} @@ update PC with LR content. | 344 ldmfd sp!, {r4-r10,pc} @@ update PC with LR content. |
348 .align 8 | |
349 5: | 345 5: |
350 .word 0xFEFEFEFE | 346 .word 0xFEFEFEFE |
351 .word 2b | 347 .word 2b |
352 .word 3b | 348 .word 3b |
353 .word 4b | 349 .word 4b |
354 .endfunc | 350 .endfunc |
355 | 351 |
356 | 352 |
357 @ ---------------------------------------------------------------- | 353 @ ---------------------------------------------------------------- |
358 .align 8 | 354 .align 5 |
359 function put_pixels8_y2_arm, export=1 | 355 function put_pixels8_y2_arm, export=1 |
360 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | 356 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
361 @ block = word aligned, pixles = unaligned | 357 @ block = word aligned, pixles = unaligned |
362 pld [r1] | 358 pld [r1] |
363 stmfd sp!, {r4-r11,lr} @ R14 is also called LR | 359 stmfd sp!, {r4-r11,lr} @ R14 is also called LR |
384 subs r3, r3, #1 | 380 subs r3, r3, #1 |
385 stmia r0, {r8-r9} | 381 stmia r0, {r8-r9} |
386 add r0, r0, r2 | 382 add r0, r0, r2 |
387 bne 6b | 383 bne 6b |
388 ldmfd sp!, {r4-r11,pc} | 384 ldmfd sp!, {r4-r11,pc} |
389 .align 8 | 385 .align 5 |
390 2: | 386 2: |
391 ldmia r1, {r4-r6} | 387 ldmia r1, {r4-r6} |
392 add r1, r1, r2 | 388 add r1, r1, r2 |
393 pld [r1] | 389 pld [r1] |
394 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 | 390 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 |
407 RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | 403 RND_AVG32 r10, r11, r7, r8, r4, r5, r12 |
408 stmia r0, {r10-r11} | 404 stmia r0, {r10-r11} |
409 add r0, r0, r2 | 405 add r0, r0, r2 |
410 bne 6b | 406 bne 6b |
411 ldmfd sp!, {r4-r11,pc} | 407 ldmfd sp!, {r4-r11,pc} |
412 .align 8 | 408 .align 5 |
413 3: | 409 3: |
414 ldmia r1, {r4-r6} | 410 ldmia r1, {r4-r6} |
415 add r1, r1, r2 | 411 add r1, r1, r2 |
416 pld [r1] | 412 pld [r1] |
417 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 | 413 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 |
430 RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | 426 RND_AVG32 r10, r11, r7, r8, r4, r5, r12 |
431 stmia r0, {r10-r11} | 427 stmia r0, {r10-r11} |
432 add r0, r0, r2 | 428 add r0, r0, r2 |
433 bne 6b | 429 bne 6b |
434 ldmfd sp!, {r4-r11,pc} | 430 ldmfd sp!, {r4-r11,pc} |
435 .align 8 | 431 .align 5 |
436 4: | 432 4: |
437 ldmia r1, {r4-r6} | 433 ldmia r1, {r4-r6} |
438 add r1, r1, r2 | 434 add r1, r1, r2 |
439 pld [r1] | 435 pld [r1] |
440 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 | 436 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 |
454 stmia r0, {r10-r11} | 450 stmia r0, {r10-r11} |
455 add r0, r0, r2 | 451 add r0, r0, r2 |
456 bne 6b | 452 bne 6b |
457 ldmfd sp!, {r4-r11,pc} | 453 ldmfd sp!, {r4-r11,pc} |
458 | 454 |
459 .align 8 | |
460 5: | 455 5: |
461 .word 0xFEFEFEFE | 456 .word 0xFEFEFEFE |
462 .word 2b | 457 .word 2b |
463 .word 3b | 458 .word 3b |
464 .word 4b | 459 .word 4b |
465 .endfunc | 460 .endfunc |
466 | 461 |
467 .align 8 | 462 .align 5 |
468 function put_no_rnd_pixels8_y2_arm, export=1 | 463 function put_no_rnd_pixels8_y2_arm, export=1 |
469 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | 464 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
470 @ block = word aligned, pixles = unaligned | 465 @ block = word aligned, pixles = unaligned |
471 pld [r1] | 466 pld [r1] |
472 stmfd sp!, {r4-r11,lr} @ R14 is also called LR | 467 stmfd sp!, {r4-r11,lr} @ R14 is also called LR |
493 subs r3, r3, #1 | 488 subs r3, r3, #1 |
494 stmia r0, {r8-r9} | 489 stmia r0, {r8-r9} |
495 add r0, r0, r2 | 490 add r0, r0, r2 |
496 bne 6b | 491 bne 6b |
497 ldmfd sp!, {r4-r11,pc} | 492 ldmfd sp!, {r4-r11,pc} |
498 .align 8 | 493 .align 5 |
499 2: | 494 2: |
500 ldmia r1, {r4-r6} | 495 ldmia r1, {r4-r6} |
501 add r1, r1, r2 | 496 add r1, r1, r2 |
502 pld [r1] | 497 pld [r1] |
503 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 | 498 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 |
516 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | 511 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 |
517 stmia r0, {r10-r11} | 512 stmia r0, {r10-r11} |
518 add r0, r0, r2 | 513 add r0, r0, r2 |
519 bne 6b | 514 bne 6b |
520 ldmfd sp!, {r4-r11,pc} | 515 ldmfd sp!, {r4-r11,pc} |
521 .align 8 | 516 .align 5 |
522 3: | 517 3: |
523 ldmia r1, {r4-r6} | 518 ldmia r1, {r4-r6} |
524 add r1, r1, r2 | 519 add r1, r1, r2 |
525 pld [r1] | 520 pld [r1] |
526 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 | 521 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 |
539 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | 534 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 |
540 stmia r0, {r10-r11} | 535 stmia r0, {r10-r11} |
541 add r0, r0, r2 | 536 add r0, r0, r2 |
542 bne 6b | 537 bne 6b |
543 ldmfd sp!, {r4-r11,pc} | 538 ldmfd sp!, {r4-r11,pc} |
544 .align 8 | 539 .align 5 |
545 4: | 540 4: |
546 ldmia r1, {r4-r6} | 541 ldmia r1, {r4-r6} |
547 add r1, r1, r2 | 542 add r1, r1, r2 |
548 pld [r1] | 543 pld [r1] |
549 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 | 544 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 |
562 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | 557 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 |
563 stmia r0, {r10-r11} | 558 stmia r0, {r10-r11} |
564 add r0, r0, r2 | 559 add r0, r0, r2 |
565 bne 6b | 560 bne 6b |
566 ldmfd sp!, {r4-r11,pc} | 561 ldmfd sp!, {r4-r11,pc} |
567 .align 8 | |
568 5: | 562 5: |
569 .word 0xFEFEFEFE | 563 .word 0xFEFEFEFE |
570 .word 2b | 564 .word 2b |
571 .word 3b | 565 .word 3b |
572 .word 4b | 566 .word 4b |
635 add r0, r0, r2 | 629 add r0, r0, r2 |
636 bge 6b | 630 bge 6b |
637 ldmfd sp!, {r4-r11,pc} | 631 ldmfd sp!, {r4-r11,pc} |
638 .endm | 632 .endm |
639 | 633 |
640 .align 8 | 634 .align 5 |
641 function put_pixels8_xy2_arm, export=1 | 635 function put_pixels8_xy2_arm, export=1 |
642 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | 636 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
643 @ block = word aligned, pixles = unaligned | 637 @ block = word aligned, pixles = unaligned |
644 pld [r1] | 638 pld [r1] |
645 stmfd sp!, {r4-r11,lr} @ R14 is also called LR | 639 stmfd sp!, {r4-r11,lr} @ R14 is also called LR |
649 bic r1, r1, #3 | 643 bic r1, r1, #3 |
650 ldrne pc, [r5] | 644 ldrne pc, [r5] |
651 1: | 645 1: |
652 RND_XY2_EXPAND 0 | 646 RND_XY2_EXPAND 0 |
653 | 647 |
654 .align 8 | 648 .align 5 |
655 2: | 649 2: |
656 RND_XY2_EXPAND 1 | 650 RND_XY2_EXPAND 1 |
657 | 651 |
658 .align 8 | 652 .align 5 |
659 3: | 653 3: |
660 RND_XY2_EXPAND 2 | 654 RND_XY2_EXPAND 2 |
661 | 655 |
662 .align 8 | 656 .align 5 |
663 4: | 657 4: |
664 RND_XY2_EXPAND 3 | 658 RND_XY2_EXPAND 3 |
665 | 659 |
666 5: | 660 5: |
667 .word 0x03030303 | 661 .word 0x03030303 |
671 .word 0x02020202 | 665 .word 0x02020202 |
672 .word 0xFCFCFCFC >> 2 | 666 .word 0xFCFCFCFC >> 2 |
673 .word 0x0F0F0F0F | 667 .word 0x0F0F0F0F |
674 .endfunc | 668 .endfunc |
675 | 669 |
676 .align 8 | 670 .align 5 |
677 function put_no_rnd_pixels8_xy2_arm, export=1 | 671 function put_no_rnd_pixels8_xy2_arm, export=1 |
678 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | 672 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
679 @ block = word aligned, pixles = unaligned | 673 @ block = word aligned, pixles = unaligned |
680 pld [r1] | 674 pld [r1] |
681 stmfd sp!, {r4-r11,lr} @ R14 is also called LR | 675 stmfd sp!, {r4-r11,lr} @ R14 is also called LR |
685 bic r1, r1, #3 | 679 bic r1, r1, #3 |
686 ldrne pc, [r5] | 680 ldrne pc, [r5] |
687 1: | 681 1: |
688 RND_XY2_EXPAND 0 | 682 RND_XY2_EXPAND 0 |
689 | 683 |
690 .align 8 | 684 .align 5 |
691 2: | 685 2: |
692 RND_XY2_EXPAND 1 | 686 RND_XY2_EXPAND 1 |
693 | 687 |
694 .align 8 | 688 .align 5 |
695 3: | 689 3: |
696 RND_XY2_EXPAND 2 | 690 RND_XY2_EXPAND 2 |
697 | 691 |
698 .align 8 | 692 .align 5 |
699 4: | 693 4: |
700 RND_XY2_EXPAND 3 | 694 RND_XY2_EXPAND 3 |
701 | 695 |
702 5: | 696 5: |
703 .word 0x03030303 | 697 .word 0x03030303 |
707 .word 0x01010101 | 701 .word 0x01010101 |
708 .word 0xFCFCFCFC >> 2 | 702 .word 0xFCFCFCFC >> 2 |
709 .word 0x0F0F0F0F | 703 .word 0x0F0F0F0F |
710 .endfunc | 704 .endfunc |
711 | 705 |
 | 706 .align 5 |
712 @ void ff_add_pixels_clamped_ARM(int16_t *block, uint8_t *dest, int stride) | 707 @ void ff_add_pixels_clamped_ARM(int16_t *block, uint8_t *dest, int stride) |
713 function ff_add_pixels_clamped_ARM, export=1 | 708 function ff_add_pixels_clamped_ARM, export=1 |
714 push {r4-r10} | 709 push {r4-r10} |
715 mov r10, #8 | 710 mov r10, #8 |
716 1: | 711 1: |