comparison arm/dsputil_arm_s.S @ 10356:f8d0701ff445 libavcodec

ARM: update ldm/stm instructions to modern syntax
author mru
date Sat, 03 Oct 2009 18:22:52 +0000
parents 47245bb9e85f
children ada571517e88
comparison
equal deleted inserted replaced
10355:47245bb9e85f 10356:f8d0701ff445
105 .align 5 105 .align 5
106 function put_pixels16_arm, export=1 106 function put_pixels16_arm, export=1
107 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) 107 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
108 @ block = word aligned, pixels = unaligned 108 @ block = word aligned, pixels = unaligned
109 pld [r1] 109 pld [r1]
110 stmfd sp!, {r4-r11, lr} @ R14 is also called LR 110 push {r4-r11, lr}
111 JMP_ALIGN r5, r1 111 JMP_ALIGN r5, r1
112 1: 112 1:
113 ldmia r1, {r4-r7} 113 ldm r1, {r4-r7}
114 add r1, r1, r2 114 add r1, r1, r2
115 stmia r0, {r4-r7} 115 stm r0, {r4-r7}
116 pld [r1] 116 pld [r1]
117 subs r3, r3, #1 117 subs r3, r3, #1
118 add r0, r0, r2 118 add r0, r0, r2
119 bne 1b 119 bne 1b
120 ldmfd sp!, {r4-r11, pc} 120 pop {r4-r11, pc}
121 .align 5 121 .align 5
122 2: 122 2:
123 ldmia r1, {r4-r8} 123 ldm r1, {r4-r8}
124 add r1, r1, r2 124 add r1, r1, r2
125 ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8 125 ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8
126 pld [r1] 126 pld [r1]
127 subs r3, r3, #1 127 subs r3, r3, #1
128 stmia r0, {r9-r12} 128 stm r0, {r9-r12}
129 add r0, r0, r2 129 add r0, r0, r2
130 bne 2b 130 bne 2b
131 ldmfd sp!, {r4-r11, pc} 131 pop {r4-r11, pc}
132 .align 5 132 .align 5
133 3: 133 3:
134 ldmia r1, {r4-r8} 134 ldm r1, {r4-r8}
135 add r1, r1, r2 135 add r1, r1, r2
136 ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8 136 ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8
137 pld [r1] 137 pld [r1]
138 subs r3, r3, #1 138 subs r3, r3, #1
139 stmia r0, {r9-r12} 139 stm r0, {r9-r12}
140 add r0, r0, r2 140 add r0, r0, r2
141 bne 3b 141 bne 3b
142 ldmfd sp!, {r4-r11, pc} 142 pop {r4-r11, pc}
143 .align 5 143 .align 5
144 4: 144 4:
145 ldmia r1, {r4-r8} 145 ldm r1, {r4-r8}
146 add r1, r1, r2 146 add r1, r1, r2
147 ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8 147 ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8
148 pld [r1] 148 pld [r1]
149 subs r3, r3, #1 149 subs r3, r3, #1
150 stmia r0, {r9-r12} 150 stm r0, {r9-r12}
151 add r0, r0, r2 151 add r0, r0, r2
152 bne 4b 152 bne 4b
153 ldmfd sp!, {r4-r11,pc} 153 pop {r4-r11,pc}
154 .endfunc 154 .endfunc
155 155
156 @ ---------------------------------------------------------------- 156 @ ----------------------------------------------------------------
157 .align 5 157 .align 5
158 function put_pixels8_arm, export=1 158 function put_pixels8_arm, export=1
159 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) 159 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
160 @ block = word aligned, pixels = unaligned 160 @ block = word aligned, pixels = unaligned
161 pld [r1] 161 pld [r1]
162 stmfd sp!, {r4-r5,lr} @ R14 is also called LR 162 push {r4-r5,lr}
163 JMP_ALIGN r5, r1 163 JMP_ALIGN r5, r1
164 1: 164 1:
165 ldmia r1, {r4-r5} 165 ldm r1, {r4-r5}
166 add r1, r1, r2 166 add r1, r1, r2
167 subs r3, r3, #1 167 subs r3, r3, #1
168 pld [r1] 168 pld [r1]
169 stmia r0, {r4-r5} 169 stm r0, {r4-r5}
170 add r0, r0, r2 170 add r0, r0, r2
171 bne 1b 171 bne 1b
172 ldmfd sp!, {r4-r5,pc} 172 pop {r4-r5,pc}
173 .align 5 173 .align 5
174 2: 174 2:
175 ldmia r1, {r4-r5, r12} 175 ldm r1, {r4-r5, r12}
176 add r1, r1, r2 176 add r1, r1, r2
177 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12 177 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12
178 pld [r1] 178 pld [r1]
179 subs r3, r3, #1 179 subs r3, r3, #1
180 stmia r0, {r4-r5} 180 stm r0, {r4-r5}
181 add r0, r0, r2 181 add r0, r0, r2
182 bne 2b 182 bne 2b
183 ldmfd sp!, {r4-r5,pc} 183 pop {r4-r5,pc}
184 .align 5 184 .align 5
185 3: 185 3:
186 ldmia r1, {r4-r5, r12} 186 ldm r1, {r4-r5, r12}
187 add r1, r1, r2 187 add r1, r1, r2
188 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12 188 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12
189 pld [r1] 189 pld [r1]
190 subs r3, r3, #1 190 subs r3, r3, #1
191 stmia r0, {r4-r5} 191 stm r0, {r4-r5}
192 add r0, r0, r2 192 add r0, r0, r2
193 bne 3b 193 bne 3b
194 ldmfd sp!, {r4-r5,pc} 194 pop {r4-r5,pc}
195 .align 5 195 .align 5
196 4: 196 4:
197 ldmia r1, {r4-r5, r12} 197 ldm r1, {r4-r5, r12}
198 add r1, r1, r2 198 add r1, r1, r2
199 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12 199 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12
200 pld [r1] 200 pld [r1]
201 subs r3, r3, #1 201 subs r3, r3, #1
202 stmia r0, {r4-r5} 202 stm r0, {r4-r5}
203 add r0, r0, r2 203 add r0, r0, r2
204 bne 4b 204 bne 4b
205 ldmfd sp!, {r4-r5,pc} 205 pop {r4-r5,pc}
206 .endfunc 206 .endfunc
207 207
208 @ ---------------------------------------------------------------- 208 @ ----------------------------------------------------------------
209 .align 5 209 .align 5
210 function put_pixels8_x2_arm, export=1 210 function put_pixels8_x2_arm, export=1
211 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) 211 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
212 @ block = word aligned, pixels = unaligned 212 @ block = word aligned, pixels = unaligned
213 pld [r1] 213 pld [r1]
214 stmfd sp!, {r4-r10,lr} @ R14 is also called LR 214 push {r4-r10,lr}
215 ldr r12, =0xfefefefe 215 ldr r12, =0xfefefefe
216 JMP_ALIGN r5, r1 216 JMP_ALIGN r5, r1
217 1: 217 1:
218 ldmia r1, {r4-r5, r10} 218 ldm r1, {r4-r5, r10}
219 add r1, r1, r2 219 add r1, r1, r2
220 ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 220 ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
221 pld [r1] 221 pld [r1]
222 RND_AVG32 r8, r9, r4, r5, r6, r7, r12 222 RND_AVG32 r8, r9, r4, r5, r6, r7, r12
223 subs r3, r3, #1 223 subs r3, r3, #1
224 stmia r0, {r8-r9} 224 stm r0, {r8-r9}
225 add r0, r0, r2 225 add r0, r0, r2
226 bne 1b 226 bne 1b
227 ldmfd sp!, {r4-r10,pc} 227 pop {r4-r10,pc}
228 .align 5 228 .align 5
229 2: 229 2:
230 ldmia r1, {r4-r5, r10} 230 ldm r1, {r4-r5, r10}
231 add r1, r1, r2 231 add r1, r1, r2
232 ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 232 ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
233 ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10 233 ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
234 pld [r1] 234 pld [r1]
235 RND_AVG32 r4, r5, r6, r7, r8, r9, r12 235 RND_AVG32 r4, r5, r6, r7, r8, r9, r12
236 subs r3, r3, #1 236 subs r3, r3, #1
237 stmia r0, {r4-r5} 237 stm r0, {r4-r5}
238 add r0, r0, r2 238 add r0, r0, r2
239 bne 2b 239 bne 2b
240 ldmfd sp!, {r4-r10,pc} 240 pop {r4-r10,pc}
241 .align 5 241 .align 5
242 3: 242 3:
243 ldmia r1, {r4-r5, r10} 243 ldm r1, {r4-r5, r10}
244 add r1, r1, r2 244 add r1, r1, r2
245 ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10 245 ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
246 ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10 246 ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
247 pld [r1] 247 pld [r1]
248 RND_AVG32 r4, r5, r6, r7, r8, r9, r12 248 RND_AVG32 r4, r5, r6, r7, r8, r9, r12
249 subs r3, r3, #1 249 subs r3, r3, #1
250 stmia r0, {r4-r5} 250 stm r0, {r4-r5}
251 add r0, r0, r2 251 add r0, r0, r2
252 bne 3b 252 bne 3b
253 ldmfd sp!, {r4-r10,pc} 253 pop {r4-r10,pc}
254 .align 5 254 .align 5
255 4: 255 4:
256 ldmia r1, {r4-r5, r10} 256 ldm r1, {r4-r5, r10}
257 add r1, r1, r2 257 add r1, r1, r2
258 ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10 258 ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
259 pld [r1] 259 pld [r1]
260 RND_AVG32 r8, r9, r6, r7, r5, r10, r12 260 RND_AVG32 r8, r9, r6, r7, r5, r10, r12
261 subs r3, r3, #1 261 subs r3, r3, #1
262 stmia r0, {r8-r9} 262 stm r0, {r8-r9}
263 add r0, r0, r2 263 add r0, r0, r2
264 bne 4b 264 bne 4b
265 ldmfd sp!, {r4-r10,pc} @@ update PC with LR content. 265 pop {r4-r10,pc}
266 .endfunc 266 .endfunc
267 267
268 .align 5 268 .align 5
269 function put_no_rnd_pixels8_x2_arm, export=1 269 function put_no_rnd_pixels8_x2_arm, export=1
270 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) 270 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
271 @ block = word aligned, pixels = unaligned 271 @ block = word aligned, pixels = unaligned
272 pld [r1] 272 pld [r1]
273 stmfd sp!, {r4-r10,lr} @ R14 is also called LR 273 push {r4-r10,lr}
274 ldr r12, =0xfefefefe 274 ldr r12, =0xfefefefe
275 JMP_ALIGN r5, r1 275 JMP_ALIGN r5, r1
276 1: 276 1:
277 ldmia r1, {r4-r5, r10} 277 ldm r1, {r4-r5, r10}
278 add r1, r1, r2 278 add r1, r1, r2
279 ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 279 ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
280 pld [r1] 280 pld [r1]
281 NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12 281 NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
282 subs r3, r3, #1 282 subs r3, r3, #1
283 stmia r0, {r8-r9} 283 stm r0, {r8-r9}
284 add r0, r0, r2 284 add r0, r0, r2
285 bne 1b 285 bne 1b
286 ldmfd sp!, {r4-r10,pc} 286 pop {r4-r10,pc}
287 .align 5 287 .align 5
288 2: 288 2:
289 ldmia r1, {r4-r5, r10} 289 ldm r1, {r4-r5, r10}
290 add r1, r1, r2 290 add r1, r1, r2
291 ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 291 ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
292 ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10 292 ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
293 pld [r1] 293 pld [r1]
294 NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12 294 NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
295 subs r3, r3, #1 295 subs r3, r3, #1
296 stmia r0, {r4-r5} 296 stm r0, {r4-r5}
297 add r0, r0, r2 297 add r0, r0, r2
298 bne 2b 298 bne 2b
299 ldmfd sp!, {r4-r10,pc} 299 pop {r4-r10,pc}
300 .align 5 300 .align 5
301 3: 301 3:
302 ldmia r1, {r4-r5, r10} 302 ldm r1, {r4-r5, r10}
303 add r1, r1, r2 303 add r1, r1, r2
304 ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10 304 ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
305 ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10 305 ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
306 pld [r1] 306 pld [r1]
307 NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12 307 NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
308 subs r3, r3, #1 308 subs r3, r3, #1
309 stmia r0, {r4-r5} 309 stm r0, {r4-r5}
310 add r0, r0, r2 310 add r0, r0, r2
311 bne 3b 311 bne 3b
312 ldmfd sp!, {r4-r10,pc} 312 pop {r4-r10,pc}
313 .align 5 313 .align 5
314 4: 314 4:
315 ldmia r1, {r4-r5, r10} 315 ldm r1, {r4-r5, r10}
316 add r1, r1, r2 316 add r1, r1, r2
317 ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10 317 ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
318 pld [r1] 318 pld [r1]
319 NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12 319 NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12
320 subs r3, r3, #1 320 subs r3, r3, #1
321 stmia r0, {r8-r9} 321 stm r0, {r8-r9}
322 add r0, r0, r2 322 add r0, r0, r2
323 bne 4b 323 bne 4b
324 ldmfd sp!, {r4-r10,pc} @@ update PC with LR content. 324 pop {r4-r10,pc}
325 .endfunc 325 .endfunc
326 326
327 327
328 @ ---------------------------------------------------------------- 328 @ ----------------------------------------------------------------
329 .align 5 329 .align 5
330 function put_pixels8_y2_arm, export=1 330 function put_pixels8_y2_arm, export=1
331 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) 331 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
332 @ block = word aligned, pixels = unaligned 332 @ block = word aligned, pixels = unaligned
333 pld [r1] 333 pld [r1]
334 stmfd sp!, {r4-r11,lr} @ R14 is also called LR 334 push {r4-r11,lr}
335 mov r3, r3, lsr #1 335 mov r3, r3, lsr #1
336 ldr r12, =0xfefefefe 336 ldr r12, =0xfefefefe
337 JMP_ALIGN r5, r1 337 JMP_ALIGN r5, r1
338 1: 338 1:
339 ldmia r1, {r4-r5} 339 ldm r1, {r4-r5}
340 add r1, r1, r2 340 add r1, r1, r2
341 6: ldmia r1, {r6-r7} 341 6: ldm r1, {r6-r7}
342 add r1, r1, r2 342 add r1, r1, r2
343 pld [r1] 343 pld [r1]
344 RND_AVG32 r8, r9, r4, r5, r6, r7, r12 344 RND_AVG32 r8, r9, r4, r5, r6, r7, r12
345 ldmia r1, {r4-r5} 345 ldm r1, {r4-r5}
346 add r1, r1, r2 346 add r1, r1, r2
347 stmia r0, {r8-r9} 347 stm r0, {r8-r9}
348 add r0, r0, r2 348 add r0, r0, r2
349 pld [r1] 349 pld [r1]
350 RND_AVG32 r8, r9, r6, r7, r4, r5, r12 350 RND_AVG32 r8, r9, r6, r7, r4, r5, r12
351 subs r3, r3, #1 351 subs r3, r3, #1
352 stmia r0, {r8-r9} 352 stm r0, {r8-r9}
353 add r0, r0, r2 353 add r0, r0, r2
354 bne 6b 354 bne 6b
355 ldmfd sp!, {r4-r11,pc} 355 pop {r4-r11,pc}
356 .align 5 356 .align 5
357 2: 357 2:
358 ldmia r1, {r4-r6} 358 ldm r1, {r4-r6}
359 add r1, r1, r2 359 add r1, r1, r2
360 pld [r1] 360 pld [r1]
361 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 361 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
362 6: ldmia r1, {r7-r9} 362 6: ldm r1, {r7-r9}
363 add r1, r1, r2 363 add r1, r1, r2
364 pld [r1] 364 pld [r1]
365 ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9 365 ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
366 RND_AVG32 r10, r11, r4, r5, r7, r8, r12 366 RND_AVG32 r10, r11, r4, r5, r7, r8, r12
367 stmia r0, {r10-r11} 367 stm r0, {r10-r11}
368 add r0, r0, r2 368 add r0, r0, r2
369 ldmia r1, {r4-r6} 369 ldm r1, {r4-r6}
370 add r1, r1, r2 370 add r1, r1, r2
371 pld [r1] 371 pld [r1]
372 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 372 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
373 subs r3, r3, #1 373 subs r3, r3, #1
374 RND_AVG32 r10, r11, r7, r8, r4, r5, r12 374 RND_AVG32 r10, r11, r7, r8, r4, r5, r12
375 stmia r0, {r10-r11} 375 stm r0, {r10-r11}
376 add r0, r0, r2 376 add r0, r0, r2
377 bne 6b 377 bne 6b
378 ldmfd sp!, {r4-r11,pc} 378 pop {r4-r11,pc}
379 .align 5 379 .align 5
380 3: 380 3:
381 ldmia r1, {r4-r6} 381 ldm r1, {r4-r6}
382 add r1, r1, r2 382 add r1, r1, r2
383 pld [r1] 383 pld [r1]
384 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 384 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
385 6: ldmia r1, {r7-r9} 385 6: ldm r1, {r7-r9}
386 add r1, r1, r2 386 add r1, r1, r2
387 pld [r1] 387 pld [r1]
388 ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9 388 ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
389 RND_AVG32 r10, r11, r4, r5, r7, r8, r12 389 RND_AVG32 r10, r11, r4, r5, r7, r8, r12
390 stmia r0, {r10-r11} 390 stm r0, {r10-r11}
391 add r0, r0, r2 391 add r0, r0, r2
392 ldmia r1, {r4-r6} 392 ldm r1, {r4-r6}
393 add r1, r1, r2 393 add r1, r1, r2
394 pld [r1] 394 pld [r1]
395 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 395 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
396 subs r3, r3, #1 396 subs r3, r3, #1
397 RND_AVG32 r10, r11, r7, r8, r4, r5, r12 397 RND_AVG32 r10, r11, r7, r8, r4, r5, r12
398 stmia r0, {r10-r11} 398 stm r0, {r10-r11}
399 add r0, r0, r2 399 add r0, r0, r2
400 bne 6b 400 bne 6b
401 ldmfd sp!, {r4-r11,pc} 401 pop {r4-r11,pc}
402 .align 5 402 .align 5
403 4: 403 4:
404 ldmia r1, {r4-r6} 404 ldm r1, {r4-r6}
405 add r1, r1, r2 405 add r1, r1, r2
406 pld [r1] 406 pld [r1]
407 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 407 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
408 6: ldmia r1, {r7-r9} 408 6: ldm r1, {r7-r9}
409 add r1, r1, r2 409 add r1, r1, r2
410 pld [r1] 410 pld [r1]
411 ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9 411 ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
412 RND_AVG32 r10, r11, r4, r5, r7, r8, r12 412 RND_AVG32 r10, r11, r4, r5, r7, r8, r12
413 stmia r0, {r10-r11} 413 stm r0, {r10-r11}
414 add r0, r0, r2 414 add r0, r0, r2
415 ldmia r1, {r4-r6} 415 ldm r1, {r4-r6}
416 add r1, r1, r2 416 add r1, r1, r2
417 pld [r1] 417 pld [r1]
418 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 418 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
419 subs r3, r3, #1 419 subs r3, r3, #1
420 RND_AVG32 r10, r11, r7, r8, r4, r5, r12 420 RND_AVG32 r10, r11, r7, r8, r4, r5, r12
421 stmia r0, {r10-r11} 421 stm r0, {r10-r11}
422 add r0, r0, r2 422 add r0, r0, r2
423 bne 6b 423 bne 6b
424 ldmfd sp!, {r4-r11,pc} 424 pop {r4-r11,pc}
425 .endfunc 425 .endfunc
426 426
427 .align 5 427 .align 5
428 function put_no_rnd_pixels8_y2_arm, export=1 428 function put_no_rnd_pixels8_y2_arm, export=1
429 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) 429 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
430 @ block = word aligned, pixels = unaligned 430 @ block = word aligned, pixels = unaligned
431 pld [r1] 431 pld [r1]
432 stmfd sp!, {r4-r11,lr} @ R14 is also called LR 432 push {r4-r11,lr}
433 mov r3, r3, lsr #1 433 mov r3, r3, lsr #1
434 ldr r12, =0xfefefefe 434 ldr r12, =0xfefefefe
435 JMP_ALIGN r5, r1 435 JMP_ALIGN r5, r1
436 1: 436 1:
437 ldmia r1, {r4-r5} 437 ldm r1, {r4-r5}
438 add r1, r1, r2 438 add r1, r1, r2
439 6: ldmia r1, {r6-r7} 439 6: ldm r1, {r6-r7}
440 add r1, r1, r2 440 add r1, r1, r2
441 pld [r1] 441 pld [r1]
442 NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12 442 NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
443 ldmia r1, {r4-r5} 443 ldm r1, {r4-r5}
444 add r1, r1, r2 444 add r1, r1, r2
445 stmia r0, {r8-r9} 445 stm r0, {r8-r9}
446 add r0, r0, r2 446 add r0, r0, r2
447 pld [r1] 447 pld [r1]
448 NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12 448 NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12
449 subs r3, r3, #1 449 subs r3, r3, #1
450 stmia r0, {r8-r9} 450 stm r0, {r8-r9}
451 add r0, r0, r2 451 add r0, r0, r2
452 bne 6b 452 bne 6b
453 ldmfd sp!, {r4-r11,pc} 453 pop {r4-r11,pc}
454 .align 5 454 .align 5
455 2: 455 2:
456 ldmia r1, {r4-r6} 456 ldm r1, {r4-r6}
457 add r1, r1, r2 457 add r1, r1, r2
458 pld [r1] 458 pld [r1]
459 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 459 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
460 6: ldmia r1, {r7-r9} 460 6: ldm r1, {r7-r9}
461 add r1, r1, r2 461 add r1, r1, r2
462 pld [r1] 462 pld [r1]
463 ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9 463 ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
464 NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 464 NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
465 stmia r0, {r10-r11} 465 stm r0, {r10-r11}
466 add r0, r0, r2 466 add r0, r0, r2
467 ldmia r1, {r4-r6} 467 ldm r1, {r4-r6}
468 add r1, r1, r2 468 add r1, r1, r2
469 pld [r1] 469 pld [r1]
470 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 470 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
471 subs r3, r3, #1 471 subs r3, r3, #1
472 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 472 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
473 stmia r0, {r10-r11} 473 stm r0, {r10-r11}
474 add r0, r0, r2 474 add r0, r0, r2
475 bne 6b 475 bne 6b
476 ldmfd sp!, {r4-r11,pc} 476 pop {r4-r11,pc}
477 .align 5 477 .align 5
478 3: 478 3:
479 ldmia r1, {r4-r6} 479 ldm r1, {r4-r6}
480 add r1, r1, r2 480 add r1, r1, r2
481 pld [r1] 481 pld [r1]
482 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 482 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
483 6: ldmia r1, {r7-r9} 483 6: ldm r1, {r7-r9}
484 add r1, r1, r2 484 add r1, r1, r2
485 pld [r1] 485 pld [r1]
486 ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9 486 ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
487 NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 487 NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
488 stmia r0, {r10-r11} 488 stm r0, {r10-r11}
489 add r0, r0, r2 489 add r0, r0, r2
490 ldmia r1, {r4-r6} 490 ldm r1, {r4-r6}
491 add r1, r1, r2 491 add r1, r1, r2
492 pld [r1] 492 pld [r1]
493 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 493 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
494 subs r3, r3, #1 494 subs r3, r3, #1
495 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 495 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
496 stmia r0, {r10-r11} 496 stm r0, {r10-r11}
497 add r0, r0, r2 497 add r0, r0, r2
498 bne 6b 498 bne 6b
499 ldmfd sp!, {r4-r11,pc} 499 pop {r4-r11,pc}
500 .align 5 500 .align 5
501 4: 501 4:
502 ldmia r1, {r4-r6} 502 ldm r1, {r4-r6}
503 add r1, r1, r2 503 add r1, r1, r2
504 pld [r1] 504 pld [r1]
505 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 505 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
506 6: ldmia r1, {r7-r9} 506 6: ldm r1, {r7-r9}
507 add r1, r1, r2 507 add r1, r1, r2
508 pld [r1] 508 pld [r1]
509 ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9 509 ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
510 NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 510 NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
511 stmia r0, {r10-r11} 511 stm r0, {r10-r11}
512 add r0, r0, r2 512 add r0, r0, r2
513 ldmia r1, {r4-r6} 513 ldm r1, {r4-r6}
514 add r1, r1, r2 514 add r1, r1, r2
515 pld [r1] 515 pld [r1]
516 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 516 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
517 subs r3, r3, #1 517 subs r3, r3, #1
518 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 518 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
519 stmia r0, {r10-r11} 519 stm r0, {r10-r11}
520 add r0, r0, r2 520 add r0, r0, r2
521 bne 6b 521 bne 6b
522 ldmfd sp!, {r4-r11,pc} 522 pop {r4-r11,pc}
523 .endfunc 523 .endfunc
524 524
525 .ltorg 525 .ltorg
526 526
527 @ ---------------------------------------------------------------- 527 @ ----------------------------------------------------------------
528 .macro RND_XY2_IT align, rnd 528 .macro RND_XY2_IT align, rnd
529 @ l1= (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202) 529 @ l1= (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)
530 @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2) 530 @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
531 .if \align == 0 531 .if \align == 0
532 ldmia r1, {r6-r8} 532 ldm r1, {r6-r8}
533 .elseif \align == 3 533 .elseif \align == 3
534 ldmia r1, {r5-r7} 534 ldm r1, {r5-r7}
535 .else 535 .else
536 ldmia r1, {r8-r10} 536 ldm r1, {r8-r10}
537 .endif 537 .endif
538 add r1, r1, r2 538 add r1, r1, r2
539 pld [r1] 539 pld [r1]
540 .if \align == 0 540 .if \align == 0
541 ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8 541 ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8
569 subs r3, r3, #1 569 subs r3, r3, #1
570 .endm 570 .endm
571 571
572 .macro RND_XY2_EXPAND align, rnd 572 .macro RND_XY2_EXPAND align, rnd
573 RND_XY2_IT \align, \rnd 573 RND_XY2_IT \align, \rnd
574 6: stmfd sp!, {r8-r11} 574 6: push {r8-r11}
575 RND_XY2_IT \align, \rnd 575 RND_XY2_IT \align, \rnd
576 ldmfd sp!, {r4-r7} 576 pop {r4-r7}
577 add r4, r4, r8 577 add r4, r4, r8
578 add r5, r5, r9 578 add r5, r5, r9
579 ldr r14, =0x0f0f0f0f 579 ldr r14, =0x0f0f0f0f
580 add r6, r6, r10 580 add r6, r6, r10
581 add r7, r7, r11 581 add r7, r7, r11
582 and r4, r14, r4, lsr #2 582 and r4, r14, r4, lsr #2
583 and r5, r14, r5, lsr #2 583 and r5, r14, r5, lsr #2
584 add r4, r4, r6 584 add r4, r4, r6
585 add r5, r5, r7 585 add r5, r5, r7
586 stmia r0, {r4-r5} 586 stm r0, {r4-r5}
587 add r0, r0, r2 587 add r0, r0, r2
588 bge 6b 588 bge 6b
589 ldmfd sp!, {r4-r11,pc} 589 pop {r4-r11,pc}
590 .endm 590 .endm
591 591
592 .align 5 592 .align 5
593 function put_pixels8_xy2_arm, export=1 593 function put_pixels8_xy2_arm, export=1
594 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) 594 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
595 @ block = word aligned, pixels = unaligned 595 @ block = word aligned, pixels = unaligned
596 pld [r1] 596 pld [r1]
597 stmfd sp!, {r4-r11,lr} @ R14 is also called LR 597 push {r4-r11,lr} @ R14 is also called LR
598 JMP_ALIGN r5, r1 598 JMP_ALIGN r5, r1
599 1: 599 1:
600 RND_XY2_EXPAND 0, lsl 600 RND_XY2_EXPAND 0, lsl
601 601
602 .align 5 602 .align 5
615 .align 5 615 .align 5
616 function put_no_rnd_pixels8_xy2_arm, export=1 616 function put_no_rnd_pixels8_xy2_arm, export=1
617 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) 617 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
618 @ block = word aligned, pixels = unaligned 618 @ block = word aligned, pixels = unaligned
619 pld [r1] 619 pld [r1]
620 stmfd sp!, {r4-r11,lr} @ R14 is also called LR 620 push {r4-r11,lr}
621 JMP_ALIGN r5, r1 621 JMP_ALIGN r5, r1
622 1: 622 1:
623 RND_XY2_EXPAND 0, lsr 623 RND_XY2_EXPAND 0, lsr
624 624
625 .align 5 625 .align 5