Mercurial > libavcodec.hg
annotate armv4l/simple_idct_armv5te.S @ 4098:db808f95ad06 libavcodec
lambda* should be unsigned
author | michael |
---|---|
date | Sat, 28 Oct 2006 16:11:25 +0000 |
parents | c8c591fe26f8 |
children | 97d82c7585b4 |
rev | line source |
---|---|
3769 | 1 /* |
2 * Simple IDCT | |
3 * | |
4 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at> | |
5 * Copyright (c) 2006 Mans Rullgard <mru@inprovide.com> | |
6 * | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3769
diff
changeset
|
7 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3769
diff
changeset
|
8 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3769
diff
changeset
|
9 * FFmpeg is free software; you can redistribute it and/or |
3769 | 10 * modify it under the terms of the GNU Lesser General Public |
11 * License as published by the Free Software Foundation; either | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3769
diff
changeset
|
12 * version 2.1 of the License, or (at your option) any later version. |
3769 | 13 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3769
diff
changeset
|
14 * FFmpeg is distributed in the hope that it will be useful, |
3769 | 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 * Lesser General Public License for more details. | |
18 * | |
19 * You should have received a copy of the GNU Lesser General Public | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3769
diff
changeset
|
20 * License along with FFmpeg; if not, write to the Free Software |
3769 | 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
22 */ | |
23 | |
/* Fixed-point IDCT coefficients, scaled by 1<<14 as described in the per-line comments. */
/* W4 is 16383, one less than the 16384 the formula gives for i = 4; the code builds it as 16384 - 1. */
24 #define W1 22725 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ | |
25 #define W2 21407 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ | |
26 #define W3 19266 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ | |
27 #define W4 16383 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ | |
28 #define W5 12873 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ | |
29 #define W6 8867 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ | |
30 #define W7 4520 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ | |
/* Right shifts applied after the row pass and the column pass. */
/* NOTE: only the rounding constants use these names; the shift instructions themselves hard-code #11 and #20. */
31 #define ROW_SHIFT 11 | |
32 #define COL_SHIFT 20 | |
33 | |
/* Two 16-bit coefficients packed per word: first-named one in the low half, second in the high half. */
/* The smulxy/smlaxy instructions select a half with their b (bottom) / t (top) suffix. */
34 #define W13 (W1 | (W3 << 16)) | |
35 #define W26 (W2 | (W6 << 16)) | |
36 #define W57 (W5 | (W7 << 16)) | |
37 | |
/* Literal words kept in .text so the routines can fetch them pc-relative with ldr ip, [pc, #(label-.-8)]. */
/* The -8 accounts for pc reading as the current instruction address plus 8 in ARM state. */
38 .text | |
39 .align | |
40 w13: .long W13 | |
41 w26: .long W26 | |
42 w57: .long W57 | |
43 | |
/* idct_row_armv5te: in-place 1-D IDCT of one row of eight 16-bit coefficients. */
/* In:  a1 = address of the row (16 bytes, accessed with ldrd/strd); a1 is not modified. */
/* Out: the eight results, each shifted right by 11, stored back over the row. */
/* Overwrites a2-a4, ip, v1-v7 and fp without saving them; lr is saved on entry and used as scratch. */
/* The simple_idct_* entry points in this file save v1-v7/fp before calling here. */
44 .align | |
45 .func idct_row_armv5te | |
46 idct_row_armv5te: | |
47 str lr, [sp, #-4]! | |
48 | |
/* Load row[7:4] into v1/v2 and row[3:0] into a3/a4, then test whether row[1..7] are all zero. */
/* orrs sets Z when row[4..7] are zero; each cmpeq runs only while Z is still set, and v1 is 0 by then, */
/* so the two compares check row[3:2] == 0 and row[1] == 0. */
49 ldrd v1, [a1, #8] | |
50 ldrd a3, [a1] /* a3 = row[1:0], a4 = row[3:2] */ | |
51 orrs v1, v1, v2 | |
52 cmpeq v1, a4 | |
53 cmpeq v1, a3, lsr #16 | |
54 beq row_dc_only | |
55 | |
/* Even part from row[0] and row[2]: */
/*   a2 = W6*row[2], lr = W2*row[2] */
/*   v2 = v1 + W6*row[2], v3 = v1 - W6*row[2], v4 = v1 - W2*row[2], v1 = v1 + W2*row[2] */
56 mov v1, #(1<<(ROW_SHIFT-1)) | |
57 mov ip, #16384 | |
58 sub ip, ip, #1 /* ip = W4 */ | |
59 smlabb v1, ip, a3, v1 /* v1 = W4*row[0]+(1<<(RS-1)) */ | |
60 ldr ip, [pc, #(w26-.-8)] /* ip = W2 | (W6 << 16) */ | |
61 smultb a2, ip, a4 | |
62 smulbb lr, ip, a4 | |
63 add v2, v1, a2 | |
64 sub v3, v1, a2 | |
65 sub v4, v1, lr | |
66 add v1, v1, lr | |
67 | |
/* Odd part from row[1] and row[3]: */
/*   v5 = W1*row[1] + W3*row[3] */
/*   v6 = W7*row[3] - W3*row[1]   (negated relative to the other three; it is subtracted/added accordingly below) */
/*   v7 = W5*row[1] - W1*row[3] */
/*   fp = W7*row[1] - W5*row[3] */
/* The reload of a3/a4 with row[7:4] is scheduled in between the last multiply and its subtract. */
68 ldr ip, [pc, #(w13-.-8)] /* ip = W1 | (W3 << 16) */ | |
69 ldr lr, [pc, #(w57-.-8)] /* lr = W5 | (W7 << 16) */ | |
70 smulbt v5, ip, a3 | |
71 smultt v6, lr, a4 | |
72 smlatt v5, ip, a4, v5 | |
73 smultt a2, ip, a3 | |
74 smulbt v7, lr, a3 | |
75 sub v6, v6, a2 | |
76 smulbt a2, ip, a4 | |
77 smultt fp, lr, a3 | |
78 sub v7, v7, a2 | |
79 smulbt a2, lr, a4 | |
80 ldrd a3, [a1, #8] /* a3=row[5:4] a4=row[7:6] */ | |
81 sub fp, fp, a2 | |
82 | |
/* Skip the row[4..7] contributions entirely when those four coefficients are all zero. */
83 orrs a2, a3, a4 | |
84 beq 1f | |
85 | |
/* Odd part, row[5] and row[7] contributions: */
/*   v5 += W5*row[5] + W7*row[7] */
/*   v6 += W1*row[5] + W5*row[7] */
/*   v7 += W7*row[5] + W3*row[7] */
/*   fp += W3*row[5] - W1*row[7] */
86 smlabt v5, lr, a3, v5 | |
87 smlabt v6, ip, a3, v6 | |
88 smlatt v5, lr, a4, v5 | |
89 smlabt v6, lr, a4, v6 | |
90 smlatt v7, lr, a3, v7 | |
91 smlatt fp, ip, a3, fp | |
92 smulbt a2, ip, a4 | |
93 smlatt v7, ip, a4, v7 | |
94 sub fp, fp, a2 | |
95 | |
/* Even part, row[4] and row[6] contributions (see the per-line comments). */
96 ldr ip, [pc, #(w26-.-8)] /* ip = W2 | (W6 << 16) */ | |
97 mov a2, #16384 | |
98 sub a2, a2, #1 /* a2 = W4 */ | |
99 smulbb a2, a2, a3 /* a2 = W4*row[4] */ | |
100 smultb lr, ip, a4 /* lr = W6*row[6] */ | |
101 add v1, v1, a2 /* v1 += W4*row[4] */ | |
102 add v1, v1, lr /* v1 += W6*row[6] */ | |
103 add v4, v4, a2 /* v4 += W4*row[4] */ | |
104 sub v4, v4, lr /* v4 -= W6*row[6] */ | |
105 smulbb lr, ip, a4 /* lr = W2*row[6] */ | |
106 sub v2, v2, a2 /* v2 -= W4*row[4] */ | |
107 sub v2, v2, lr /* v2 -= W2*row[6] */ | |
108 sub v3, v3, a2 /* v3 -= W4*row[4] */ | |
109 add v3, v3, lr /* v3 += W2*row[6] */ | |
110 | |
/* Combine even and odd parts, shift right by 11 and pack two 16-bit results per word. */
/* For the low half, lsr #11 already zeroes bits 21-31 and the bic clears bits 16-20; */
/* the high half is merged with lsl #16, which pushes its excess bits out of the word. */
/* First store: row[0] = v1+v5, row[1] = v2-v6, row[2] = v3+v7, row[3] = v4+fp. */
111 1: add a2, v1, v5 | |
112 mov a3, a2, lsr #11 | |
113 bic a3, a3, #0x1f0000 | |
114 sub a2, v2, v6 | |
115 mov a2, a2, lsr #11 | |
116 add a3, a3, a2, lsl #16 | |
117 add a2, v3, v7 | |
118 mov a4, a2, lsr #11 | |
119 bic a4, a4, #0x1f0000 | |
120 add a2, v4, fp | |
121 mov a2, a2, lsr #11 | |
122 add a4, a4, a2, lsl #16 | |
123 strd a3, [a1] | |
124 | |
/* Second store: row[4] = v4-fp, row[5] = v3-v7, row[6] = v2+v6, row[7] = v1-v5. */
125 sub a2, v4, fp | |
126 mov a3, a2, lsr #11 | |
127 bic a3, a3, #0x1f0000 | |
128 sub a2, v3, v7 | |
129 mov a2, a2, lsr #11 | |
130 add a3, a3, a2, lsl #16 | |
131 add a2, v2, v6 | |
132 mov a4, a2, lsr #11 | |
133 bic a4, a4, #0x1f0000 | |
134 sub a2, v1, v5 | |
135 mov a2, a2, lsr #11 | |
136 add a4, a4, a2, lsl #16 | |
137 strd a3, [a1, #8] | |
138 | |
/* Return by popping the saved lr straight into pc. */
139 ldr pc, [sp], #4 | |
140 | |
/* Only row[0] is non-zero (a3 holds it in the low half, high half is zero here). */
/* Copy it into both halves, clear bits 13-15 so the lsl #3 cannot carry from the low half */
/* into the high half, then shift: every one of the eight outputs becomes row[0] << 3 (16-bit). */
141 row_dc_only: | |
142 orr a3, a3, a3, lsl #16 | |
143 bic a3, a3, #0xe000 | |
144 mov a3, a3, lsl #3 | |
145 mov a4, a3 | |
146 strd a3, [a1] | |
147 strd a3, [a1, #8] | |
148 | |
149 ldr pc, [sp], #4 | |
150 .endfunc | |
151 | |
/* idct_col: shared column-pass core, expanded inside each idct_col*_armv5te routine. */
/* In:  a1 = address of the top element of a pair of adjacent columns; successive rows are 16 bytes apart. */
/*      Every 32-bit load picks up two columns at once: low half = first column, high half = second column. */
/* Out: eight words pushed on the stack (32 bytes, lowest address first): the even-part sums */
/*        e0 = W4*(c0+r) + W4*c4 + W2*c2 + W6*c6 */
/*        e1 = W4*(c0+r) - W4*c4 + W6*c2 - W2*c6 */
/*        e2 = W4*(c0+r) - W4*c4 - W6*c2 + W2*c6 */
/*        e3 = W4*(c0+r) + W4*c4 - W2*c2 - W6*c6 */
/*      each stored as a (first column, second column) pair, with r = (1<<(COL_SHIFT-1))/W4; */
/*      and the odd-part sums left in registers, again as (first column / second column): */
/*        v1/v2 =  W1*c1 + W3*c3 + W5*c5 + W7*c7 */
/*        v3/v4 = -W3*c1 + W7*c3 + W1*c5 + W5*c7 */
/*        v5/v6 =  W5*c1 - W1*c3 + W7*c5 + W3*c7 */
/*        v7/fp =  W7*c1 - W5*c3 + W3*c5 - W1*c7 */
/* Overwrites a2, a3, a4, ip and lr. The expanding code is responsible for popping the 32 bytes. */
152 .macro idct_col | |
153 ldr a4, [a1] /* a4 = col[1:0] */ | |
154 mov ip, #16384 | |
155 sub ip, ip, #1 /* ip = W4 */ | |
/* Disabled alternative: add the rounding constant after multiplying by W4. */
156 #if 0 | |
157 mov v1, #(1<<(COL_SHIFT-1)) | |
158 smlabt v2, ip, a4, v1 /* v2 = W4*col[1] + (1<<(COL_SHIFT-1)) */ | |
159 smlabb v1, ip, a4, v1 /* v1 = W4*col[0] + (1<<(COL_SHIFT-1)) */ | |
160 ldr a4, [a1, #(16*4)] | |
161 #else | |
/* Active variant: add the rounding term r before the multiply; the constant evaluates to 32 with integer division. */
/* rsb x, x, x, lsl #14 computes x*16384 - x = x*W4 without a multiply instruction. */
/* lsl #16 followed by asr #16 sign-extends the low-half coefficient. */
162 mov v1, #((1<<(COL_SHIFT-1))/W4) /* this matches the C version */ | |
163 add v2, v1, a4, asr #16 | |
164 rsb v2, v2, v2, lsl #14 | |
165 mov a4, a4, lsl #16 | |
166 add v1, v1, a4, asr #16 | |
167 ldr a4, [a1, #(16*4)] | |
168 rsb v1, v1, v1, lsl #14 | |
169 #endif | |
170 | |
/* Row 4 term: lr = W4*c4 (first column), a3 = W4*c4 (second column). */
/* Seeds all four even sums per column: v1,v3,v5,v7 for the first column and v2,v4,v6,fp for the second. */
171 smulbb lr, ip, a4 | |
172 smulbt a3, ip, a4 | |
173 sub v3, v1, lr | |
174 sub v5, v1, lr | |
175 add v7, v1, lr | |
176 add v1, v1, lr | |
177 sub v4, v2, a3 | |
178 sub v6, v2, a3 | |
179 add fp, v2, a3 | |
180 ldr ip, [pc, #(w26-.-8)] | |
181 ldr a4, [a1, #(16*2)] | |
182 add v2, v2, a3 | |
183 | |
/* Row 2 term: lr = W2*c2, a3 = W6*c2, first column and then second column. */
184 smulbb lr, ip, a4 | |
185 smultb a3, ip, a4 | |
186 add v1, v1, lr | |
187 sub v7, v7, lr | |
188 add v3, v3, a3 | |
189 sub v5, v5, a3 | |
190 smulbt lr, ip, a4 | |
191 smultt a3, ip, a4 | |
192 add v2, v2, lr | |
193 sub fp, fp, lr | |
194 add v4, v4, a3 | |
195 ldr a4, [a1, #(16*6)] | |
196 sub v6, v6, a3 | |
197 | |
/* Row 6 term: lr = W6*c6, a3 = W2*c6, first column and then second column. */
198 smultb lr, ip, a4 | |
199 smulbb a3, ip, a4 | |
200 add v1, v1, lr | |
201 sub v7, v7, lr | |
202 sub v3, v3, a3 | |
203 add v5, v5, a3 | |
204 smultt lr, ip, a4 | |
205 smulbt a3, ip, a4 | |
206 add v2, v2, lr | |
207 sub fp, fp, lr | |
208 sub v4, v4, a3 | |
209 add v6, v6, a3 | |
210 | |
/* Park the eight even sums on the stack so v1-v7/fp can be reused for the odd part. */
211 stmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp} | |
212 | |
/* Row 1 term: W1, W3, W5, W7 times c1 for both columns; the W3 products (v3, v4) are then negated. */
213 ldr ip, [pc, #(w13-.-8)] | |
214 ldr a4, [a1, #(16*1)] | |
215 ldr lr, [pc, #(w57-.-8)] | |
216 smulbb v1, ip, a4 | |
217 smultb v3, ip, a4 | |
218 smulbb v5, lr, a4 | |
219 smultb v7, lr, a4 | |
220 smulbt v2, ip, a4 | |
221 smultt v4, ip, a4 | |
222 smulbt v6, lr, a4 | |
223 smultt fp, lr, a4 | |
224 rsb v4, v4, #0 | |
225 ldr a4, [a1, #(16*3)] | |
226 rsb v3, v3, #0 | |
227 | |
/* Row 3 term: v1 += W3*c3, v3 += W7*c3, v5 -= W1*c3, v7 -= W5*c3 (and the same for v2, v4, v6, fp). */
228 smlatb v1, ip, a4, v1 | |
229 smlatb v3, lr, a4, v3 | |
230 smulbb a3, ip, a4 | |
231 smulbb a2, lr, a4 | |
232 sub v5, v5, a3 | |
233 sub v7, v7, a2 | |
234 smlatt v2, ip, a4, v2 | |
235 smlatt v4, lr, a4, v4 | |
236 smulbt a3, ip, a4 | |
237 smulbt a2, lr, a4 | |
238 sub v6, v6, a3 | |
239 ldr a4, [a1, #(16*5)] | |
240 sub fp, fp, a2 | |
241 | |
/* Row 5 term: v1 += W5*c5, v3 += W1*c5, v5 += W7*c5, v7 += W3*c5 (and the same for v2, v4, v6, fp). */
242 smlabb v1, lr, a4, v1 | |
243 smlabb v3, ip, a4, v3 | |
244 smlatb v5, lr, a4, v5 | |
245 smlatb v7, ip, a4, v7 | |
246 smlabt v2, lr, a4, v2 | |
247 smlabt v4, ip, a4, v4 | |
248 smlatt v6, lr, a4, v6 | |
249 ldr a3, [a1, #(16*7)] | |
250 smlatt fp, ip, a4, fp | |
251 | |
/* Row 7 term (loaded into a3): v1 += W7*c7, v3 += W5*c7, v5 += W3*c7, v7 -= W1*c7 (and the same for v2, v4, v6, fp). */
252 smlatb v1, lr, a3, v1 | |
253 smlabb v3, lr, a3, v3 | |
254 smlatb v5, ip, a3, v5 | |
255 smulbb a4, ip, a3 | |
256 smlatt v2, lr, a3, v2 | |
257 sub v7, v7, a4 | |
258 smlabt v4, lr, a3, v4 | |
259 smulbt a4, ip, a3 | |
260 smlatt v6, ip, a3, v6 | |
261 sub fp, fp, a4 | |
262 .endm | |
263 | |
/* idct_col_armv5te: column pass for two adjacent columns, writing 16-bit results back in place. */
/* In:  a1 = address of the top element of the column pair (rows 16 bytes apart); a1 is not modified. */
/* Overwrites a2-a4, ip, v1-v7 and fp; lr is saved on entry and clobbered by the idct_col expansion. */
/* Each step pops one (first column, second column) pair of even sums into a3/a4, combines it with the */
/* matching odd sums, shifts right by 20 and packs the two results into one word. */
/* Low half: lsr #20 leaves 12 bits; the flags from adds/subs are still valid (the mov does not set them), */
/* so orrmi #0xf000 fills bits 12-15 when the sum was negative. High half: asr #20 then lsl #16. */
264 .align | |
265 .func idct_col_armv5te | |
266 idct_col_armv5te: | |
267 str lr, [sp, #-4]! | |
268 | |
269 idct_col | |
270 | |
/* Rows 0 and 7: first even pair plus/minus v1/v2. */
271 ldmfd sp!, {a3, a4} | |
272 adds a2, a3, v1 | |
273 mov a2, a2, lsr #20 | |
274 orrmi a2, a2, #0xf000 | |
275 add ip, a4, v2 | |
276 mov ip, ip, asr #20 | |
277 orr a2, a2, ip, lsl #16 | |
278 str a2, [a1] | |
279 subs a3, a3, v1 | |
280 mov a2, a3, lsr #20 | |
281 orrmi a2, a2, #0xf000 | |
282 sub a4, a4, v2 | |
283 mov a4, a4, asr #20 | |
284 orr a2, a2, a4, lsl #16 | |
285 ldmfd sp!, {a3, a4} | |
286 str a2, [a1, #(16*7)] | |
287 | |
/* Rows 1 and 6: second even pair minus/plus v3/v4 (the macro leaves that odd sum negated). */
288 subs a2, a3, v3 | |
289 mov a2, a2, lsr #20 | |
290 orrmi a2, a2, #0xf000 | |
291 sub ip, a4, v4 | |
292 mov ip, ip, asr #20 | |
293 orr a2, a2, ip, lsl #16 | |
294 str a2, [a1, #(16*1)] | |
295 adds a3, a3, v3 | |
296 mov a2, a3, lsr #20 | |
297 orrmi a2, a2, #0xf000 | |
298 add a4, a4, v4 | |
299 mov a4, a4, asr #20 | |
300 orr a2, a2, a4, lsl #16 | |
301 ldmfd sp!, {a3, a4} | |
302 str a2, [a1, #(16*6)] | |
303 | |
/* Rows 2 and 5: third even pair plus/minus v5/v6. */
304 adds a2, a3, v5 | |
305 mov a2, a2, lsr #20 | |
306 orrmi a2, a2, #0xf000 | |
307 add ip, a4, v6 | |
308 mov ip, ip, asr #20 | |
309 orr a2, a2, ip, lsl #16 | |
310 str a2, [a1, #(16*2)] | |
311 subs a3, a3, v5 | |
312 mov a2, a3, lsr #20 | |
313 orrmi a2, a2, #0xf000 | |
314 sub a4, a4, v6 | |
315 mov a4, a4, asr #20 | |
316 orr a2, a2, a4, lsl #16 | |
317 ldmfd sp!, {a3, a4} | |
318 str a2, [a1, #(16*5)] | |
319 | |
/* Rows 3 and 4: fourth even pair plus/minus v7/fp. */
320 adds a2, a3, v7 | |
321 mov a2, a2, lsr #20 | |
322 orrmi a2, a2, #0xf000 | |
323 add ip, a4, fp | |
324 mov ip, ip, asr #20 | |
325 orr a2, a2, ip, lsl #16 | |
326 str a2, [a1, #(16*3)] | |
327 subs a3, a3, v7 | |
328 mov a2, a3, lsr #20 | |
329 orrmi a2, a2, #0xf000 | |
330 sub a4, a4, fp | |
331 mov a4, a4, asr #20 | |
332 orr a2, a2, a4, lsl #16 | |
333 str a2, [a1, #(16*4)] | |
334 | |
/* All 32 bytes pushed by idct_col have been popped; return via the saved lr. */
335 ldr pc, [sp], #4 | |
336 .endfunc | |
337 | |
/* idct_col_put_armv5te: column pass for two adjacent columns, storing the results as clamped bytes. */
/* In:  a1 = address of the top element of the column pair in the coefficient block. */
/*      The output address and the output row stride are not passed in registers: they are read from */
/*      the two words just above this routine's saved lr, i.e. the a1/a2 that simple_idct_put_armv5te */
/*      pushed first in its stmfd. The output-address slot is advanced by 2 for the next call. */
/* Each result is shifted right by 20 and clamped to 0..255 (movs/movmi #0, then cmp/movgt #255); */
/* the two columns' bytes are packed low byte / high byte and written with one strh. */
/* Overwrites a2-a4, ip, v1-v7 and fp; lr is saved on entry and reused as the row stride. */
338 .align | |
339 .func idct_col_put_armv5te | |
340 idct_col_put_armv5te: | |
341 str lr, [sp, #-4]! | |
342 | |
343 idct_col | |
344 | |
/* Rows 0 and 7. After this first pop, 24 bytes of even sums plus the saved lr sit below the caller's */
/* frame, so [sp, #28] is the output address and [sp, #32] the row stride. */
345 ldmfd sp!, {a3, a4} | |
346 ldr lr, [sp, #32] | |
347 add a2, a3, v1 | |
348 movs a2, a2, asr #20 | |
349 movmi a2, #0 | |
350 cmp a2, #255 | |
351 movgt a2, #255 | |
352 add ip, a4, v2 | |
353 movs ip, ip, asr #20 | |
354 movmi ip, #0 | |
355 cmp ip, #255 | |
356 movgt ip, #255 | |
357 orr a2, a2, ip, lsl #8 | |
358 sub a3, a3, v1 | |
359 movs a3, a3, asr #20 | |
360 movmi a3, #0 | |
361 cmp a3, #255 | |
362 movgt a3, #255 | |
363 sub a4, a4, v2 | |
364 movs a4, a4, asr #20 | |
365 movmi a4, #0 | |
366 cmp a4, #255 | |
/* The ldr sits between cmp and movgt; it does not change the flags. v1 is free now and becomes the row-0 output address. */
367 ldr v1, [sp, #28] | |
368 movgt a4, #255 | |
369 strh a2, [v1] | |
/* Write output address + 2 back to the caller's slot so the next call handles the next two columns. */
370 add a2, v1, #2 | |
371 str a2, [sp, #28] | |
372 orr a2, a3, a4, lsl #8 | |
/* v2 = 7 * stride; the pre-indexed store with writeback leaves v2 pointing at output row 7. */
373 rsb v2, lr, lr, lsl #3 | |
374 ldmfd sp!, {a3, a4} | |
375 strh a2, [v2, v1]! | |
376 | |
/* Rows 1 and 6: v1 steps down one row per store, v2 steps up one row per store. */
377 sub a2, a3, v3 | |
378 movs a2, a2, asr #20 | |
379 movmi a2, #0 | |
380 cmp a2, #255 | |
381 movgt a2, #255 | |
382 sub ip, a4, v4 | |
383 movs ip, ip, asr #20 | |
384 movmi ip, #0 | |
385 cmp ip, #255 | |
386 movgt ip, #255 | |
387 orr a2, a2, ip, lsl #8 | |
388 strh a2, [v1, lr]! | |
389 add a3, a3, v3 | |
390 movs a2, a3, asr #20 | |
391 movmi a2, #0 | |
392 cmp a2, #255 | |
393 movgt a2, #255 | |
394 add a4, a4, v4 | |
395 movs a4, a4, asr #20 | |
396 movmi a4, #0 | |
397 cmp a4, #255 | |
398 movgt a4, #255 | |
399 orr a2, a2, a4, lsl #8 | |
400 ldmfd sp!, {a3, a4} | |
401 strh a2, [v2, -lr]! | |
402 | |
/* Rows 2 and 5. */
403 add a2, a3, v5 | |
404 movs a2, a2, asr #20 | |
405 movmi a2, #0 | |
406 cmp a2, #255 | |
407 movgt a2, #255 | |
408 add ip, a4, v6 | |
409 movs ip, ip, asr #20 | |
410 movmi ip, #0 | |
411 cmp ip, #255 | |
412 movgt ip, #255 | |
413 orr a2, a2, ip, lsl #8 | |
414 strh a2, [v1, lr]! | |
415 sub a3, a3, v5 | |
416 movs a2, a3, asr #20 | |
417 movmi a2, #0 | |
418 cmp a2, #255 | |
419 movgt a2, #255 | |
420 sub a4, a4, v6 | |
421 movs a4, a4, asr #20 | |
422 movmi a4, #0 | |
423 cmp a4, #255 | |
424 movgt a4, #255 | |
425 orr a2, a2, a4, lsl #8 | |
426 ldmfd sp!, {a3, a4} | |
427 strh a2, [v2, -lr]! | |
428 | |
/* Rows 3 and 4: the last two stores need no writeback. */
429 add a2, a3, v7 | |
430 movs a2, a2, asr #20 | |
431 movmi a2, #0 | |
432 cmp a2, #255 | |
433 movgt a2, #255 | |
434 add ip, a4, fp | |
435 movs ip, ip, asr #20 | |
436 movmi ip, #0 | |
437 cmp ip, #255 | |
438 movgt ip, #255 | |
439 orr a2, a2, ip, lsl #8 | |
440 strh a2, [v1, lr] | |
441 sub a3, a3, v7 | |
442 movs a2, a3, asr #20 | |
443 movmi a2, #0 | |
444 cmp a2, #255 | |
445 movgt a2, #255 | |
446 sub a4, a4, fp | |
447 movs a4, a4, asr #20 | |
448 movmi a4, #0 | |
449 cmp a4, #255 | |
450 movgt a4, #255 | |
451 orr a2, a2, a4, lsl #8 | |
452 strh a2, [v2, -lr] | |
453 | |
454 ldr pc, [sp], #4 | |
455 .endfunc | |
456 | |
/* idct_col_add_armv5te: column pass for two adjacent columns, adding the results to existing bytes. */
/* In:  a1 = address of the top element of the column pair in the coefficient block. */
/*      The output address and the output row stride are read from the two words just above this */
/*      routine's saved lr, i.e. the a1/a2 that simple_idct_add_armv5te pushed first in its stmfd. */
/*      The output-address slot is advanced by 2 for the next call. */
/* For every output row one halfword (two bytes, one per column) is loaded, each IDCT result is shifted */
/* right by 20 and added to its byte, the sum is clamped to 0..255 (movmi #0, then cmp/movgt #255), */
/* and the repacked halfword is stored back. */
/* Overwrites a2-a4, ip, v1-v7 and fp; lr is saved on entry and reused as the descending row pointer. */
457 .align | |
458 .func idct_col_add_armv5te | |
459 idct_col_add_armv5te: | |
460 str lr, [sp, #-4]! | |
461 | |
462 idct_col | |
463 | |
/* Before any pop: 32 bytes of even sums plus the saved lr sit below the caller's frame, so [sp, #36] is the output address. */
464 ldr lr, [sp, #36] | |
465 | |
/* Rows 0 and 7. ip = the two existing bytes of row 0; v1 is reused as scratch once the row-7 difference is in a3. */
466 ldmfd sp!, {a3, a4} | |
467 ldrh ip, [lr] | |
468 add a2, a3, v1 | |
469 mov a2, a2, asr #20 | |
470 sub a3, a3, v1 | |
471 and v1, ip, #255 | |
472 adds a2, a2, v1 | |
473 movmi a2, #0 | |
474 cmp a2, #255 | |
475 movgt a2, #255 | |
476 add v1, a4, v2 | |
477 mov v1, v1, asr #20 | |
478 adds v1, v1, ip, lsr #8 | |
479 movmi v1, #0 | |
480 cmp v1, #255 | |
481 movgt v1, #255 | |
482 orr a2, a2, v1, lsl #8 | |
/* After the pop the row stride is at [sp, #32]; v1 holds it for the rest of the routine. */
483 ldr v1, [sp, #32] | |
484 sub a4, a4, v2 | |
/* v2 = 7 * stride; the pre-indexed load with writeback leaves v2 pointing at output row 7. */
485 rsb v2, v1, v1, lsl #3 | |
486 ldrh ip, [v2, lr]! | |
487 strh a2, [lr] | |
488 mov a3, a3, asr #20 | |
489 and a2, ip, #255 | |
490 adds a3, a3, a2 | |
491 movmi a3, #0 | |
492 cmp a3, #255 | |
493 movgt a3, #255 | |
494 mov a4, a4, asr #20 | |
495 adds a4, a4, ip, lsr #8 | |
496 movmi a4, #0 | |
497 cmp a4, #255 | |
498 movgt a4, #255 | |
/* Write output address + 2 back to the caller's slot ([sp, #28] at this stack depth) for the next call. */
499 add a2, lr, #2 | |
500 str a2, [sp, #28] | |
501 orr a2, a3, a4, lsl #8 | |
502 strh a2, [v2] | |
503 | |
/* Rows 1 and 6: lr steps down one row per load, v2 steps up one row per load. */
/* v3 is reused as scratch as soon as both of its uses (sub and add) are done. */
504 ldmfd sp!, {a3, a4} | |
505 ldrh ip, [lr, v1]! | |
506 sub a2, a3, v3 | |
507 mov a2, a2, asr #20 | |
508 add a3, a3, v3 | |
509 and v3, ip, #255 | |
510 adds a2, a2, v3 | |
511 movmi a2, #0 | |
512 cmp a2, #255 | |
513 movgt a2, #255 | |
514 sub v3, a4, v4 | |
515 mov v3, v3, asr #20 | |
516 adds v3, v3, ip, lsr #8 | |
517 movmi v3, #0 | |
518 cmp v3, #255 | |
519 movgt v3, #255 | |
520 orr a2, a2, v3, lsl #8 | |
521 add a4, a4, v4 | |
522 ldrh ip, [v2, -v1]! | |
523 strh a2, [lr] | |
524 mov a3, a3, asr #20 | |
525 and a2, ip, #255 | |
526 adds a3, a3, a2 | |
527 movmi a3, #0 | |
528 cmp a3, #255 | |
529 movgt a3, #255 | |
530 mov a4, a4, asr #20 | |
531 adds a4, a4, ip, lsr #8 | |
532 movmi a4, #0 | |
533 cmp a4, #255 | |
534 movgt a4, #255 | |
535 orr a2, a3, a4, lsl #8 | |
536 strh a2, [v2] | |
537 | |
/* Rows 2 and 5 (v3 is plain scratch from here on). */
538 ldmfd sp!, {a3, a4} | |
539 ldrh ip, [lr, v1]! | |
540 add a2, a3, v5 | |
541 mov a2, a2, asr #20 | |
542 sub a3, a3, v5 | |
543 and v3, ip, #255 | |
544 adds a2, a2, v3 | |
545 movmi a2, #0 | |
546 cmp a2, #255 | |
547 movgt a2, #255 | |
548 add v3, a4, v6 | |
549 mov v3, v3, asr #20 | |
550 adds v3, v3, ip, lsr #8 | |
551 movmi v3, #0 | |
552 cmp v3, #255 | |
553 movgt v3, #255 | |
554 orr a2, a2, v3, lsl #8 | |
555 sub a4, a4, v6 | |
556 ldrh ip, [v2, -v1]! | |
557 strh a2, [lr] | |
558 mov a3, a3, asr #20 | |
559 and a2, ip, #255 | |
560 adds a3, a3, a2 | |
561 movmi a3, #0 | |
562 cmp a3, #255 | |
563 movgt a3, #255 | |
564 mov a4, a4, asr #20 | |
565 adds a4, a4, ip, lsr #8 | |
566 movmi a4, #0 | |
567 cmp a4, #255 | |
568 movgt a4, #255 | |
569 orr a2, a3, a4, lsl #8 | |
570 strh a2, [v2] | |
571 | |
/* Rows 3 and 4. */
572 ldmfd sp!, {a3, a4} | |
573 ldrh ip, [lr, v1]! | |
574 add a2, a3, v7 | |
575 mov a2, a2, asr #20 | |
576 sub a3, a3, v7 | |
577 and v3, ip, #255 | |
578 adds a2, a2, v3 | |
579 movmi a2, #0 | |
580 cmp a2, #255 | |
581 movgt a2, #255 | |
582 add v3, a4, fp | |
583 mov v3, v3, asr #20 | |
584 adds v3, v3, ip, lsr #8 | |
585 movmi v3, #0 | |
586 cmp v3, #255 | |
587 movgt v3, #255 | |
588 orr a2, a2, v3, lsl #8 | |
589 sub a4, a4, fp | |
590 ldrh ip, [v2, -v1]! | |
591 strh a2, [lr] | |
592 mov a3, a3, asr #20 | |
593 and a2, ip, #255 | |
594 adds a3, a3, a2 | |
595 movmi a3, #0 | |
596 cmp a3, #255 | |
597 movgt a3, #255 | |
598 mov a4, a4, asr #20 | |
599 adds a4, a4, ip, lsr #8 | |
600 movmi a4, #0 | |
601 cmp a4, #255 | |
602 movgt a4, #255 | |
603 orr a2, a3, a4, lsl #8 | |
604 strh a2, [v2] | |
605 | |
606 ldr pc, [sp], #4 | |
607 .endfunc | |
608 | |
/*
 * simple_idct_armv5te: in-place 2-D IDCT of an 8x8 block of 16-bit values.
 *
 * In:  a1 = address of the block (8 rows of 16 bytes).
 * Saves and restores v1-v7 and fp, which the row/column helpers overwrite.
 * a1 is left pointing 12 bytes past the start of the block on return;
 * a2-a4 and ip are clobbered by the helpers.
 */
        .align
        .global simple_idct_armv5te
        .func   simple_idct_armv5te
simple_idct_armv5te:
        stmdb   sp!, {v1-v7, fp, lr}

        /* Row pass: eight rows, 16 bytes apart; no pointer bump after the last. */
        .rept   7
        bl      idct_row_armv5te
        add     a1, a1, #16
        .endr
        bl      idct_row_armv5te

        /* Back to the first row. */
        sub     a1, a1, #(16*7)

        /* Column pass: four calls, each handling two adjacent 16-bit columns. */
        .rept   3
        bl      idct_col_armv5te
        add     a1, a1, #4
        .endr
        bl      idct_col_armv5te

        /* Restore the saved registers and return by loading pc. */
        ldmia   sp!, {v1-v7, fp, pc}
        .endfunc
643 | |
/*
 * simple_idct_add_armv5te: 2-D IDCT of an 8x8 block, with the result added
 * to existing bytes and clamped to 0..255 by idct_col_add_armv5te.
 *
 * In:  a1 = output address, a2 = output row stride in bytes,
 *      a3 = address of the coefficient block (8 rows of 16 bytes).
 *      (Presumably dest / line_size / block in the C prototype -- confirm
 *      against the caller.)
 *
 * a1 and a2 are pushed first so that they sit directly above the other
 * saved registers; idct_col_add_armv5te reads them from there and bumps the
 * stored output address by 2 on every call. The row pass overwrites the
 * coefficient block in place.
 */
        .align
        .global simple_idct_add_armv5te
        .func   simple_idct_add_armv5te
simple_idct_add_armv5te:
        stmdb   sp!, {a1, a2, v1-v7, fp, lr}

        /* The helpers expect the block address in a1. */
        mov     a1, a3

        /* Row pass: eight rows, 16 bytes apart; no pointer bump after the last. */
        .rept   7
        bl      idct_row_armv5te
        add     a1, a1, #16
        .endr
        bl      idct_row_armv5te

        /* Back to the first row. */
        sub     a1, a1, #(16*7)

        /* Column pass: four calls, each handling two adjacent columns. */
        .rept   3
        bl      idct_col_add_armv5te
        add     a1, a1, #4
        .endr
        bl      idct_col_add_armv5te

        /* Drop the two stacked argument words, then restore and return. */
        add     sp, sp, #8
        ldmia   sp!, {v1-v7, fp, pc}
        .endfunc
681 | |
/*
 * simple_idct_put_armv5te: 2-D IDCT of an 8x8 block, with the result
 * clamped to 0..255 and stored as bytes by idct_col_put_armv5te.
 *
 * In:  a1 = output address, a2 = output row stride in bytes,
 *      a3 = address of the coefficient block (8 rows of 16 bytes).
 *      (Presumably dest / line_size / block in the C prototype -- confirm
 *      against the caller.)
 *
 * a1 and a2 are pushed first so that they sit directly above the other
 * saved registers; idct_col_put_armv5te reads them from there and bumps the
 * stored output address by 2 on every call. The row pass overwrites the
 * coefficient block in place.
 */
        .align
        .global simple_idct_put_armv5te
        .func   simple_idct_put_armv5te
simple_idct_put_armv5te:
        stmdb   sp!, {a1, a2, v1-v7, fp, lr}

        /* The helpers expect the block address in a1. */
        mov     a1, a3

        /* Row pass: eight rows, 16 bytes apart; no pointer bump after the last. */
        .rept   7
        bl      idct_row_armv5te
        add     a1, a1, #16
        .endr
        bl      idct_row_armv5te

        /* Back to the first row. */
        sub     a1, a1, #(16*7)

        /* Column pass: four calls, each handling two adjacent columns. */
        .rept   3
        bl      idct_col_put_armv5te
        add     a1, a1, #4
        .endr
        bl      idct_col_put_armv5te

        /* Drop the two stacked argument words, then restore and return. */
        add     sp, sp, #8
        ldmia   sp!, {v1-v7, fp, pc}
        .endfunc