Mercurial > libavcodec.hg
annotate armv4l/simple_idct_armv5te.S @ 3990:746a60ba3177 libavcodec
enable CMOV_IS_FAST as it is faster than or equal in speed on every CPU (Duron, Athlon, PM, P3) for which I have seen benchmarks; it might be slower on P4, but no one has posted benchmarks ...
author | michael |
---|---|
date | Wed, 11 Oct 2006 12:23:40 +0000 |
parents | c8c591fe26f8 |
children | 97d82c7585b4 |
rev | line source |
---|---|
/*
 * Simple IDCT
 *
 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
 * Copyright (c) 2006 Mans Rullgard <mru@inprovide.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
23 | |
24 #define W1 22725 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ | |
25 #define W2 21407 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ | |
26 #define W3 19266 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ | |
27 #define W4 16383 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ | |
28 #define W5 12873 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ | |
29 #define W6 8867 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ | |
30 #define W7 4520 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ | |
31 #define ROW_SHIFT 11 | |
32 #define COL_SHIFT 20 | |
33 | |
34 #define W13 (W1 | (W3 << 16)) | |
35 #define W26 (W2 | (W6 << 16)) | |
36 #define W57 (W5 | (W7 << 16)) | |
37 | |
38 .text | |
39 .align | |
40 w13: .long W13 | |
41 w26: .long W26 | |
42 w57: .long W57 | |
43 | |
44 .align | |
45 .func idct_row_armv5te | |
46 idct_row_armv5te: | |
47 str lr, [sp, #-4]! | |
48 | |
49 ldrd v1, [a1, #8] | |
50 ldrd a3, [a1] /* a3 = row[1:0], a4 = row[3:2] */ | |
51 orrs v1, v1, v2 | |
52 cmpeq v1, a4 | |
53 cmpeq v1, a3, lsr #16 | |
54 beq row_dc_only | |
55 | |
56 mov v1, #(1<<(ROW_SHIFT-1)) | |
57 mov ip, #16384 | |
58 sub ip, ip, #1 /* ip = W4 */ | |
59 smlabb v1, ip, a3, v1 /* v1 = W4*row[0]+(1<<(RS-1)) */ | |
60 ldr ip, [pc, #(w26-.-8)] /* ip = W2 | (W6 << 16) */ | |
61 smultb a2, ip, a4 | |
62 smulbb lr, ip, a4 | |
63 add v2, v1, a2 | |
64 sub v3, v1, a2 | |
65 sub v4, v1, lr | |
66 add v1, v1, lr | |
67 | |
68 ldr ip, [pc, #(w13-.-8)] /* ip = W1 | (W3 << 16) */ | |
69 ldr lr, [pc, #(w57-.-8)] /* lr = W5 | (W7 << 16) */ | |
70 smulbt v5, ip, a3 | |
71 smultt v6, lr, a4 | |
72 smlatt v5, ip, a4, v5 | |
73 smultt a2, ip, a3 | |
74 smulbt v7, lr, a3 | |
75 sub v6, v6, a2 | |
76 smulbt a2, ip, a4 | |
77 smultt fp, lr, a3 | |
78 sub v7, v7, a2 | |
79 smulbt a2, lr, a4 | |
80 ldrd a3, [a1, #8] /* a3=row[5:4] a4=row[7:6] */ | |
81 sub fp, fp, a2 | |
82 | |
83 orrs a2, a3, a4 | |
84 beq 1f | |
85 | |
86 smlabt v5, lr, a3, v5 | |
87 smlabt v6, ip, a3, v6 | |
88 smlatt v5, lr, a4, v5 | |
89 smlabt v6, lr, a4, v6 | |
90 smlatt v7, lr, a3, v7 | |
91 smlatt fp, ip, a3, fp | |
92 smulbt a2, ip, a4 | |
93 smlatt v7, ip, a4, v7 | |
94 sub fp, fp, a2 | |
95 | |
96 ldr ip, [pc, #(w26-.-8)] /* ip = W2 | (W6 << 16) */ | |
97 mov a2, #16384 | |
98 sub a2, a2, #1 /* a2 = W4 */ | |
99 smulbb a2, a2, a3 /* a2 = W4*row[4] */ | |
100 smultb lr, ip, a4 /* lr = W6*row[6] */ | |
101 add v1, v1, a2 /* v1 += W4*row[4] */ | |
102 add v1, v1, lr /* v1 += W6*row[6] */ | |
103 add v4, v4, a2 /* v4 += W4*row[4] */ | |
104 sub v4, v4, lr /* v4 -= W6*row[6] */ | |
105 smulbb lr, ip, a4 /* lr = W2*row[6] */ | |
106 sub v2, v2, a2 /* v2 -= W4*row[4] */ | |
107 sub v2, v2, lr /* v2 -= W2*row[6] */ | |
108 sub v3, v3, a2 /* v3 -= W4*row[4] */ | |
109 add v3, v3, lr /* v3 += W2*row[6] */ | |
110 | |
111 1: add a2, v1, v5 | |
112 mov a3, a2, lsr #11 | |
113 bic a3, a3, #0x1f0000 | |
114 sub a2, v2, v6 | |
115 mov a2, a2, lsr #11 | |
116 add a3, a3, a2, lsl #16 | |
117 add a2, v3, v7 | |
118 mov a4, a2, lsr #11 | |
119 bic a4, a4, #0x1f0000 | |
120 add a2, v4, fp | |
121 mov a2, a2, lsr #11 | |
122 add a4, a4, a2, lsl #16 | |
123 strd a3, [a1] | |
124 | |
125 sub a2, v4, fp | |
126 mov a3, a2, lsr #11 | |
127 bic a3, a3, #0x1f0000 | |
128 sub a2, v3, v7 | |
129 mov a2, a2, lsr #11 | |
130 add a3, a3, a2, lsl #16 | |
131 add a2, v2, v6 | |
132 mov a4, a2, lsr #11 | |
133 bic a4, a4, #0x1f0000 | |
134 sub a2, v1, v5 | |
135 mov a2, a2, lsr #11 | |
136 add a4, a4, a2, lsl #16 | |
137 strd a3, [a1, #8] | |
138 | |
139 ldr pc, [sp], #4 | |
140 | |
141 row_dc_only: | |
142 orr a3, a3, a3, lsl #16 | |
143 bic a3, a3, #0xe000 | |
144 mov a3, a3, lsl #3 | |
145 mov a4, a3 | |
146 strd a3, [a1] | |
147 strd a3, [a1, #8] | |
148 | |
149 ldr pc, [sp], #4 | |
150 .endfunc | |
151 | |
152 .macro idct_col | |
153 ldr a4, [a1] /* a4 = col[1:0] */ | |
154 mov ip, #16384 | |
155 sub ip, ip, #1 /* ip = W4 */ | |
156 #if 0 | |
157 mov v1, #(1<<(COL_SHIFT-1)) | |
158 smlabt v2, ip, a4, v1 /* v2 = W4*col[1] + (1<<(COL_SHIFT-1)) */ | |
159 smlabb v1, ip, a4, v1 /* v1 = W4*col[0] + (1<<(COL_SHIFT-1)) */ | |
160 ldr a4, [a1, #(16*4)] | |
161 #else | |
162 mov v1, #((1<<(COL_SHIFT-1))/W4) /* this matches the C version */ | |
163 add v2, v1, a4, asr #16 | |
164 rsb v2, v2, v2, lsl #14 | |
165 mov a4, a4, lsl #16 | |
166 add v1, v1, a4, asr #16 | |
167 ldr a4, [a1, #(16*4)] | |
168 rsb v1, v1, v1, lsl #14 | |
169 #endif | |
170 | |
171 smulbb lr, ip, a4 | |
172 smulbt a3, ip, a4 | |
173 sub v3, v1, lr | |
174 sub v5, v1, lr | |
175 add v7, v1, lr | |
176 add v1, v1, lr | |
177 sub v4, v2, a3 | |
178 sub v6, v2, a3 | |
179 add fp, v2, a3 | |
180 ldr ip, [pc, #(w26-.-8)] | |
181 ldr a4, [a1, #(16*2)] | |
182 add v2, v2, a3 | |
183 | |
184 smulbb lr, ip, a4 | |
185 smultb a3, ip, a4 | |
186 add v1, v1, lr | |
187 sub v7, v7, lr | |
188 add v3, v3, a3 | |
189 sub v5, v5, a3 | |
190 smulbt lr, ip, a4 | |
191 smultt a3, ip, a4 | |
192 add v2, v2, lr | |
193 sub fp, fp, lr | |
194 add v4, v4, a3 | |
195 ldr a4, [a1, #(16*6)] | |
196 sub v6, v6, a3 | |
197 | |
198 smultb lr, ip, a4 | |
199 smulbb a3, ip, a4 | |
200 add v1, v1, lr | |
201 sub v7, v7, lr | |
202 sub v3, v3, a3 | |
203 add v5, v5, a3 | |
204 smultt lr, ip, a4 | |
205 smulbt a3, ip, a4 | |
206 add v2, v2, lr | |
207 sub fp, fp, lr | |
208 sub v4, v4, a3 | |
209 add v6, v6, a3 | |
210 | |
211 stmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp} | |
212 | |
213 ldr ip, [pc, #(w13-.-8)] | |
214 ldr a4, [a1, #(16*1)] | |
215 ldr lr, [pc, #(w57-.-8)] | |
216 smulbb v1, ip, a4 | |
217 smultb v3, ip, a4 | |
218 smulbb v5, lr, a4 | |
219 smultb v7, lr, a4 | |
220 smulbt v2, ip, a4 | |
221 smultt v4, ip, a4 | |
222 smulbt v6, lr, a4 | |
223 smultt fp, lr, a4 | |
224 rsb v4, v4, #0 | |
225 ldr a4, [a1, #(16*3)] | |
226 rsb v3, v3, #0 | |
227 | |
228 smlatb v1, ip, a4, v1 | |
229 smlatb v3, lr, a4, v3 | |
230 smulbb a3, ip, a4 | |
231 smulbb a2, lr, a4 | |
232 sub v5, v5, a3 | |
233 sub v7, v7, a2 | |
234 smlatt v2, ip, a4, v2 | |
235 smlatt v4, lr, a4, v4 | |
236 smulbt a3, ip, a4 | |
237 smulbt a2, lr, a4 | |
238 sub v6, v6, a3 | |
239 ldr a4, [a1, #(16*5)] | |
240 sub fp, fp, a2 | |
241 | |
242 smlabb v1, lr, a4, v1 | |
243 smlabb v3, ip, a4, v3 | |
244 smlatb v5, lr, a4, v5 | |
245 smlatb v7, ip, a4, v7 | |
246 smlabt v2, lr, a4, v2 | |
247 smlabt v4, ip, a4, v4 | |
248 smlatt v6, lr, a4, v6 | |
249 ldr a3, [a1, #(16*7)] | |
250 smlatt fp, ip, a4, fp | |
251 | |
252 smlatb v1, lr, a3, v1 | |
253 smlabb v3, lr, a3, v3 | |
254 smlatb v5, ip, a3, v5 | |
255 smulbb a4, ip, a3 | |
256 smlatt v2, lr, a3, v2 | |
257 sub v7, v7, a4 | |
258 smlabt v4, lr, a3, v4 | |
259 smulbt a4, ip, a3 | |
260 smlatt v6, ip, a3, v6 | |
261 sub fp, fp, a4 | |
262 .endm | |
263 | |
264 .align | |
265 .func idct_col_armv5te | |
266 idct_col_armv5te: | |
267 str lr, [sp, #-4]! | |
268 | |
269 idct_col | |
270 | |
271 ldmfd sp!, {a3, a4} | |
272 adds a2, a3, v1 | |
273 mov a2, a2, lsr #20 | |
274 orrmi a2, a2, #0xf000 | |
275 add ip, a4, v2 | |
276 mov ip, ip, asr #20 | |
277 orr a2, a2, ip, lsl #16 | |
278 str a2, [a1] | |
279 subs a3, a3, v1 | |
280 mov a2, a3, lsr #20 | |
281 orrmi a2, a2, #0xf000 | |
282 sub a4, a4, v2 | |
283 mov a4, a4, asr #20 | |
284 orr a2, a2, a4, lsl #16 | |
285 ldmfd sp!, {a3, a4} | |
286 str a2, [a1, #(16*7)] | |
287 | |
288 subs a2, a3, v3 | |
289 mov a2, a2, lsr #20 | |
290 orrmi a2, a2, #0xf000 | |
291 sub ip, a4, v4 | |
292 mov ip, ip, asr #20 | |
293 orr a2, a2, ip, lsl #16 | |
294 str a2, [a1, #(16*1)] | |
295 adds a3, a3, v3 | |
296 mov a2, a3, lsr #20 | |
297 orrmi a2, a2, #0xf000 | |
298 add a4, a4, v4 | |
299 mov a4, a4, asr #20 | |
300 orr a2, a2, a4, lsl #16 | |
301 ldmfd sp!, {a3, a4} | |
302 str a2, [a1, #(16*6)] | |
303 | |
304 adds a2, a3, v5 | |
305 mov a2, a2, lsr #20 | |
306 orrmi a2, a2, #0xf000 | |
307 add ip, a4, v6 | |
308 mov ip, ip, asr #20 | |
309 orr a2, a2, ip, lsl #16 | |
310 str a2, [a1, #(16*2)] | |
311 subs a3, a3, v5 | |
312 mov a2, a3, lsr #20 | |
313 orrmi a2, a2, #0xf000 | |
314 sub a4, a4, v6 | |
315 mov a4, a4, asr #20 | |
316 orr a2, a2, a4, lsl #16 | |
317 ldmfd sp!, {a3, a4} | |
318 str a2, [a1, #(16*5)] | |
319 | |
320 adds a2, a3, v7 | |
321 mov a2, a2, lsr #20 | |
322 orrmi a2, a2, #0xf000 | |
323 add ip, a4, fp | |
324 mov ip, ip, asr #20 | |
325 orr a2, a2, ip, lsl #16 | |
326 str a2, [a1, #(16*3)] | |
327 subs a3, a3, v7 | |
328 mov a2, a3, lsr #20 | |
329 orrmi a2, a2, #0xf000 | |
330 sub a4, a4, fp | |
331 mov a4, a4, asr #20 | |
332 orr a2, a2, a4, lsl #16 | |
333 str a2, [a1, #(16*4)] | |
334 | |
335 ldr pc, [sp], #4 | |
336 .endfunc | |
337 | |
338 .align | |
339 .func idct_col_put_armv5te | |
340 idct_col_put_armv5te: | |
341 str lr, [sp, #-4]! | |
342 | |
343 idct_col | |
344 | |
345 ldmfd sp!, {a3, a4} | |
346 ldr lr, [sp, #32] | |
347 add a2, a3, v1 | |
348 movs a2, a2, asr #20 | |
349 movmi a2, #0 | |
350 cmp a2, #255 | |
351 movgt a2, #255 | |
352 add ip, a4, v2 | |
353 movs ip, ip, asr #20 | |
354 movmi ip, #0 | |
355 cmp ip, #255 | |
356 movgt ip, #255 | |
357 orr a2, a2, ip, lsl #8 | |
358 sub a3, a3, v1 | |
359 movs a3, a3, asr #20 | |
360 movmi a3, #0 | |
361 cmp a3, #255 | |
362 movgt a3, #255 | |
363 sub a4, a4, v2 | |
364 movs a4, a4, asr #20 | |
365 movmi a4, #0 | |
366 cmp a4, #255 | |
367 ldr v1, [sp, #28] | |
368 movgt a4, #255 | |
369 strh a2, [v1] | |
370 add a2, v1, #2 | |
371 str a2, [sp, #28] | |
372 orr a2, a3, a4, lsl #8 | |
373 rsb v2, lr, lr, lsl #3 | |
374 ldmfd sp!, {a3, a4} | |
375 strh a2, [v2, v1]! | |
376 | |
377 sub a2, a3, v3 | |
378 movs a2, a2, asr #20 | |
379 movmi a2, #0 | |
380 cmp a2, #255 | |
381 movgt a2, #255 | |
382 sub ip, a4, v4 | |
383 movs ip, ip, asr #20 | |
384 movmi ip, #0 | |
385 cmp ip, #255 | |
386 movgt ip, #255 | |
387 orr a2, a2, ip, lsl #8 | |
388 strh a2, [v1, lr]! | |
389 add a3, a3, v3 | |
390 movs a2, a3, asr #20 | |
391 movmi a2, #0 | |
392 cmp a2, #255 | |
393 movgt a2, #255 | |
394 add a4, a4, v4 | |
395 movs a4, a4, asr #20 | |
396 movmi a4, #0 | |
397 cmp a4, #255 | |
398 movgt a4, #255 | |
399 orr a2, a2, a4, lsl #8 | |
400 ldmfd sp!, {a3, a4} | |
401 strh a2, [v2, -lr]! | |
402 | |
403 add a2, a3, v5 | |
404 movs a2, a2, asr #20 | |
405 movmi a2, #0 | |
406 cmp a2, #255 | |
407 movgt a2, #255 | |
408 add ip, a4, v6 | |
409 movs ip, ip, asr #20 | |
410 movmi ip, #0 | |
411 cmp ip, #255 | |
412 movgt ip, #255 | |
413 orr a2, a2, ip, lsl #8 | |
414 strh a2, [v1, lr]! | |
415 sub a3, a3, v5 | |
416 movs a2, a3, asr #20 | |
417 movmi a2, #0 | |
418 cmp a2, #255 | |
419 movgt a2, #255 | |
420 sub a4, a4, v6 | |
421 movs a4, a4, asr #20 | |
422 movmi a4, #0 | |
423 cmp a4, #255 | |
424 movgt a4, #255 | |
425 orr a2, a2, a4, lsl #8 | |
426 ldmfd sp!, {a3, a4} | |
427 strh a2, [v2, -lr]! | |
428 | |
429 add a2, a3, v7 | |
430 movs a2, a2, asr #20 | |
431 movmi a2, #0 | |
432 cmp a2, #255 | |
433 movgt a2, #255 | |
434 add ip, a4, fp | |
435 movs ip, ip, asr #20 | |
436 movmi ip, #0 | |
437 cmp ip, #255 | |
438 movgt ip, #255 | |
439 orr a2, a2, ip, lsl #8 | |
440 strh a2, [v1, lr] | |
441 sub a3, a3, v7 | |
442 movs a2, a3, asr #20 | |
443 movmi a2, #0 | |
444 cmp a2, #255 | |
445 movgt a2, #255 | |
446 sub a4, a4, fp | |
447 movs a4, a4, asr #20 | |
448 movmi a4, #0 | |
449 cmp a4, #255 | |
450 movgt a4, #255 | |
451 orr a2, a2, a4, lsl #8 | |
452 strh a2, [v2, -lr] | |
453 | |
454 ldr pc, [sp], #4 | |
455 .endfunc | |
456 | |
457 .align | |
458 .func idct_col_add_armv5te | |
459 idct_col_add_armv5te: | |
460 str lr, [sp, #-4]! | |
461 | |
462 idct_col | |
463 | |
464 ldr lr, [sp, #36] | |
465 | |
466 ldmfd sp!, {a3, a4} | |
467 ldrh ip, [lr] | |
468 add a2, a3, v1 | |
469 mov a2, a2, asr #20 | |
470 sub a3, a3, v1 | |
471 and v1, ip, #255 | |
472 adds a2, a2, v1 | |
473 movmi a2, #0 | |
474 cmp a2, #255 | |
475 movgt a2, #255 | |
476 add v1, a4, v2 | |
477 mov v1, v1, asr #20 | |
478 adds v1, v1, ip, lsr #8 | |
479 movmi v1, #0 | |
480 cmp v1, #255 | |
481 movgt v1, #255 | |
482 orr a2, a2, v1, lsl #8 | |
483 ldr v1, [sp, #32] | |
484 sub a4, a4, v2 | |
485 rsb v2, v1, v1, lsl #3 | |
486 ldrh ip, [v2, lr]! | |
487 strh a2, [lr] | |
488 mov a3, a3, asr #20 | |
489 and a2, ip, #255 | |
490 adds a3, a3, a2 | |
491 movmi a3, #0 | |
492 cmp a3, #255 | |
493 movgt a3, #255 | |
494 mov a4, a4, asr #20 | |
495 adds a4, a4, ip, lsr #8 | |
496 movmi a4, #0 | |
497 cmp a4, #255 | |
498 movgt a4, #255 | |
499 add a2, lr, #2 | |
500 str a2, [sp, #28] | |
501 orr a2, a3, a4, lsl #8 | |
502 strh a2, [v2] | |
503 | |
504 ldmfd sp!, {a3, a4} | |
505 ldrh ip, [lr, v1]! | |
506 sub a2, a3, v3 | |
507 mov a2, a2, asr #20 | |
508 add a3, a3, v3 | |
509 and v3, ip, #255 | |
510 adds a2, a2, v3 | |
511 movmi a2, #0 | |
512 cmp a2, #255 | |
513 movgt a2, #255 | |
514 sub v3, a4, v4 | |
515 mov v3, v3, asr #20 | |
516 adds v3, v3, ip, lsr #8 | |
517 movmi v3, #0 | |
518 cmp v3, #255 | |
519 movgt v3, #255 | |
520 orr a2, a2, v3, lsl #8 | |
521 add a4, a4, v4 | |
522 ldrh ip, [v2, -v1]! | |
523 strh a2, [lr] | |
524 mov a3, a3, asr #20 | |
525 and a2, ip, #255 | |
526 adds a3, a3, a2 | |
527 movmi a3, #0 | |
528 cmp a3, #255 | |
529 movgt a3, #255 | |
530 mov a4, a4, asr #20 | |
531 adds a4, a4, ip, lsr #8 | |
532 movmi a4, #0 | |
533 cmp a4, #255 | |
534 movgt a4, #255 | |
535 orr a2, a3, a4, lsl #8 | |
536 strh a2, [v2] | |
537 | |
538 ldmfd sp!, {a3, a4} | |
539 ldrh ip, [lr, v1]! | |
540 add a2, a3, v5 | |
541 mov a2, a2, asr #20 | |
542 sub a3, a3, v5 | |
543 and v3, ip, #255 | |
544 adds a2, a2, v3 | |
545 movmi a2, #0 | |
546 cmp a2, #255 | |
547 movgt a2, #255 | |
548 add v3, a4, v6 | |
549 mov v3, v3, asr #20 | |
550 adds v3, v3, ip, lsr #8 | |
551 movmi v3, #0 | |
552 cmp v3, #255 | |
553 movgt v3, #255 | |
554 orr a2, a2, v3, lsl #8 | |
555 sub a4, a4, v6 | |
556 ldrh ip, [v2, -v1]! | |
557 strh a2, [lr] | |
558 mov a3, a3, asr #20 | |
559 and a2, ip, #255 | |
560 adds a3, a3, a2 | |
561 movmi a3, #0 | |
562 cmp a3, #255 | |
563 movgt a3, #255 | |
564 mov a4, a4, asr #20 | |
565 adds a4, a4, ip, lsr #8 | |
566 movmi a4, #0 | |
567 cmp a4, #255 | |
568 movgt a4, #255 | |
569 orr a2, a3, a4, lsl #8 | |
570 strh a2, [v2] | |
571 | |
572 ldmfd sp!, {a3, a4} | |
573 ldrh ip, [lr, v1]! | |
574 add a2, a3, v7 | |
575 mov a2, a2, asr #20 | |
576 sub a3, a3, v7 | |
577 and v3, ip, #255 | |
578 adds a2, a2, v3 | |
579 movmi a2, #0 | |
580 cmp a2, #255 | |
581 movgt a2, #255 | |
582 add v3, a4, fp | |
583 mov v3, v3, asr #20 | |
584 adds v3, v3, ip, lsr #8 | |
585 movmi v3, #0 | |
586 cmp v3, #255 | |
587 movgt v3, #255 | |
588 orr a2, a2, v3, lsl #8 | |
589 sub a4, a4, fp | |
590 ldrh ip, [v2, -v1]! | |
591 strh a2, [lr] | |
592 mov a3, a3, asr #20 | |
593 and a2, ip, #255 | |
594 adds a3, a3, a2 | |
595 movmi a3, #0 | |
596 cmp a3, #255 | |
597 movgt a3, #255 | |
598 mov a4, a4, asr #20 | |
599 adds a4, a4, ip, lsr #8 | |
600 movmi a4, #0 | |
601 cmp a4, #255 | |
602 movgt a4, #255 | |
603 orr a2, a3, a4, lsl #8 | |
604 strh a2, [v2] | |
605 | |
606 ldr pc, [sp], #4 | |
607 .endfunc | |
608 | |
609 .align | |
610 .global simple_idct_armv5te | |
611 .func simple_idct_armv5te | |
612 simple_idct_armv5te: | |
613 stmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, lr} | |
614 | |
615 bl idct_row_armv5te | |
616 add a1, a1, #16 | |
617 bl idct_row_armv5te | |
618 add a1, a1, #16 | |
619 bl idct_row_armv5te | |
620 add a1, a1, #16 | |
621 bl idct_row_armv5te | |
622 add a1, a1, #16 | |
623 bl idct_row_armv5te | |
624 add a1, a1, #16 | |
625 bl idct_row_armv5te | |
626 add a1, a1, #16 | |
627 bl idct_row_armv5te | |
628 add a1, a1, #16 | |
629 bl idct_row_armv5te | |
630 | |
631 sub a1, a1, #(16*7) | |
632 | |
633 bl idct_col_armv5te | |
634 add a1, a1, #4 | |
635 bl idct_col_armv5te | |
636 add a1, a1, #4 | |
637 bl idct_col_armv5te | |
638 add a1, a1, #4 | |
639 bl idct_col_armv5te | |
640 | |
641 ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc} | |
642 .endfunc | |
643 | |
644 .align | |
645 .global simple_idct_add_armv5te | |
646 .func simple_idct_add_armv5te | |
647 simple_idct_add_armv5te: | |
648 stmfd sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr} | |
649 | |
650 mov a1, a3 | |
651 | |
652 bl idct_row_armv5te | |
653 add a1, a1, #16 | |
654 bl idct_row_armv5te | |
655 add a1, a1, #16 | |
656 bl idct_row_armv5te | |
657 add a1, a1, #16 | |
658 bl idct_row_armv5te | |
659 add a1, a1, #16 | |
660 bl idct_row_armv5te | |
661 add a1, a1, #16 | |
662 bl idct_row_armv5te | |
663 add a1, a1, #16 | |
664 bl idct_row_armv5te | |
665 add a1, a1, #16 | |
666 bl idct_row_armv5te | |
667 | |
668 sub a1, a1, #(16*7) | |
669 | |
670 bl idct_col_add_armv5te | |
671 add a1, a1, #4 | |
672 bl idct_col_add_armv5te | |
673 add a1, a1, #4 | |
674 bl idct_col_add_armv5te | |
675 add a1, a1, #4 | |
676 bl idct_col_add_armv5te | |
677 | |
678 add sp, sp, #8 | |
679 ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc} | |
680 .endfunc | |
681 | |
682 .align | |
683 .global simple_idct_put_armv5te | |
684 .func simple_idct_put_armv5te | |
685 simple_idct_put_armv5te: | |
686 stmfd sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr} | |
687 | |
688 mov a1, a3 | |
689 | |
690 bl idct_row_armv5te | |
691 add a1, a1, #16 | |
692 bl idct_row_armv5te | |
693 add a1, a1, #16 | |
694 bl idct_row_armv5te | |
695 add a1, a1, #16 | |
696 bl idct_row_armv5te | |
697 add a1, a1, #16 | |
698 bl idct_row_armv5te | |
699 add a1, a1, #16 | |
700 bl idct_row_armv5te | |
701 add a1, a1, #16 | |
702 bl idct_row_armv5te | |
703 add a1, a1, #16 | |
704 bl idct_row_armv5te | |
705 | |
706 sub a1, a1, #(16*7) | |
707 | |
708 bl idct_col_put_armv5te | |
709 add a1, a1, #4 | |
710 bl idct_col_put_armv5te | |
711 add a1, a1, #4 | |
712 bl idct_col_put_armv5te | |
713 add a1, a1, #4 | |
714 bl idct_col_put_armv5te | |
715 | |
716 add sp, sp, #8 | |
717 ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc} | |
718 .endfunc |