Mercurial > libavcodec.hg
annotate arm/simple_idct_armv5te.S @ 9629:3c98f3e1b719 libavcodec
Fix bandwith vs. bandwiDth typo.
author | diego |
---|---|
date | Tue, 12 May 2009 23:40:22 +0000 |
parents | 9281a8a9387a |
children | 989ea69f6a4e |
rev | line source |
---|---|
3769 | 1 /* |
2 * Simple IDCT | |
3 * | |
4 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at> | |
5220 | 5 * Copyright (c) 2006 Mans Rullgard <mans@mansr.com> |
3769 | 6 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3769
diff
changeset
|
7 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3769
diff
changeset
|
8 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3769
diff
changeset
|
9 * FFmpeg is free software; you can redistribute it and/or |
3769 | 10 * modify it under the terms of the GNU Lesser General Public |
11 * License as published by the Free Software Foundation; either | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3769
diff
changeset
|
12 * version 2.1 of the License, or (at your option) any later version. |
3769 | 13 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3769
diff
changeset
|
14 * FFmpeg is distributed in the hope that it will be useful, |
3769 | 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 * Lesser General Public License for more details. | |
18 * | |
19 * You should have received a copy of the GNU Lesser General Public | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3769
diff
changeset
|
20 * License along with FFmpeg; if not, write to the Free Software |
3769 | 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
22 */ | |
23 | |
8069 | 24 #include "asm.S" |
25 | |
/* Fixed-point cosine constants: Wi is the formula in the trailing comment evaluated for index i. */
3769 | 26 #define W1 22725 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ |
27 #define W2 21407 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ | |
28 #define W3 19266 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ | |
/* W4 is (1<<14)-1, one less than the 16384 the formula yields for i = 4; the code builds it as 16384 - 1 or as (x << 14) - x. */
29 #define W4 16383 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ | |
30 #define W5 12873 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ | |
31 #define W6 8867 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ | |
32 #define W7 4520 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ | |
/* Right shifts applied to the row-pass and column-pass sums; the shift instructions below spell them as the literals 11 and 20. */
33 #define ROW_SHIFT 11 | |
34 #define COL_SHIFT 20 | |
35 | |
/* Two 16-bit constants packed per word: first index in the low half, second in the high half, */
/* so the smulxy/smlaxy instructions can pick either one with their b (bottom) / t (top) selector. */
36 #define W13 (W1 | (W3 << 16)) | |
37 #define W26 (W2 | (W6 << 16)) | |
38 #define W57 (W5 | (W7 << 16)) | |
39 | |
/* Literal words kept in .text so the code can fetch them with pc-relative loads of the form */
/* [pc, #(label-.-8)]; the -8 compensates for pc reading 8 bytes ahead of the current instruction. */
40 .text | |
41 .align | |
42 w13: .long W13 | |
43 w26: .long W26 | |
44 w57: .long W57 | |
45 | |
/* idct_row_armv5te: in-place row pass over the eight 16-bit values at [a1]. */
/* In:   a1 = address of the row (16 bytes are read and written); a1 itself is not modified. */
/* Out:  row[0..7] overwritten with the sums shifted right by 11 (ROW_SHIFT), truncated to 16 bits. */
/* Uses: a2-a4, v1-v7, fp, ip without saving them (only lr is pushed); the simple_idct_* entry */
/*       points in this file push v1-v7 and fp before calling. */
/* NOTE(review): `function` / `.endfunc` presumably come from asm.S, which is not visible here. */
8069 | 46 function idct_row_armv5te |
3769 | 47 str lr, [sp, #-4]! |
48 | |
/* Take the row_dc_only shortcut when row[1..7] are all zero: Z survives the chain only if */
/* row[4..7] (v1|v2), row[3:2] (a4) and row[1] (a3 >> 16) are each zero. */
49 ldrd v1, [a1, #8] | |
50 ldrd a3, [a1] /* a3 = row[1:0], a4 = row[3:2] */ | |
51 orrs v1, v1, v2 | |
52 cmpeq v1, a4 | |
53 cmpeq v1, a3, lsr #16 | |
54 beq row_dc_only | |
55 | |
/* Even part from row[0] and row[2]: a2 = W6*row[2], lr = W2*row[2], giving */
/* v1 = base + W2*row[2], v2 = base + W6*row[2], v3 = base - W6*row[2], v4 = base - W2*row[2], */
/* where base = W4*row[0] + rounding. */
56 mov v1, #(1<<(ROW_SHIFT-1)) | |
57 mov ip, #16384 | |
58 sub ip, ip, #1 /* ip = W4 */ | |
59 smlabb v1, ip, a3, v1 /* v1 = W4*row[0]+(1<<(RS-1)) */ | |
60 ldr ip, [pc, #(w26-.-8)] /* ip = W2 | (W6 << 16) */ | |
61 smultb a2, ip, a4 | |
62 smulbb lr, ip, a4 | |
63 add v2, v1, a2 | |
64 sub v3, v1, a2 | |
65 sub v4, v1, lr | |
66 add v1, v1, lr | |
67 | |
/* Odd part from row[1] and row[3]: */
/*   v5 = W1*row[1] + W3*row[3]      v6 = W7*row[3] - W3*row[1] */
/*   v7 = W5*row[1] - W1*row[3]      fp = W7*row[1] - W5*row[3] */
/* v6 carries the opposite sign to the others, which is why the output stage subtracts it for row[1]. */
68 ldr ip, [pc, #(w13-.-8)] /* ip = W1 | (W3 << 16) */ | |
69 ldr lr, [pc, #(w57-.-8)] /* lr = W5 | (W7 << 16) */ | |
70 smulbt v5, ip, a3 | |
71 smultt v6, lr, a4 | |
72 smlatt v5, ip, a4, v5 | |
73 smultt a2, ip, a3 | |
74 smulbt v7, lr, a3 | |
75 sub v6, v6, a2 | |
76 smulbt a2, ip, a4 | |
77 smultt fp, lr, a3 | |
78 sub v7, v7, a2 | |
79 smulbt a2, lr, a4 | |
80 ldrd a3, [a1, #8] /* a3=row[5:4] a4=row[7:6] */ | |
81 sub fp, fp, a2 | |
82 | |
/* If row[4..7] are all zero their contributions are skipped entirely. */
83 orrs a2, a3, a4 | |
84 beq 1f | |
85 | |
/* Odd contributions of row[5] and row[7]: */
/*   v5 += W5*row[5] + W7*row[7]     v6 += W1*row[5] + W5*row[7] */
/*   v7 += W7*row[5] + W3*row[7]     fp += W3*row[5] - W1*row[7] */
86 smlabt v5, lr, a3, v5 | |
87 smlabt v6, ip, a3, v6 | |
88 smlatt v5, lr, a4, v5 | |
89 smlabt v6, lr, a4, v6 | |
90 smlatt v7, lr, a3, v7 | |
91 smlatt fp, ip, a3, fp | |
92 smulbt a2, ip, a4 | |
93 smlatt v7, ip, a4, v7 | |
94 sub fp, fp, a2 | |
95 | |
/* Even contributions of row[4] and row[6]. */
96 ldr ip, [pc, #(w26-.-8)] /* ip = W2 | (W6 << 16) */ | |
97 mov a2, #16384 | |
98 sub a2, a2, #1 /* a2 = W4 */ | |
99 smulbb a2, a2, a3 /* a2 = W4*row[4] */ | |
100 smultb lr, ip, a4 /* lr = W6*row[6] */ | |
101 add v1, v1, a2 /* v1 += W4*row[4] */ | |
102 add v1, v1, lr /* v1 += W6*row[6] */ | |
103 add v4, v4, a2 /* v4 += W4*row[4] */ | |
104 sub v4, v4, lr /* v4 -= W6*row[6] */ | |
105 smulbb lr, ip, a4 /* lr = W2*row[6] */ | |
106 sub v2, v2, a2 /* v2 -= W4*row[4] */ | |
107 sub v2, v2, lr /* v2 -= W2*row[6] */ | |
108 sub v3, v3, a2 /* v3 -= W4*row[4] */ | |
109 add v3, v3, lr /* v3 += W2*row[6] */ | |
110 | |
/* Output stage: each result is (even +/- odd) >> 11. The low halfword is produced with lsr #11 */
/* followed by bic #0x1f0000 (clears bits 16-20, so the upper half is zero); the second result */
/* is then placed in the upper half with lsl #16. */
/* First store: row[0] = v1+v5, row[1] = v2-v6, row[2] = v3+v7, row[3] = v4+fp. */
111 1: add a2, v1, v5 | |
112 mov a3, a2, lsr #11 | |
113 bic a3, a3, #0x1f0000 | |
114 sub a2, v2, v6 | |
115 mov a2, a2, lsr #11 | |
116 add a3, a3, a2, lsl #16 | |
117 add a2, v3, v7 | |
118 mov a4, a2, lsr #11 | |
119 bic a4, a4, #0x1f0000 | |
120 add a2, v4, fp | |
121 mov a2, a2, lsr #11 | |
122 add a4, a4, a2, lsl #16 | |
123 strd a3, [a1] | |
124 | |
/* Second store: row[4] = v4-fp, row[5] = v3-v7, row[6] = v2+v6, row[7] = v1-v5. */
125 sub a2, v4, fp | |
126 mov a3, a2, lsr #11 | |
127 bic a3, a3, #0x1f0000 | |
128 sub a2, v3, v7 | |
129 mov a2, a2, lsr #11 | |
130 add a3, a3, a2, lsl #16 | |
131 add a2, v2, v6 | |
132 mov a4, a2, lsr #11 | |
133 bic a4, a4, #0x1f0000 | |
134 sub a2, v1, v5 | |
135 mov a2, a2, lsr #11 | |
136 add a4, a4, a2, lsl #16 | |
137 strd a3, [a1, #8] | |
138 | |
/* Return by popping the saved lr straight into pc. */
139 ldr pc, [sp], #4 | |
140 | |
/* DC-only row: a3 holds row[0] in its low half and zero above (row[1] == 0 was just checked). */
/* Duplicate row[0] into both halves, clear the top 3 bits of the low half so the shift cannot */
/* spill into the upper half, shift left by 3, and write the same word to all eight outputs. */
141 row_dc_only: | |
142 orr a3, a3, a3, lsl #16 | |
143 bic a3, a3, #0xe000 | |
144 mov a3, a3, lsl #3 | |
145 mov a4, a3 | |
146 strd a3, [a1] | |
147 strd a3, [a1, #8] | |
148 | |
149 ldr pc, [sp], #4 | |
150 .endfunc | |
151 | |
/* idct_col: shared column-pass core, processing two adjacent 16-bit columns at once. */
/* In:   a1 = address of the 32-bit word holding the two columns in row 0; row k is read at */
/*       [a1, #(16*k)]. The low halfword is the first column, the high halfword the second. */
/* Out:  eight even-part sums pushed on the stack in the order v1,v2,...,v7,fp (v1 at the lowest */
/*       address): pairs (first column, second column) for the four even combinations. */
/*       v1..v7,fp then hold the four odd-part sums in the same pair layout */
/*       (v1/v2, v3/v4, v5/v6, v7/fp); the v3/v4 pair is stored negated. */
/* Uses: a2, a3, a4, ip, lr as scratch. sp is left 32 bytes lower; the user pops the 8 words. */
152 .macro idct_col | |
153 ldr a4, [a1] /* a4 = col[1:0] */ | |
154 mov ip, #16384 | |
155 sub ip, ip, #1 /* ip = W4 */ | |
/* The #if 0 branch is disabled; it would add the rounding term after multiplying by W4. */
156 #if 0 | |
157 mov v1, #(1<<(COL_SHIFT-1)) | |
158 smlabt v2, ip, a4, v1 /* v2 = W4*col[1] + (1<<(COL_SHIFT-1)) */ | |
159 smlabb v1, ip, a4, v1 /* v1 = W4*col[0] + (1<<(COL_SHIFT-1)) */ | |
160 ldr a4, [a1, #(16*4)] | |
161 #else | |
/* Active branch: add the rounding term (524288/16383, i.e. 32 in integer arithmetic) to the */
/* sign-extended input first, then multiply by W4 as (x << 14) - x. */
/* v2 = W4*(high half + 32), v1 = W4*(low half + 32). */
162 mov v1, #((1<<(COL_SHIFT-1))/W4) /* this matches the C version */ | |
163 add v2, v1, a4, asr #16 | |
164 rsb v2, v2, v2, lsl #14 | |
165 mov a4, a4, lsl #16 | |
166 add v1, v1, a4, asr #16 | |
167 ldr a4, [a1, #(16*4)] | |
168 rsb v1, v1, v1, lsl #14 | |
169 #endif | |
170 | |
/* Row 4 (a4 loaded above): lr = W4*low half, a3 = W4*high half. */
/* First column: v1 and v7 get +lr, v3 and v5 get -lr. Second column: v2 and fp get +a3, */
/* v4 and v6 get -a3. */
171 smulbb lr, ip, a4 | |
172 smulbt a3, ip, a4 | |
173 sub v3, v1, lr | |
174 sub v5, v1, lr | |
175 add v7, v1, lr | |
176 add v1, v1, lr | |
177 sub v4, v2, a3 | |
178 sub v6, v2, a3 | |
179 add fp, v2, a3 | |
/* ip = W2 | (W6 << 16) for rows 2 and 6. */
180 ldr ip, [pc, #(w26-.-8)] | |
181 ldr a4, [a1, #(16*2)] | |
182 add v2, v2, a3 | |
183 | |
/* Row 2: +W2*x to v1/v2, -W2*x to v7/fp, +W6*x to v3/v4, -W6*x to v5/v6. */
184 smulbb lr, ip, a4 | |
185 smultb a3, ip, a4 | |
186 add v1, v1, lr | |
187 sub v7, v7, lr | |
188 add v3, v3, a3 | |
189 sub v5, v5, a3 | |
190 smulbt lr, ip, a4 | |
191 smultt a3, ip, a4 | |
192 add v2, v2, lr | |
193 sub fp, fp, lr | |
194 add v4, v4, a3 | |
195 ldr a4, [a1, #(16*6)] | |
196 sub v6, v6, a3 | |
197 | |
/* Row 6: +W6*x to v1/v2, -W6*x to v7/fp, -W2*x to v3/v4, +W2*x to v5/v6. */
198 smultb lr, ip, a4 | |
199 smulbb a3, ip, a4 | |
200 add v1, v1, lr | |
201 sub v7, v7, lr | |
202 sub v3, v3, a3 | |
203 add v5, v5, a3 | |
204 smultt lr, ip, a4 | |
205 smulbt a3, ip, a4 | |
206 add v2, v2, lr | |
207 sub fp, fp, lr | |
208 sub v4, v4, a3 | |
209 add v6, v6, a3 | |
210 | |
/* Spill the even-part sums so v1..v7,fp can be reused for the odd part. */
211 stmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp} | |
212 | |
/* Row 1 (ip = W1 | (W3 << 16), lr = W5 | (W7 << 16)): */
/* v1/v2 = W1*x, v3/v4 = -W3*x (negated by the rsb instructions), v5/v6 = W5*x, v7/fp = W7*x. */
213 ldr ip, [pc, #(w13-.-8)] | |
214 ldr a4, [a1, #(16*1)] | |
215 ldr lr, [pc, #(w57-.-8)] | |
216 smulbb v1, ip, a4 | |
217 smultb v3, ip, a4 | |
218 smulbb v5, lr, a4 | |
219 smultb v7, lr, a4 | |
220 smulbt v2, ip, a4 | |
221 smultt v4, ip, a4 | |
222 smulbt v6, lr, a4 | |
223 smultt fp, lr, a4 | |
224 rsb v4, v4, #0 | |
225 ldr a4, [a1, #(16*3)] | |
226 rsb v3, v3, #0 | |
227 | |
/* Row 3: v1/v2 += W3*x, v3/v4 += W7*x, v5/v6 -= W1*x, v7/fp -= W5*x. */
228 smlatb v1, ip, a4, v1 | |
229 smlatb v3, lr, a4, v3 | |
230 smulbb a3, ip, a4 | |
231 smulbb a2, lr, a4 | |
232 sub v5, v5, a3 | |
233 sub v7, v7, a2 | |
234 smlatt v2, ip, a4, v2 | |
235 smlatt v4, lr, a4, v4 | |
236 smulbt a3, ip, a4 | |
237 smulbt a2, lr, a4 | |
238 sub v6, v6, a3 | |
239 ldr a4, [a1, #(16*5)] | |
240 sub fp, fp, a2 | |
241 | |
/* Row 5: v1/v2 += W5*x, v3/v4 += W1*x, v5/v6 += W7*x, v7/fp += W3*x. */
242 smlabb v1, lr, a4, v1 | |
243 smlabb v3, ip, a4, v3 | |
244 smlatb v5, lr, a4, v5 | |
245 smlatb v7, ip, a4, v7 | |
246 smlabt v2, lr, a4, v2 | |
247 smlabt v4, ip, a4, v4 | |
248 smlatt v6, lr, a4, v6 | |
249 ldr a3, [a1, #(16*7)] | |
250 smlatt fp, ip, a4, fp | |
251 | |
/* Row 7 (in a3): v1/v2 += W7*x, v3/v4 += W5*x, v5/v6 += W3*x, v7/fp -= W1*x. */
252 smlatb v1, lr, a3, v1 | |
253 smlabb v3, lr, a3, v3 | |
254 smlatb v5, ip, a3, v5 | |
255 smulbb a4, ip, a3 | |
256 smlatt v2, lr, a3, v2 | |
257 sub v7, v7, a4 | |
258 smlabt v4, lr, a3, v4 | |
259 smulbt a4, ip, a3 | |
260 smlatt v6, ip, a3, v6 | |
261 sub fp, fp, a4 | |
262 .endm | |
263 | |
/* idct_col_armv5te: in-place column pass for the two 16-bit columns at [a1]. */
/* In:   a1 = address of the column pair in row 0; rows are 16 bytes apart. a1 is not modified. */
/* Out:  each row's word rewritten with (even +/- odd) >> 20 (COL_SHIFT) for both columns. */
/* Uses: a2-a4, ip, v1-v7, fp, plus whatever the idct_col macro clobbers; only lr is saved here. */
8069 | 264 function idct_col_armv5te |
3769 | 265 str lr, [sp, #-4]! |
266 | |
267 idct_col | |
268 | |
/* Each ldmfd pops one even-sum pair pushed by the macro: a3 = first column, a4 = second column. */
/* Low halfword: adds/subs sets N from the sum, lsr #20 leaves a 12-bit value (the mov has no S */
/* suffix, so N is still the sum's sign), and orrmi #0xf000 fills bits 12-15 for negative sums. */
/* High halfword: asr #20 then lsl #16 when merging. */
/* Pair 0: row 0 = even + odd, row 7 = even - odd. */
269 ldmfd sp!, {a3, a4} | |
270 adds a2, a3, v1 | |
271 mov a2, a2, lsr #20 | |
272 orrmi a2, a2, #0xf000 | |
273 add ip, a4, v2 | |
274 mov ip, ip, asr #20 | |
275 orr a2, a2, ip, lsl #16 | |
276 str a2, [a1] | |
277 subs a3, a3, v1 | |
278 mov a2, a3, lsr #20 | |
279 orrmi a2, a2, #0xf000 | |
280 sub a4, a4, v2 | |
281 mov a4, a4, asr #20 | |
282 orr a2, a2, a4, lsl #16 | |
283 ldmfd sp!, {a3, a4} | |
284 str a2, [a1, #(16*7)] | |
285 | |
/* Pair 1: v3/v4 hold the negated odd sum, so row 1 subtracts and row 6 adds. */
286 subs a2, a3, v3 | |
287 mov a2, a2, lsr #20 | |
288 orrmi a2, a2, #0xf000 | |
289 sub ip, a4, v4 | |
290 mov ip, ip, asr #20 | |
291 orr a2, a2, ip, lsl #16 | |
292 str a2, [a1, #(16*1)] | |
293 adds a3, a3, v3 | |
294 mov a2, a3, lsr #20 | |
295 orrmi a2, a2, #0xf000 | |
296 add a4, a4, v4 | |
297 mov a4, a4, asr #20 | |
298 orr a2, a2, a4, lsl #16 | |
299 ldmfd sp!, {a3, a4} | |
300 str a2, [a1, #(16*6)] | |
301 | |
/* Pair 2: row 2 = even + odd, row 5 = even - odd. */
302 adds a2, a3, v5 | |
303 mov a2, a2, lsr #20 | |
304 orrmi a2, a2, #0xf000 | |
305 add ip, a4, v6 | |
306 mov ip, ip, asr #20 | |
307 orr a2, a2, ip, lsl #16 | |
308 str a2, [a1, #(16*2)] | |
309 subs a3, a3, v5 | |
310 mov a2, a3, lsr #20 | |
311 orrmi a2, a2, #0xf000 | |
312 sub a4, a4, v6 | |
313 mov a4, a4, asr #20 | |
314 orr a2, a2, a4, lsl #16 | |
315 ldmfd sp!, {a3, a4} | |
316 str a2, [a1, #(16*5)] | |
317 | |
/* Pair 3: row 3 = even + odd, row 4 = even - odd. */
318 adds a2, a3, v7 | |
319 mov a2, a2, lsr #20 | |
320 orrmi a2, a2, #0xf000 | |
321 add ip, a4, fp | |
322 mov ip, ip, asr #20 | |
323 orr a2, a2, ip, lsl #16 | |
324 str a2, [a1, #(16*3)] | |
325 subs a3, a3, v7 | |
326 mov a2, a3, lsr #20 | |
327 orrmi a2, a2, #0xf000 | |
328 sub a4, a4, fp | |
329 mov a4, a4, asr #20 | |
330 orr a2, a2, a4, lsl #16 | |
331 str a2, [a1, #(16*4)] | |
332 | |
/* All 8 macro words have been popped; return by loading the saved lr into pc. */
333 ldr pc, [sp], #4 | |
334 .endfunc | |
335 | |
/* idct_col_put_armv5te: column pass for two columns whose results are clamped to 0..255 and */
/* stored as byte pairs through an output pointer taken from the caller's stack frame. */
/* In:   a1 = address of the column pair in row 0 of the coefficient block (rows 16 bytes apart). */
/*       Stack: directly above this function's saved lr it expects the two words pushed first by */
/*       simple_idct_put_armv5te (its a1, then its a2). The a1 word is used as the output */
/*       address, the a2 word as the byte distance between output rows. */
/* Out:  8 halfwords written (one per output row); the stacked output address is advanced by 2 */
/*       so the next call writes the next two bytes of each row. */
/* Uses: a2-a4, ip, lr, v1-v7, fp; only the return address is saved. */
8069 | 336 function idct_col_put_armv5te |
3769 | 337 str lr, [sp, #-4]! |
338 | |
339 idct_col | |
340 | |
/* After this pop, 6 macro words + saved lr = 28 bytes remain below the caller's frame: */
/* [sp, #28] is the caller's a1 slot and [sp, #32] its a2 slot. */
341 ldmfd sp!, {a3, a4} | |
342 ldr lr, [sp, #32] | |
/* Clamp idiom used throughout: movs ..., asr #20 sets N; movmi forces negatives to 0; */
/* cmp/movgt caps at 255. The second column's byte goes in bits 8-15. */
/* Row 0 pair (even + odd) in a2. */
343 add a2, a3, v1 | |
344 movs a2, a2, asr #20 | |
345 movmi a2, #0 | |
346 cmp a2, #255 | |
347 movgt a2, #255 | |
348 add ip, a4, v2 | |
349 movs ip, ip, asr #20 | |
350 movmi ip, #0 | |
351 cmp ip, #255 | |
352 movgt ip, #255 | |
353 orr a2, a2, ip, lsl #8 | |
/* Row 7 pair (even - odd) in a3/a4. */
354 sub a3, a3, v1 | |
355 movs a3, a3, asr #20 | |
356 movmi a3, #0 | |
357 cmp a3, #255 | |
358 movgt a3, #255 | |
359 sub a4, a4, v2 | |
360 movs a4, a4, asr #20 | |
361 movmi a4, #0 | |
362 cmp a4, #255 | |
/* v1 and v2 are free from here on: v1 becomes the output address walking down from row 0. */
363 ldr v1, [sp, #28] | |
364 movgt a4, #255 | |
365 strh a2, [v1] | |
/* Advance the stacked output address by 2 bytes for the next call. */
366 add a2, v1, #2 | |
367 str a2, [sp, #28] | |
368 orr a2, a3, a4, lsl #8 | |
/* v2 = 7 * row distance; the writeback store leaves v2 at row 7, walking upwards afterwards. */
369 rsb v2, lr, lr, lsl #3 | |
370 ldmfd sp!, {a3, a4} | |
371 strh a2, [v2, v1]! | |
372 | |
/* Pair 1: v3/v4 hold the negated odd sum, so row 1 subtracts and row 6 adds. */
373 sub a2, a3, v3 | |
374 movs a2, a2, asr #20 | |
375 movmi a2, #0 | |
376 cmp a2, #255 | |
377 movgt a2, #255 | |
378 sub ip, a4, v4 | |
379 movs ip, ip, asr #20 | |
380 movmi ip, #0 | |
381 cmp ip, #255 | |
382 movgt ip, #255 | |
383 orr a2, a2, ip, lsl #8 | |
384 strh a2, [v1, lr]! | |
385 add a3, a3, v3 | |
386 movs a2, a3, asr #20 | |
387 movmi a2, #0 | |
388 cmp a2, #255 | |
389 movgt a2, #255 | |
390 add a4, a4, v4 | |
391 movs a4, a4, asr #20 | |
392 movmi a4, #0 | |
393 cmp a4, #255 | |
394 movgt a4, #255 | |
395 orr a2, a2, a4, lsl #8 | |
396 ldmfd sp!, {a3, a4} | |
397 strh a2, [v2, -lr]! | |
398 | |
/* Pair 2: row 2 = even + odd, row 5 = even - odd. */
399 add a2, a3, v5 | |
400 movs a2, a2, asr #20 | |
401 movmi a2, #0 | |
402 cmp a2, #255 | |
403 movgt a2, #255 | |
404 add ip, a4, v6 | |
405 movs ip, ip, asr #20 | |
406 movmi ip, #0 | |
407 cmp ip, #255 | |
408 movgt ip, #255 | |
409 orr a2, a2, ip, lsl #8 | |
410 strh a2, [v1, lr]! | |
411 sub a3, a3, v5 | |
412 movs a2, a3, asr #20 | |
413 movmi a2, #0 | |
414 cmp a2, #255 | |
415 movgt a2, #255 | |
416 sub a4, a4, v6 | |
417 movs a4, a4, asr #20 | |
418 movmi a4, #0 | |
419 cmp a4, #255 | |
420 movgt a4, #255 | |
421 orr a2, a2, a4, lsl #8 | |
422 ldmfd sp!, {a3, a4} | |
423 strh a2, [v2, -lr]! | |
424 | |
/* Pair 3: row 3 = even + odd, row 4 = even - odd. These last stores need no writeback. */
425 add a2, a3, v7 | |
426 movs a2, a2, asr #20 | |
427 movmi a2, #0 | |
428 cmp a2, #255 | |
429 movgt a2, #255 | |
430 add ip, a4, fp | |
431 movs ip, ip, asr #20 | |
432 movmi ip, #0 | |
433 cmp ip, #255 | |
434 movgt ip, #255 | |
435 orr a2, a2, ip, lsl #8 | |
436 strh a2, [v1, lr] | |
437 sub a3, a3, v7 | |
438 movs a2, a3, asr #20 | |
439 movmi a2, #0 | |
440 cmp a2, #255 | |
441 movgt a2, #255 | |
442 sub a4, a4, fp | |
443 movs a4, a4, asr #20 | |
444 movmi a4, #0 | |
445 cmp a4, #255 | |
446 movgt a4, #255 | |
447 orr a2, a2, a4, lsl #8 | |
448 strh a2, [v2, -lr] | |
449 | |
/* All 8 macro words have been popped; return by loading the saved lr into pc. */
450 ldr pc, [sp], #4 | |
451 .endfunc | |
452 | |
/* idct_col_add_armv5te: column pass for two columns whose results are added to the bytes already */
/* at the output address, clamped to 0..255 and stored back as byte pairs. */
/* In:   a1 = address of the column pair in row 0 of the coefficient block (rows 16 bytes apart). */
/*       Stack: directly above this function's saved lr it expects the two words pushed first by */
/*       simple_idct_add_armv5te (its a1, then its a2). The a1 word is used as the output */
/*       address, the a2 word as the byte distance between output rows. */
/* Out:  8 halfwords read, updated and rewritten; the stacked output address is advanced by 2. */
/* Uses: a2-a4, ip, lr, v1-v7, fp; only the return address is saved. */
8069 | 453 function idct_col_add_armv5te |
3769 | 454 str lr, [sp, #-4]! |
455 | |
456 idct_col | |
457 | |
/* Before any pop: 8 macro words + saved lr = 36 bytes, so [sp, #36] is the caller's a1 slot. */
458 ldr lr, [sp, #36] | |
459 | |
/* Row 0: ip = the two existing bytes; low byte pairs with the first column, high byte */
/* (ip lsr #8) with the second. adds sets N for the movmi clamp to 0; cmp/movgt caps at 255. */
/* a3/a4 are turned into the row 7 values (even - odd) before v1/v2 are reused as scratch. */
460 ldmfd sp!, {a3, a4} | |
461 ldrh ip, [lr] | |
462 add a2, a3, v1 | |
463 mov a2, a2, asr #20 | |
464 sub a3, a3, v1 | |
465 and v1, ip, #255 | |
466 adds a2, a2, v1 | |
467 movmi a2, #0 | |
468 cmp a2, #255 | |
469 movgt a2, #255 | |
470 add v1, a4, v2 | |
471 mov v1, v1, asr #20 | |
472 adds v1, v1, ip, lsr #8 | |
473 movmi v1, #0 | |
474 cmp v1, #255 | |
475 movgt v1, #255 | |
476 orr a2, a2, v1, lsl #8 | |
/* After the pop above, [sp, #32] is the caller's a2 slot: v1 = distance between output rows. */
477 ldr v1, [sp, #32] | |
478 sub a4, a4, v2 | |
/* v2 = 7 * row distance; the writeback load leaves v2 at row 7 and fetches its two bytes. */
479 rsb v2, v1, v1, lsl #3 | |
480 ldrh ip, [v2, lr]! | |
481 strh a2, [lr] | |
482 mov a3, a3, asr #20 | |
483 and a2, ip, #255 | |
484 adds a3, a3, a2 | |
485 movmi a3, #0 | |
486 cmp a3, #255 | |
487 movgt a3, #255 | |
488 mov a4, a4, asr #20 | |
489 adds a4, a4, ip, lsr #8 | |
490 movmi a4, #0 | |
491 cmp a4, #255 | |
492 movgt a4, #255 | |
/* Advance the stacked output address ([sp, #28] is now the caller's a1 slot) by 2 bytes. */
493 add a2, lr, #2 | |
494 str a2, [sp, #28] | |
495 orr a2, a3, a4, lsl #8 | |
496 strh a2, [v2] | |
497 | |
/* Pair 1: lr steps down to row 1, v2 steps up to row 6. v3/v4 hold the negated odd sum, so */
/* row 1 subtracts and row 6 adds. v3 is consumed before being reused as scratch. */
498 ldmfd sp!, {a3, a4} | |
499 ldrh ip, [lr, v1]! | |
500 sub a2, a3, v3 | |
501 mov a2, a2, asr #20 | |
502 add a3, a3, v3 | |
503 and v3, ip, #255 | |
504 adds a2, a2, v3 | |
505 movmi a2, #0 | |
506 cmp a2, #255 | |
507 movgt a2, #255 | |
508 sub v3, a4, v4 | |
509 mov v3, v3, asr #20 | |
510 adds v3, v3, ip, lsr #8 | |
511 movmi v3, #0 | |
512 cmp v3, #255 | |
513 movgt v3, #255 | |
514 orr a2, a2, v3, lsl #8 | |
515 add a4, a4, v4 | |
516 ldrh ip, [v2, -v1]! | |
517 strh a2, [lr] | |
518 mov a3, a3, asr #20 | |
519 and a2, ip, #255 | |
520 adds a3, a3, a2 | |
521 movmi a3, #0 | |
522 cmp a3, #255 | |
523 movgt a3, #255 | |
524 mov a4, a4, asr #20 | |
525 adds a4, a4, ip, lsr #8 | |
526 movmi a4, #0 | |
527 cmp a4, #255 | |
528 movgt a4, #255 | |
529 orr a2, a3, a4, lsl #8 | |
530 strh a2, [v2] | |
531 | |
/* Pair 2: row 2 = even + odd (at lr), row 5 = even - odd (at v2). v3 is scratch from here on. */
532 ldmfd sp!, {a3, a4} | |
533 ldrh ip, [lr, v1]! | |
534 add a2, a3, v5 | |
535 mov a2, a2, asr #20 | |
536 sub a3, a3, v5 | |
537 and v3, ip, #255 | |
538 adds a2, a2, v3 | |
539 movmi a2, #0 | |
540 cmp a2, #255 | |
541 movgt a2, #255 | |
542 add v3, a4, v6 | |
543 mov v3, v3, asr #20 | |
544 adds v3, v3, ip, lsr #8 | |
545 movmi v3, #0 | |
546 cmp v3, #255 | |
547 movgt v3, #255 | |
548 orr a2, a2, v3, lsl #8 | |
549 sub a4, a4, v6 | |
550 ldrh ip, [v2, -v1]! | |
551 strh a2, [lr] | |
552 mov a3, a3, asr #20 | |
553 and a2, ip, #255 | |
554 adds a3, a3, a2 | |
555 movmi a3, #0 | |
556 cmp a3, #255 | |
557 movgt a3, #255 | |
558 mov a4, a4, asr #20 | |
559 adds a4, a4, ip, lsr #8 | |
560 movmi a4, #0 | |
561 cmp a4, #255 | |
562 movgt a4, #255 | |
563 orr a2, a3, a4, lsl #8 | |
564 strh a2, [v2] | |
565 | |
/* Pair 3: row 3 = even + odd (at lr), row 4 = even - odd (at v2). */
566 ldmfd sp!, {a3, a4} | |
567 ldrh ip, [lr, v1]! | |
568 add a2, a3, v7 | |
569 mov a2, a2, asr #20 | |
570 sub a3, a3, v7 | |
571 and v3, ip, #255 | |
572 adds a2, a2, v3 | |
573 movmi a2, #0 | |
574 cmp a2, #255 | |
575 movgt a2, #255 | |
576 add v3, a4, fp | |
577 mov v3, v3, asr #20 | |
578 adds v3, v3, ip, lsr #8 | |
579 movmi v3, #0 | |
580 cmp v3, #255 | |
581 movgt v3, #255 | |
582 orr a2, a2, v3, lsl #8 | |
583 sub a4, a4, fp | |
584 ldrh ip, [v2, -v1]! | |
585 strh a2, [lr] | |
586 mov a3, a3, asr #20 | |
587 and a2, ip, #255 | |
588 adds a3, a3, a2 | |
589 movmi a3, #0 | |
590 cmp a3, #255 | |
591 movgt a3, #255 | |
592 mov a4, a4, asr #20 | |
593 adds a4, a4, ip, lsr #8 | |
594 movmi a4, #0 | |
595 cmp a4, #255 | |
596 movgt a4, #255 | |
597 orr a2, a3, a4, lsl #8 | |
598 strh a2, [v2] | |
599 | |
/* All 8 macro words have been popped; return by loading the saved lr into pc. */
600 ldr pc, [sp], #4 | |
601 .endfunc | |
602 | |
/* simple_idct_armv5te (exported): in-place transform of the block at a1. */
/* In:   a1 = address of the block; 8 rows of 16 bytes are processed. */
/* Flow: 8 calls to idct_row_armv5te (a1 advanced by 16 each time), a1 rewound to row 0, then */
/*       4 calls to idct_col_armv5te (a1 advanced by 4, i.e. two 16-bit columns, each time). */
/* Saves v1-v7, fp and lr here because the helpers use them without saving. */
8069 | 603 function simple_idct_armv5te, export=1 |
3769 | 604 stmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, lr} |
605 | |
/* Row pass: the helper leaves a1 unchanged, so the caller steps it. */
606 bl idct_row_armv5te | |
607 add a1, a1, #16 | |
608 bl idct_row_armv5te | |
609 add a1, a1, #16 | |
610 bl idct_row_armv5te | |
611 add a1, a1, #16 | |
612 bl idct_row_armv5te | |
613 add a1, a1, #16 | |
614 bl idct_row_armv5te | |
615 add a1, a1, #16 | |
616 bl idct_row_armv5te | |
617 add a1, a1, #16 | |
618 bl idct_row_armv5te | |
619 add a1, a1, #16 | |
620 bl idct_row_armv5te | |
621 | |
/* a1 was advanced 7 times; return to the start of the block. */
622 sub a1, a1, #(16*7) | |
623 | |
/* Column pass: each call handles two columns. */
624 bl idct_col_armv5te | |
625 add a1, a1, #4 | |
626 bl idct_col_armv5te | |
627 add a1, a1, #4 | |
628 bl idct_col_armv5te | |
629 add a1, a1, #4 | |
630 bl idct_col_armv5te | |
631 | |
/* Restore the saved registers and return by popping the saved lr into pc. */
632 ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc} | |
633 .endfunc | |
634 | |
/* simple_idct_add_armv5te (exported): transform the block at a3 and add the result to the */
/* bytes at the output location described by a1/a2. */
/* In:   a1, a2 = pushed first so idct_col_add_armv5te can read them from the stack (it uses the */
/*       a1 word as the output address and the a2 word as the distance between output rows); */
/*       a3 = address of the block (8 rows of 16 bytes), which the row pass rewrites in place. */
/* Flow: 8 row calls, rewind, then 4 calls to idct_col_add_armv5te, two columns per call. */
8069 | 635 function simple_idct_add_armv5te, export=1 |
3769 | 636 stmfd sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr} |
637 | |
/* The helpers take the block address in a1. */
638 mov a1, a3 | |
639 | |
640 bl idct_row_armv5te | |
641 add a1, a1, #16 | |
642 bl idct_row_armv5te | |
643 add a1, a1, #16 | |
644 bl idct_row_armv5te | |
645 add a1, a1, #16 | |
646 bl idct_row_armv5te | |
647 add a1, a1, #16 | |
648 bl idct_row_armv5te | |
649 add a1, a1, #16 | |
650 bl idct_row_armv5te | |
651 add a1, a1, #16 | |
652 bl idct_row_armv5te | |
653 add a1, a1, #16 | |
654 bl idct_row_armv5te | |
655 | |
/* a1 was advanced 7 times; return to the start of the block. */
656 sub a1, a1, #(16*7) | |
657 | |
658 bl idct_col_add_armv5te | |
659 add a1, a1, #4 | |
660 bl idct_col_add_armv5te | |
661 add a1, a1, #4 | |
662 bl idct_col_add_armv5te | |
663 add a1, a1, #4 | |
664 bl idct_col_add_armv5te | |
665 | |
/* Drop the two stacked argument words (a1, a2) without restoring them, then restore and return. */
666 add sp, sp, #8 | |
667 ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc} | |
668 .endfunc | |
669 | |
/* simple_idct_put_armv5te (exported): transform the block at a3 and store the clamped result */
/* as bytes at the output location described by a1/a2. */
/* In:   a1, a2 = pushed first so idct_col_put_armv5te can read them from the stack (it uses the */
/*       a1 word as the output address and the a2 word as the distance between output rows); */
/*       a3 = address of the block (8 rows of 16 bytes), which the row pass rewrites in place. */
/* Flow: 8 row calls, rewind, then 4 calls to idct_col_put_armv5te, two columns per call. */
8069 | 670 function simple_idct_put_armv5te, export=1 |
3769 | 671 stmfd sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr} |
672 | |
/* The helpers take the block address in a1. */
673 mov a1, a3 | |
674 | |
675 bl idct_row_armv5te | |
676 add a1, a1, #16 | |
677 bl idct_row_armv5te | |
678 add a1, a1, #16 | |
679 bl idct_row_armv5te | |
680 add a1, a1, #16 | |
681 bl idct_row_armv5te | |
682 add a1, a1, #16 | |
683 bl idct_row_armv5te | |
684 add a1, a1, #16 | |
685 bl idct_row_armv5te | |
686 add a1, a1, #16 | |
687 bl idct_row_armv5te | |
688 add a1, a1, #16 | |
689 bl idct_row_armv5te | |
690 | |
/* a1 was advanced 7 times; return to the start of the block. */
691 sub a1, a1, #(16*7) | |
692 | |
693 bl idct_col_put_armv5te | |
694 add a1, a1, #4 | |
695 bl idct_col_put_armv5te | |
696 add a1, a1, #4 | |
697 bl idct_col_put_armv5te | |
698 add a1, a1, #4 | |
699 bl idct_col_put_armv5te | |
700 | |
/* Drop the two stacked argument words (a1, a2) without restoring them, then restore and return. */
701 add sp, sp, #8 | |
702 ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc} | |
703 .endfunc |
703 .endfunc |