Mercurial > libavcodec.hg
annotate armv4l/simple_idct_armv5te.S @ 5372:04a9bb415804 libavcodec
Another minor simplification
author | vitor |
---|---|
date | Wed, 18 Jul 2007 20:23:43 +0000 |
parents | 744e91a36a23 |
children | 316762ae96a7 |
rev | line source |
---|---|
3769 | 1 /* |
2 * Simple IDCT | |
3 * | |
4 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at> | |
5220 | 5 * Copyright (c) 2006 Mans Rullgard <mans@mansr.com> |
3769 | 6 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3769
diff
changeset
|
7 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3769
diff
changeset
|
8 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3769
diff
changeset
|
9 * FFmpeg is free software; you can redistribute it and/or |
3769 | 10 * modify it under the terms of the GNU Lesser General Public |
11 * License as published by the Free Software Foundation; either | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3769
diff
changeset
|
12 * version 2.1 of the License, or (at your option) any later version. |
3769 | 13 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3769
diff
changeset
|
14 * FFmpeg is distributed in the hope that it will be useful, |
3769 | 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 * Lesser General Public License for more details. | |
18 * | |
19 * You should have received a copy of the GNU Lesser General Public | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3769
diff
changeset
|
20 * License along with FFmpeg; if not, write to the Free Software |
3769 | 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
22 */ | |
23 | |
/*
 * Fixed-point IDCT coefficients:
 *     Wi = cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5, truncated to an integer.
 * W4 is the one exception: it is defined as 16383, one less than the
 * value the formula gives (16384).
 */
#define W1 22725                /* i = 1 */
#define W2 21407                /* i = 2 */
#define W3 19266                /* i = 3 */
#define W4 16383                /* i = 4, see the note above */
#define W5 12873                /* i = 5 */
#define W6  8867                /* i = 6 */
#define W7  4520                /* i = 7 */

/* Scaling of the two passes; 1<<(SHIFT-1) is the matching rounding bias. */
#define ROW_SHIFT 11
#define COL_SHIFT 20

/*
 * Two coefficients packed into one word: the first in bits 0-15, the second
 * in bits 16-31, so that the halfword multiplies (smulxy/smlaxy) can pick
 * either one from a single register.
 */
#define W13 (W1 | (W3 << 16))
#define W26 (W2 | (W6 << 16))
#define W57 (W5 | (W7 << 16))

        .text
        .align
/* Literal words; the code below loads them with pc-relative ldr. */
w13:    .long W13
w26:    .long W26
w57:    .long W57
43 | |
/*
 * idct_row_armv5te: in-place 1-D IDCT of one row of eight 16-bit values.
 *
 * In:      a1 = address of the row (16 bytes, accessed with ldrd/strd)
 * Out:     the eight values at a1 are overwritten; a1 itself is unchanged
 * Clobber: a2-a4, ip, v1-v7, fp, flags.  lr is saved on entry and popped
 *          straight into pc on return.  v1-v7 and fp are NOT preserved
 *          here; the simple_idct_* entry points in this file save them.
 *
 * Register roles on the general path:
 *     v1, v2, v3, v4   even-part sums  a0, a1, a2, a3
 *     v5, v6, v7, fp   odd-part sums   b0, -b1, b2, b3
 */
        .align
        .type  idct_row_armv5te, %function
        .func  idct_row_armv5te
idct_row_armv5te:
        str    lr, [sp, #-4]!

        /* ---- shortcut test: are row[1..7] all zero? ---- */
        ldrd   v1, [a1, #8]          /* v1 = row[5:4], v2 = row[7:6] */
        ldrd   a3, [a1]              /* a3 = row[1:0], a4 = row[3:2] */
        orrs   v1, v1, v2            /* Z iff row[4..7] == 0 (then v1 == 0) */
        cmpeq  v1, a4                /* ... and row[3:2] == 0 */
        cmpeq  v1, a3, lsr #16       /* ... and row[1] == 0 */
        beq    row_dc_only

        /* ---- even part from row[0] and row[2] ---- */
        mov    v1, #(1<<(ROW_SHIFT-1))
        mov    ip, #16384
        sub    ip, ip, #1            /* ip = W4 (16384 - 1) */
        smlabb v1, ip, a3, v1        /* v1 = W4*row[0] + rounding bias */
        ldr    ip, [pc, #(w26-.-8)]  /* ip = W2 | (W6 << 16) */
        smultb a2, ip, a4            /* a2 = W6*row[2] */
        smulbb lr, ip, a4            /* lr = W2*row[2] */
        add    v2, v1, a2            /* a1 = v1 + W6*row[2] */
        sub    v3, v1, a2            /* a2 = v1 - W6*row[2] */
        sub    v4, v1, lr            /* a3 = v1 - W2*row[2] */
        add    v1, v1, lr            /* a0 = v1 + W2*row[2] */

        /* ---- odd part from row[1] and row[3] ---- */
        ldr    ip, [pc, #(w13-.-8)]  /* ip = W1 | (W3 << 16) */
        ldr    lr, [pc, #(w57-.-8)]  /* lr = W5 | (W7 << 16) */
        smulbt v5, ip, a3            /* v5 = W1*row[1] */
        smultt v6, lr, a4            /* v6 = W7*row[3] */
        smlatt v5, ip, a4, v5        /* v5 += W3*row[3]          ->  b0 */
        smultt a2, ip, a3            /* a2 = W3*row[1] */
        smulbt v7, lr, a3            /* v7 = W5*row[1] */
        sub    v6, v6, a2            /* v6 = W7*row[3]-W3*row[1] -> -b1 */
        smulbt a2, ip, a4            /* a2 = W1*row[3] */
        smultt fp, lr, a3            /* fp = W7*row[1] */
        sub    v7, v7, a2            /* v7 = W5*row[1]-W1*row[3] ->  b2 */
        smulbt a2, lr, a4            /* a2 = W5*row[3] */
        ldrd   a3, [a1, #8]          /* a3 = row[5:4], a4 = row[7:6] */
        sub    fp, fp, a2            /* fp = W7*row[1]-W5*row[3] ->  b3 */

        /* Nothing to add when row[4..7] are all zero. */
        orrs   a2, a3, a4
        beq    1f

        /* ---- odd part, contributions of row[5] and row[7] ---- */
        smlabt v5, lr, a3, v5        /* b0  += W5*row[5] */
        smlabt v6, ip, a3, v6        /* -b1 += W1*row[5] */
        smlatt v5, lr, a4, v5        /* b0  += W7*row[7] */
        smlabt v6, lr, a4, v6        /* -b1 += W5*row[7] */
        smlatt v7, lr, a3, v7        /* b2  += W7*row[5] */
        smlatt fp, ip, a3, fp        /* b3  += W3*row[5] */
        smulbt a2, ip, a4            /* a2   = W1*row[7] */
        smlatt v7, ip, a4, v7        /* b2  += W3*row[7] */
        sub    fp, fp, a2            /* b3  -= W1*row[7] */

        /* ---- even part, contributions of row[4] and row[6] ---- */
        ldr    ip, [pc, #(w26-.-8)]  /* ip = W2 | (W6 << 16) */
        mov    a2, #16384
        sub    a2, a2, #1            /* a2 = W4 */
        smulbb a2, a2, a3            /* a2 = W4*row[4] */
        smultb lr, ip, a4            /* lr = W6*row[6] */
        add    v1, v1, a2            /* a0 += W4*row[4] */
        add    v1, v1, lr            /* a0 += W6*row[6] */
        add    v4, v4, a2            /* a3 += W4*row[4] */
        sub    v4, v4, lr            /* a3 -= W6*row[6] */
        smulbb lr, ip, a4            /* lr = W2*row[6] */
        sub    v2, v2, a2            /* a1 -= W4*row[4] */
        sub    v2, v2, lr            /* a1 -= W2*row[6] */
        sub    v3, v3, a2            /* a2 -= W4*row[4] */
        add    v3, v3, lr            /* a2 += W2*row[6] */

        /*
         * ---- combine, scale by >> 11 (ROW_SHIFT) and pack two results
         * per word.  After the logical shift the low result still has five
         * stray bits above bit 15; bic #0x1f0000 removes them before the
         * second result is placed in the high halfword.
         */
1:      add    a2, v1, v5            /* row[0] = a0 + b0 */
        mov    a3, a2, lsr #11
        bic    a3, a3, #0x1f0000
        sub    a2, v2, v6            /* row[1] = a1 + b1  (v6 holds -b1) */
        mov    a2, a2, lsr #11
        add    a3, a3, a2, lsl #16
        add    a2, v3, v7            /* row[2] = a2 + b2 */
        mov    a4, a2, lsr #11
        bic    a4, a4, #0x1f0000
        add    a2, v4, fp            /* row[3] = a3 + b3 */
        mov    a2, a2, lsr #11
        add    a4, a4, a2, lsl #16
        strd   a3, [a1]

        sub    a2, v4, fp            /* row[4] = a3 - b3 */
        mov    a3, a2, lsr #11
        bic    a3, a3, #0x1f0000
        sub    a2, v3, v7            /* row[5] = a2 - b2 */
        mov    a2, a2, lsr #11
        add    a3, a3, a2, lsl #16
        add    a2, v2, v6            /* row[6] = a1 - b1  (v6 holds -b1) */
        mov    a4, a2, lsr #11
        bic    a4, a4, #0x1f0000
        sub    a2, v1, v5            /* row[7] = a0 - b0 */
        mov    a2, a2, lsr #11
        add    a4, a4, a2, lsl #16
        strd   a3, [a1, #8]

        ldr    pc, [sp], #4

        /*
         * row[1..7] are zero here, so a3 holds row[0] in its low halfword
         * and zero in its high halfword.
         */
row_dc_only:
        orr    a3, a3, a3, lsl #16   /* row[0] in both halfwords */
        bic    a3, a3, #0xe000       /* keep the low half from spilling
                                        into the high half when shifted */
        mov    a3, a3, lsl #3        /* each halfword = row[0] << 3,
                                        truncated to 16 bits */
        mov    a4, a3
        strd   a3, [a1]              /* all eight outputs get that value */
        strd   a3, [a1, #8]

        ldr    pc, [sp], #4
        .endfunc
152 | |
/*
 * idct_col: arithmetic core of the column pass, shared by
 * idct_col_armv5te, idct_col_put_armv5te and idct_col_add_armv5te.
 *
 * Two adjacent columns are handled at once: each 32-bit load fetches one
 * value of the first column (low halfword) and one of the second column
 * (high halfword).  Successive rows are 16 bytes apart.
 *
 * In:      a1 = address of the row-0 pair (left unchanged)
 * Out:     32 bytes pushed on the stack, lowest address first:
 *              even-part sums a0, a1, a2, a3, each as two words
 *              {first column, second column}
 *          odd-part sums in registers (first / second column):
 *              v1 / v2 =  b0      v3 / v4 = -b1
 *              v5 / v6 =  b2      v7 / fp =  b3
 *          Code using the macro has to pop the 32 bytes itself.
 * Clobber: a2, a3, a4, ip, lr, v1-v7, fp
 */
        .macro idct_col
        ldr    a4, [a1]              /* row 0 of both columns */
        mov    ip, #16384
        sub    ip, ip, #1            /* ip = W4 */
#if 0
        mov    v1, #(1<<(COL_SHIFT-1))
        smlabt v2, ip, a4, v1        /* v2 = W4*col[1] + (1<<(COL_SHIFT-1)) */
        smlabb v1, ip, a4, v1        /* v1 = W4*col[0] + (1<<(COL_SHIFT-1)) */
        ldr    a4, [a1, #(16*4)]
#else
        /*
         * Rounding bias folded into the DC term: W4 * (x + bias/W4).
         * The multiply by W4 = 2^14 - 1 is done as (t << 14) - t.
         */
        mov    v1, #((1<<(COL_SHIFT-1))/W4) /* this matches the C version */
        add    v2, v1, a4, asr #16   /* second column: row0 + bias/W4 */
        rsb    v2, v2, v2, lsl #14   /* ... times W4 */
        mov    a4, a4, lsl #16
        add    v1, v1, a4, asr #16   /* first column: row0 + bias/W4 */
        ldr    a4, [a1, #(16*4)]     /* row 4 of both columns */
        rsb    v1, v1, v1, lsl #14   /* ... times W4 */
#endif

        /* ---- even part, row 4: a0, a3 += W4*row4;  a1, a2 -= W4*row4 ---- */
        smulbb lr, ip, a4            /* W4*row4, first column */
        smulbt a3, ip, a4            /* W4*row4, second column */
        sub    v3, v1, lr
        sub    v5, v1, lr
        add    v7, v1, lr
        add    v1, v1, lr
        sub    v4, v2, a3
        sub    v6, v2, a3
        add    fp, v2, a3
        ldr    ip, [pc, #(w26-.-8)]  /* ip = W2 | (W6 << 16) */
        ldr    a4, [a1, #(16*2)]     /* row 2 of both columns */
        add    v2, v2, a3

        /* ---- even part, row 2 ---- */
        smulbb lr, ip, a4            /* W2*row2, first column */
        smultb a3, ip, a4            /* W6*row2, first column */
        add    v1, v1, lr            /* a0 += W2*row2 */
        sub    v7, v7, lr            /* a3 -= W2*row2 */
        add    v3, v3, a3            /* a1 += W6*row2 */
        sub    v5, v5, a3            /* a2 -= W6*row2 */
        smulbt lr, ip, a4            /* W2*row2, second column */
        smultt a3, ip, a4            /* W6*row2, second column */
        add    v2, v2, lr
        sub    fp, fp, lr
        add    v4, v4, a3
        ldr    a4, [a1, #(16*6)]     /* row 6 of both columns */
        sub    v6, v6, a3

        /* ---- even part, row 6 ---- */
        smultb lr, ip, a4            /* W6*row6, first column */
        smulbb a3, ip, a4            /* W2*row6, first column */
        add    v1, v1, lr            /* a0 += W6*row6 */
        sub    v7, v7, lr            /* a3 -= W6*row6 */
        sub    v3, v3, a3            /* a1 -= W2*row6 */
        add    v5, v5, a3            /* a2 += W2*row6 */
        smultt lr, ip, a4            /* W6*row6, second column */
        smulbt a3, ip, a4            /* W2*row6, second column */
        add    v2, v2, lr
        sub    fp, fp, lr
        sub    v4, v4, a3
        add    v6, v6, a3

        /* Park the even sums; v1-v7 and fp are reused for the odd part. */
        stmfd  sp!, {v1-v7, fp}

        /* ---- odd part, row 1 ---- */
        ldr    ip, [pc, #(w13-.-8)]  /* ip = W1 | (W3 << 16) */
        ldr    a4, [a1, #(16*1)]     /* row 1 of both columns */
        ldr    lr, [pc, #(w57-.-8)]  /* lr = W5 | (W7 << 16) */
        smulbb v1, ip, a4            /* b0 = W1*row1, first column */
        smultb v3, ip, a4            /* W3*row1 (negated below) */
        smulbb v5, lr, a4            /* b2 = W5*row1 */
        smultb v7, lr, a4            /* b3 = W7*row1 */
        smulbt v2, ip, a4            /* same four for the second column */
        smultt v4, ip, a4
        smulbt v6, lr, a4
        smultt fp, lr, a4
        rsb    v4, v4, #0            /* v4 = -W3*row1  (-b1, second column) */
        ldr    a4, [a1, #(16*3)]     /* row 3 of both columns */
        rsb    v3, v3, #0            /* v3 = -W3*row1  (-b1, first column) */

        /* ---- odd part, row 3 ---- */
        smlatb v1, ip, a4, v1        /* b0  += W3*row3 */
        smlatb v3, lr, a4, v3        /* -b1 += W7*row3 */
        smulbb a3, ip, a4            /* W1*row3 */
        smulbb a2, lr, a4            /* W5*row3 */
        sub    v5, v5, a3            /* b2  -= W1*row3 */
        sub    v7, v7, a2            /* b3  -= W5*row3 */
        smlatt v2, ip, a4, v2        /* second column likewise */
        smlatt v4, lr, a4, v4
        smulbt a3, ip, a4
        smulbt a2, lr, a4
        sub    v6, v6, a3
        ldr    a4, [a1, #(16*5)]     /* row 5 of both columns */
        sub    fp, fp, a2

        /* ---- odd part, row 5 ---- */
        smlabb v1, lr, a4, v1        /* b0  += W5*row5 */
        smlabb v3, ip, a4, v3        /* -b1 += W1*row5 */
        smlatb v5, lr, a4, v5        /* b2  += W7*row5 */
        smlatb v7, ip, a4, v7        /* b3  += W3*row5 */
        smlabt v2, lr, a4, v2        /* second column likewise */
        smlabt v4, ip, a4, v4
        smlatt v6, lr, a4, v6
        ldr    a3, [a1, #(16*7)]     /* row 7 of both columns */
        smlatt fp, ip, a4, fp

        /* ---- odd part, row 7 ---- */
        smlatb v1, lr, a3, v1        /* b0  += W7*row7 */
        smlabb v3, lr, a3, v3        /* -b1 += W5*row7 */
        smlatb v5, ip, a3, v5        /* b2  += W3*row7 */
        smulbb a4, ip, a3            /* W1*row7, first column */
        smlatt v2, lr, a3, v2        /* second column: b0 */
        sub    v7, v7, a4            /* b3  -= W1*row7 */
        smlabt v4, lr, a3, v4        /* second column: -b1 */
        smulbt a4, ip, a3            /* W1*row7, second column */
        smlatt v6, ip, a3, v6        /* second column: b2 */
        sub    fp, fp, a4            /* second column: b3 */
        .endm
264 | |
/*
 * idct_col_armv5te: column IDCT of two adjacent columns, results written
 * back in place as 16-bit values.
 *
 * In:      a1 = address of the row-0 pair of the two columns (unchanged)
 * Clobber: a2-a4, ip, v1-v7, fp, flags (lr is saved and popped into pc)
 *
 * idct_col leaves the even sums a0..a3 on the stack and the odd sums
 * b0, -b1, b2, b3 in v1/v2, v3/v4, v5/v6, v7/fp.  Each section below pops
 * one even pair (a3 = first column, a4 = second column) and produces two
 * output rows, (even + odd) >> 20 and (even - odd) >> 20.
 *
 * Packing: the first-column result goes to the low halfword.  It is
 * shifted logically, which leaves 12 bits, and orrmi #0xf000 fills in the
 * top four bits when the sum was negative.  The second-column result is
 * shifted arithmetically and moved to the high halfword.
 */
        .align
        .type  idct_col_armv5te, %function
        .func  idct_col_armv5te
idct_col_armv5te:
        str    lr, [sp, #-4]!

        idct_col

        /* ---- rows 0 and 7:  a0 + b0,  a0 - b0 ---- */
        ldmfd  sp!, {a3, a4}
        adds   a2, a3, v1
        mov    a2, a2, lsr #20
        orrmi  a2, a2, #0xf000
        add    ip, a4, v2
        mov    ip, ip, asr #20
        orr    a2, a2, ip, lsl #16
        str    a2, [a1]
        subs   a3, a3, v1
        mov    a2, a3, lsr #20
        orrmi  a2, a2, #0xf000
        sub    a4, a4, v2
        mov    a4, a4, asr #20
        orr    a2, a2, a4, lsl #16
        ldmfd  sp!, {a3, a4}         /* next even pair: a1 */
        str    a2, [a1, #(16*7)]

        /* ---- rows 1 and 6:  a1 + b1,  a1 - b1  (v3/v4 hold -b1) ---- */
        subs   a2, a3, v3
        mov    a2, a2, lsr #20
        orrmi  a2, a2, #0xf000
        sub    ip, a4, v4
        mov    ip, ip, asr #20
        orr    a2, a2, ip, lsl #16
        str    a2, [a1, #(16*1)]
        adds   a3, a3, v3
        mov    a2, a3, lsr #20
        orrmi  a2, a2, #0xf000
        add    a4, a4, v4
        mov    a4, a4, asr #20
        orr    a2, a2, a4, lsl #16
        ldmfd  sp!, {a3, a4}         /* next even pair: a2 */
        str    a2, [a1, #(16*6)]

        /* ---- rows 2 and 5:  a2 + b2,  a2 - b2 ---- */
        adds   a2, a3, v5
        mov    a2, a2, lsr #20
        orrmi  a2, a2, #0xf000
        add    ip, a4, v6
        mov    ip, ip, asr #20
        orr    a2, a2, ip, lsl #16
        str    a2, [a1, #(16*2)]
        subs   a3, a3, v5
        mov    a2, a3, lsr #20
        orrmi  a2, a2, #0xf000
        sub    a4, a4, v6
        mov    a4, a4, asr #20
        orr    a2, a2, a4, lsl #16
        ldmfd  sp!, {a3, a4}         /* last even pair: a3 */
        str    a2, [a1, #(16*5)]

        /* ---- rows 3 and 4:  a3 + b3,  a3 - b3 ---- */
        adds   a2, a3, v7
        mov    a2, a2, lsr #20
        orrmi  a2, a2, #0xf000
        add    ip, a4, fp
        mov    ip, ip, asr #20
        orr    a2, a2, ip, lsl #16
        str    a2, [a1, #(16*3)]
        subs   a3, a3, v7
        mov    a2, a3, lsr #20
        orrmi  a2, a2, #0xf000
        sub    a4, a4, fp
        mov    a4, a4, asr #20
        orr    a2, a2, a4, lsl #16
        str    a2, [a1, #(16*4)]

        ldr    pc, [sp], #4
        .endfunc
339 | |
/*
 * col_put_sat8: finish clamping \reg to 0..255.
 * Must directly follow a flag-setting instruction that produced \reg
 * (N set means negative).  Clobbers the flags.
 */
        .macro col_put_sat8 reg
        movmi  \reg, #0
        cmp    \reg, #255
        movgt  \reg, #255
        .endm

/*
 * idct_col_put_armv5te: column IDCT of two adjacent columns; the results
 * are shifted right by 20, clamped to 0..255 and stored as bytes, two per
 * output row (first column in the low byte of each halfword store).
 *
 * In:      a1 = address of the row-0 pair of the two columns (unchanged)
 *          On the stack, directly above this routine's saved lr, the two
 *          words pushed first by simple_idct_put_armv5te:
 *              destination pointer, then line stride in bytes.
 * Out:     eight rows of two bytes written; the destination pointer in
 *          the caller's stack slot is advanced by 2.
 * Clobber: a2-a4, ip, v1-v7, fp, flags (lr is saved and popped into pc)
 *
 * idct_col leaves the even sums on the stack and the odd sums b0, -b1,
 * b2, b3 in v1/v2, v3/v4, v5/v6, v7/fp.  v1 is reused as the pointer that
 * walks down from row 0, v2 as the pointer that walks up from row 7.
 */
        .align
        .type  idct_col_put_armv5te, %function
        .func  idct_col_put_armv5te
idct_col_put_armv5te:
        str    lr, [sp, #-4]!

        idct_col

        /* ---- rows 0 and 7:  a0 + b0,  a0 - b0 ---- */
        ldmfd  sp!, {a3, a4}
        ldr    lr, [sp, #32]         /* lr = line stride */
        add    a2, a3, v1
        movs   a2, a2, asr #20
        col_put_sat8 a2
        add    ip, a4, v2
        movs   ip, ip, asr #20
        col_put_sat8 ip
        orr    a2, a2, ip, lsl #8    /* row 0 byte pair */
        sub    a3, a3, v1
        movs   a3, a3, asr #20
        col_put_sat8 a3
        sub    a4, a4, v2
        movs   a4, a4, asr #20
        movmi  a4, #0                /* same clamp, with a load interleaved */
        cmp    a4, #255
        ldr    v1, [sp, #28]         /* v1 = destination pointer */
        movgt  a4, #255
        strh   a2, [v1]              /* store row 0 */
        add    a2, v1, #2
        str    a2, [sp, #28]         /* destination += 2 for the next call */
        orr    a2, a3, a4, lsl #8    /* row 7 byte pair */
        rsb    v2, lr, lr, lsl #3    /* v2 = 7 * stride */
        ldmfd  sp!, {a3, a4}         /* next even pair: a1 */
        strh   a2, [v2, v1]!         /* store row 7; v2 = address of row 7 */

        /* ---- rows 1 and 6:  a1 + b1,  a1 - b1  (v3/v4 hold -b1) ---- */
        sub    a2, a3, v3
        movs   a2, a2, asr #20
        col_put_sat8 a2
        sub    ip, a4, v4
        movs   ip, ip, asr #20
        col_put_sat8 ip
        orr    a2, a2, ip, lsl #8
        strh   a2, [v1, lr]!         /* store row 1; v1 moves down a row */
        add    a3, a3, v3
        movs   a2, a3, asr #20
        col_put_sat8 a2
        add    a4, a4, v4
        movs   a4, a4, asr #20
        col_put_sat8 a4
        orr    a2, a2, a4, lsl #8
        ldmfd  sp!, {a3, a4}         /* next even pair: a2 */
        strh   a2, [v2, -lr]!        /* store row 6; v2 moves up a row */

        /* ---- rows 2 and 5:  a2 + b2,  a2 - b2 ---- */
        add    a2, a3, v5
        movs   a2, a2, asr #20
        col_put_sat8 a2
        add    ip, a4, v6
        movs   ip, ip, asr #20
        col_put_sat8 ip
        orr    a2, a2, ip, lsl #8
        strh   a2, [v1, lr]!         /* store row 2 */
        sub    a3, a3, v5
        movs   a2, a3, asr #20
        col_put_sat8 a2
        sub    a4, a4, v6
        movs   a4, a4, asr #20
        col_put_sat8 a4
        orr    a2, a2, a4, lsl #8
        ldmfd  sp!, {a3, a4}         /* last even pair: a3 */
        strh   a2, [v2, -lr]!        /* store row 5 */

        /* ---- rows 3 and 4:  a3 + b3,  a3 - b3 ---- */
        add    a2, a3, v7
        movs   a2, a2, asr #20
        col_put_sat8 a2
        add    ip, a4, fp
        movs   ip, ip, asr #20
        col_put_sat8 ip
        orr    a2, a2, ip, lsl #8
        strh   a2, [v1, lr]          /* store row 3 */
        sub    a3, a3, v7
        movs   a2, a3, asr #20
        col_put_sat8 a2
        sub    a4, a4, fp
        movs   a4, a4, asr #20
        col_put_sat8 a4
        orr    a2, a2, a4, lsl #8
        strh   a2, [v2, -lr]         /* store row 4 */

        ldr    pc, [sp], #4
        .endfunc
459 | |
/*
 * col_add_sat8: finish clamping \reg to 0..255.
 * Must directly follow the adds that produced \reg (N set means
 * negative).  Clobbers the flags.
 */
        .macro col_add_sat8 reg
        movmi  \reg, #0
        cmp    \reg, #255
        movgt  \reg, #255
        .endm

/*
 * idct_col_add_armv5te: column IDCT of two adjacent columns; each result
 * is shifted right by 20, added to the byte already present at the
 * destination, clamped to 0..255 and stored back.  Two bytes per row are
 * read and written with halfword accesses (first column = low byte).
 *
 * In:      a1 = address of the row-0 pair of the two columns (unchanged)
 *          On the stack, directly above this routine's saved lr, the two
 *          words pushed first by simple_idct_add_armv5te:
 *              destination pointer, then line stride in bytes.
 * Out:     eight rows of two bytes updated; the destination pointer in
 *          the caller's stack slot is advanced by 2.
 * Clobber: a2-a4, ip, v1-v7, fp, flags (lr is saved and popped into pc)
 *
 * idct_col leaves the even sums on the stack and the odd sums b0, -b1,
 * b2, b3 in v1/v2, v3/v4, v5/v6, v7/fp.  lr walks down from row 0 and v2
 * walks up from row 7; v1 becomes the stride and v3 a scratch register
 * once their odd sums have been consumed.
 */
        .align
        .type  idct_col_add_armv5te, %function
        .func  idct_col_add_armv5te
idct_col_add_armv5te:
        str    lr, [sp, #-4]!

        idct_col

        ldr    lr, [sp, #36]         /* lr = destination pointer */

        /* ---- rows 0 and 7:  a0 + b0,  a0 - b0 ---- */
        ldmfd  sp!, {a3, a4}
        ldrh   ip, [lr]              /* existing row 0 byte pair */
        add    a2, a3, v1
        mov    a2, a2, asr #20
        sub    a3, a3, v1
        and    v1, ip, #255
        adds   a2, a2, v1
        col_add_sat8 a2
        add    v1, a4, v2
        mov    v1, v1, asr #20
        adds   v1, v1, ip, lsr #8
        col_add_sat8 v1
        orr    a2, a2, v1, lsl #8    /* new row 0 byte pair */
        ldr    v1, [sp, #32]         /* v1 = line stride */
        sub    a4, a4, v2
        rsb    v2, v1, v1, lsl #3    /* v2 = 7 * stride */
        ldrh   ip, [v2, lr]!         /* existing row 7; v2 = its address */
        strh   a2, [lr]              /* store row 0 */
        mov    a3, a3, asr #20
        and    a2, ip, #255
        adds   a3, a3, a2
        col_add_sat8 a3
        mov    a4, a4, asr #20
        adds   a4, a4, ip, lsr #8
        col_add_sat8 a4
        add    a2, lr, #2
        str    a2, [sp, #28]         /* destination += 2 for the next call */
        orr    a2, a3, a4, lsl #8
        strh   a2, [v2]              /* store row 7 */

        /* ---- rows 1 and 6:  a1 + b1,  a1 - b1  (v3/v4 hold -b1) ---- */
        ldmfd  sp!, {a3, a4}
        ldrh   ip, [lr, v1]!         /* existing row 1; lr moves down */
        sub    a2, a3, v3
        mov    a2, a2, asr #20
        add    a3, a3, v3
        and    v3, ip, #255          /* v3 is scratch from here on */
        adds   a2, a2, v3
        col_add_sat8 a2
        sub    v3, a4, v4
        mov    v3, v3, asr #20
        adds   v3, v3, ip, lsr #8
        col_add_sat8 v3
        orr    a2, a2, v3, lsl #8
        add    a4, a4, v4
        ldrh   ip, [v2, -v1]!        /* existing row 6; v2 moves up */
        strh   a2, [lr]              /* store row 1 */
        mov    a3, a3, asr #20
        and    a2, ip, #255
        adds   a3, a3, a2
        col_add_sat8 a3
        mov    a4, a4, asr #20
        adds   a4, a4, ip, lsr #8
        col_add_sat8 a4
        orr    a2, a3, a4, lsl #8
        strh   a2, [v2]              /* store row 6 */

        /* ---- rows 2 and 5:  a2 + b2,  a2 - b2 ---- */
        ldmfd  sp!, {a3, a4}
        ldrh   ip, [lr, v1]!         /* existing row 2 */
        add    a2, a3, v5
        mov    a2, a2, asr #20
        sub    a3, a3, v5
        and    v3, ip, #255
        adds   a2, a2, v3
        col_add_sat8 a2
        add    v3, a4, v6
        mov    v3, v3, asr #20
        adds   v3, v3, ip, lsr #8
        col_add_sat8 v3
        orr    a2, a2, v3, lsl #8
        sub    a4, a4, v6
        ldrh   ip, [v2, -v1]!        /* existing row 5 */
        strh   a2, [lr]              /* store row 2 */
        mov    a3, a3, asr #20
        and    a2, ip, #255
        adds   a3, a3, a2
        col_add_sat8 a3
        mov    a4, a4, asr #20
        adds   a4, a4, ip, lsr #8
        col_add_sat8 a4
        orr    a2, a3, a4, lsl #8
        strh   a2, [v2]              /* store row 5 */

        /* ---- rows 3 and 4:  a3 + b3,  a3 - b3 ---- */
        ldmfd  sp!, {a3, a4}
        ldrh   ip, [lr, v1]!         /* existing row 3 */
        add    a2, a3, v7
        mov    a2, a2, asr #20
        sub    a3, a3, v7
        and    v3, ip, #255
        adds   a2, a2, v3
        col_add_sat8 a2
        add    v3, a4, fp
        mov    v3, v3, asr #20
        adds   v3, v3, ip, lsr #8
        col_add_sat8 v3
        orr    a2, a2, v3, lsl #8
        sub    a4, a4, fp
        ldrh   ip, [v2, -v1]!        /* existing row 4 */
        strh   a2, [lr]              /* store row 3 */
        mov    a3, a3, asr #20
        and    a2, ip, #255
        adds   a3, a3, a2
        col_add_sat8 a3
        mov    a4, a4, asr #20
        adds   a4, a4, ip, lsr #8
        col_add_sat8 a4
        orr    a2, a3, a4, lsl #8
        strh   a2, [v2]              /* store row 4 */

        ldr    pc, [sp], #4
        .endfunc
612 | |
/*
 * simple_idct_armv5te: in-place 8x8 IDCT of a block of 16-bit values.
 *
 * In:      a1 = address of the block (8 rows, 16 bytes per row)
 * Out:     block transformed in place
 * Saves v1-v7 and fp, which the row and column routines overwrite.
 */
        .align
        .global simple_idct_armv5te
        .type  simple_idct_armv5te, %function
        .func  simple_idct_armv5te
simple_idct_armv5te:
        stmfd  sp!, {v1-v7, fp, lr}

        /* Row pass: eight rows, 16 bytes apart. */
        bl     idct_row_armv5te
        .rept  7
        add    a1, a1, #16
        bl     idct_row_armv5te
        .endr

        sub    a1, a1, #(16*7)       /* back to the first row */

        /* Column pass: four calls, each handling two adjacent columns. */
        bl     idct_col_armv5te
        .rept  3
        add    a1, a1, #4
        bl     idct_col_armv5te
        .endr

        ldmfd  sp!, {v1-v7, fp, pc}
        .endfunc
648 | |
/*
 * simple_idct_add_armv5te: 8x8 IDCT of a block of 16-bit values, with the
 * result added to existing bytes at a destination and clamped to 0..255.
 *
 * In:      a1 = destination pointer
 *          a2 = destination line stride in bytes
 *          a3 = address of the block (8 rows, 16 bytes per row)
 *
 * a1 and a2 are pushed first so that they sit at the lowest addresses of
 * this frame; idct_col_add_armv5te reads them from there and advances the
 * stored destination pointer by 2 on every call.
 */
        .align
        .global simple_idct_add_armv5te
        .type  simple_idct_add_armv5te, %function
        .func  simple_idct_add_armv5te
simple_idct_add_armv5te:
        stmfd  sp!, {a1, a2, v1-v7, fp, lr}

        mov    a1, a3                /* a1 = block for the passes below */

        /* Row pass: eight rows, 16 bytes apart. */
        bl     idct_row_armv5te
        .rept  7
        add    a1, a1, #16
        bl     idct_row_armv5te
        .endr

        sub    a1, a1, #(16*7)       /* back to the first row */

        /* Column pass: four calls, each handling two adjacent columns. */
        bl     idct_col_add_armv5te
        .rept  3
        add    a1, a1, #4
        bl     idct_col_add_armv5te
        .endr

        add    sp, sp, #8            /* drop the saved a1 and a2 */
        ldmfd  sp!, {v1-v7, fp, pc}
        .endfunc
687 | |
/*
 * simple_idct_put_armv5te: 8x8 IDCT of a block of 16-bit values, with the
 * result clamped to 0..255 and stored as bytes at a destination.
 *
 * In:      a1 = destination pointer
 *          a2 = destination line stride in bytes
 *          a3 = address of the block (8 rows, 16 bytes per row)
 *
 * a1 and a2 are pushed first so that they sit at the lowest addresses of
 * this frame; idct_col_put_armv5te reads them from there and advances the
 * stored destination pointer by 2 on every call.
 */
        .align
        .global simple_idct_put_armv5te
        .type  simple_idct_put_armv5te, %function
        .func  simple_idct_put_armv5te
simple_idct_put_armv5te:
        stmfd  sp!, {a1, a2, v1-v7, fp, lr}

        mov    a1, a3                /* a1 = block for the passes below */

        /* Row pass: eight rows, 16 bytes apart. */
        bl     idct_row_armv5te
        .rept  7
        add    a1, a1, #16
        bl     idct_row_armv5te
        .endr

        sub    a1, a1, #(16*7)       /* back to the first row */

        /* Column pass: four calls, each handling two adjacent columns. */
        bl     idct_col_put_armv5te
        .rept  3
        add    a1, a1, #4
        bl     idct_col_put_armv5te
        .endr

        add    sp, sp, #8            /* drop the saved a1 and a2 */
        ldmfd  sp!, {v1-v7, fp, pc}
        .endfunc