arm/simple_idct_armv5te.S (libavcodec, changeset 9899:06ab8ac1a593)

/*
 * Simple IDCT
 *
 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
 * Copyright (c) 2006 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "asm.S"

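/*
 * Editorial note: this file uses the old APCS register aliases:
 * a1-a4 = r0-r3 (arguments/scratch), v1-v7 = r4-r10 (callee-saved),
 * fp = r11, ip = r12, lr = r14.  Function arguments therefore arrive
 * in a1, a2, a3.
 */
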
#define W1 22725  /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W2 21407  /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W3 19266  /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W4 16383  /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W5 12873  /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W6 8867   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W7 4520   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define ROW_SHIFT 11
#define COL_SHIFT 20

#define W13 (W1 | (W3 << 16))
#define W26 (W2 | (W6 << 16))
#define W57 (W5 | (W7 << 16))

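/*
 * Editorial note: the constants above are the DCT basis cosines in
 * 2.14 fixed point, Wi = round(cos(i*M_PI/16) * sqrt(2) * (1 << 14)).
 * A hypothetical generator (for illustration only, not part of this
 * file) would be:
 *
 *     #include <math.h>
 *     long w(int i) { return lrint(cos(i * M_PI / 16) * M_SQRT2 * 16384); }
 *
 * which yields 22725, 21407, 19266, 16384, 12873, 8867, 4520 for
 * i = 1..7; note that the table stores W4 as 16383, one below the
 * exact value, matching the C simple_idct.  W13/W26/W57 pack two
 * constants into one word so the ARMv5TE smulxy/smlaxy instructions
 * can pick either 16-bit half as a factor.
 */
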
        .text
        .align
w13:    .long W13
w26:    .long W26
w57:    .long W57

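/*
 * Editorial note: idct_row_armv5te performs one 8-point IDCT pass over
 * a row of 16-bit coefficients at a1, in place, removing ROW_SHIFT
 * bits of scale.  A sketch of the even/odd butterfly it implements
 * (illustrative names, not from this file):
 *
 *     even_k built from W4*x0, W2*x2, W6*x6, W4*x4 (DCT sign pattern)
 *     odd_k  built from W1*x1, W3*x3, W5*x5, W7*x7
 *     row[k]   = (even_k + odd_k) >> ROW_SHIFT
 *     row[7-k] = (even_k - odd_k) >> ROW_SHIFT,   k = 0..3
 *
 * Two 16-bit results are repacked per 32-bit word, hence the
 * lsr #11 / bic #0x1f0000 / lsl #16 sequences near the stores.  Rows
 * whose AC coefficients are all zero take the row_dc_only shortcut.
 */
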
function idct_row_armv5te
        str    lr, [sp, #-4]!

        ldrd   v1, [a1, #8]
        ldrd   a3, [a1]               /* a3 = row[1:0], a4 = row[3:2] */
        orrs   v1, v1, v2
        cmpeq  v1, a4
        cmpeq  v1, a3, lsr #16
        beq    row_dc_only

        mov    v1, #(1<<(ROW_SHIFT-1))
        mov    ip, #16384
        sub    ip, ip, #1             /* ip = W4 */
        smlabb v1, ip, a3, v1         /* v1 = W4*row[0]+(1<<(RS-1)) */
        ldr    ip, [pc, #(w26-.-8)]   /* ip = W2 | (W6 << 16) */
        smultb a2, ip, a4
        smulbb lr, ip, a4
        add    v2, v1, a2
        sub    v3, v1, a2
        sub    v4, v1, lr
        add    v1, v1, lr

        ldr    ip, [pc, #(w13-.-8)]   /* ip = W1 | (W3 << 16) */
        ldr    lr, [pc, #(w57-.-8)]   /* lr = W5 | (W7 << 16) */
        smulbt v5, ip, a3
        smultt v6, lr, a4
        smlatt v5, ip, a4, v5
        smultt a2, ip, a3
        smulbt v7, lr, a3
        sub    v6, v6, a2
        smulbt a2, ip, a4
        smultt fp, lr, a3
        sub    v7, v7, a2
        smulbt a2, lr, a4
        ldrd   a3, [a1, #8]           /* a3=row[5:4] a4=row[7:6] */
        sub    fp, fp, a2

        orrs   a2, a3, a4
        beq    1f

        smlabt v5, lr, a3, v5
        smlabt v6, ip, a3, v6
        smlatt v5, lr, a4, v5
        smlabt v6, lr, a4, v6
        smlatt v7, lr, a3, v7
        smlatt fp, ip, a3, fp
        smulbt a2, ip, a4
        smlatt v7, ip, a4, v7
        sub    fp, fp, a2

        ldr    ip, [pc, #(w26-.-8)]   /* ip = W2 | (W6 << 16) */
        mov    a2, #16384
        sub    a2, a2, #1             /* a2 = W4 */
        smulbb a2, a2, a3             /* a2 = W4*row[4] */
        smultb lr, ip, a4             /* lr = W6*row[6] */
        add    v1, v1, a2             /* v1 += W4*row[4] */
        add    v1, v1, lr             /* v1 += W6*row[6] */
        add    v4, v4, a2             /* v4 += W4*row[4] */
        sub    v4, v4, lr             /* v4 -= W6*row[6] */
        smulbb lr, ip, a4             /* lr = W2*row[6] */
        sub    v2, v2, a2             /* v2 -= W4*row[4] */
        sub    v2, v2, lr             /* v2 -= W2*row[6] */
        sub    v3, v3, a2             /* v3 -= W4*row[4] */
        add    v3, v3, lr             /* v3 += W2*row[6] */

1:      add    a2, v1, v5
        mov    a3, a2, lsr #11
        bic    a3, a3, #0x1f0000
        sub    a2, v2, v6
        mov    a2, a2, lsr #11
        add    a3, a3, a2, lsl #16
        add    a2, v3, v7
        mov    a4, a2, lsr #11
        bic    a4, a4, #0x1f0000
        add    a2, v4, fp
        mov    a2, a2, lsr #11
        add    a4, a4, a2, lsl #16
        strd   a3, [a1]

        sub    a2, v4, fp
        mov    a3, a2, lsr #11
        bic    a3, a3, #0x1f0000
        sub    a2, v3, v7
        mov    a2, a2, lsr #11
        add    a3, a3, a2, lsl #16
        add    a2, v2, v6
        mov    a4, a2, lsr #11
        bic    a4, a4, #0x1f0000
        sub    a2, v1, v5
        mov    a2, a2, lsr #11
        add    a4, a4, a2, lsl #16
        strd   a3, [a1, #8]

        ldr    pc, [sp], #4

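/*
 * Editorial note: all-zero AC shortcut.  Every output of the row is
 * then W4*row[0] >> ROW_SHIFT, i.e. row[0] << 3 as in the C version
 * (16384/2^11 = 8).  The orr duplicates row[0] into both halfwords and
 * the bic #0xe000 clears the low-half bits that lsl #3 would otherwise
 * shift into the upper halfword.
 */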
row_dc_only:
        orr    a3, a3, a3, lsl #16
        bic    a3, a3, #0xe000
        mov    a3, a3, lsl #3
        mov    a4, a3
        strd   a3, [a1]
        strd   a3, [a1, #8]

        ldr    pc, [sp], #4
.endfunc

.macro idct_col
        ldr    a4, [a1]               /* a4 = col[1:0] */
        mov    ip, #16384
        sub    ip, ip, #1             /* ip = W4 */
#if 0
        mov    v1, #(1<<(COL_SHIFT-1))
        smlabt v2, ip, a4, v1         /* v2 = W4*col[1] + (1<<(COL_SHIFT-1)) */
        smlabb v1, ip, a4, v1         /* v1 = W4*col[0] + (1<<(COL_SHIFT-1)) */
        ldr    a4, [a1, #(16*4)]
#else
        mov    v1, #((1<<(COL_SHIFT-1))/W4) /* this matches the C version */
        add    v2, v1, a4, asr #16
        rsb    v2, v2, v2, lsl #14
        mov    a4, a4, lsl #16
        add    v1, v1, a4, asr #16
        ldr    a4, [a1, #(16*4)]
        rsb    v1, v1, v1, lsl #14
#endif
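
/*
 * Editorial note: the #else branch above avoids two multiplies.  Since
 * W4 = 16383 = (1 << 14) - 1, "rsb vN, vN, vN, lsl #14" computes
 * vN*W4, and the rounding bias is pre-added as (1<<(COL_SHIFT-1))/W4
 * before that multiply, giving W4*col + (1<<(COL_SHIFT-1)) up to the
 * truncation referred to by the "matches the C version" comment.
 */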

        smulbb lr, ip, a4
        smulbt a3, ip, a4
        sub    v3, v1, lr
        sub    v5, v1, lr
        add    v7, v1, lr
        add    v1, v1, lr
        sub    v4, v2, a3
        sub    v6, v2, a3
        add    fp, v2, a3
        ldr    ip, [pc, #(w26-.-8)]
        ldr    a4, [a1, #(16*2)]
        add    v2, v2, a3

        smulbb lr, ip, a4
        smultb a3, ip, a4
        add    v1, v1, lr
        sub    v7, v7, lr
        add    v3, v3, a3
        sub    v5, v5, a3
        smulbt lr, ip, a4
        smultt a3, ip, a4
        add    v2, v2, lr
        sub    fp, fp, lr
        add    v4, v4, a3
        ldr    a4, [a1, #(16*6)]
        sub    v6, v6, a3

        smultb lr, ip, a4
        smulbb a3, ip, a4
        add    v1, v1, lr
        sub    v7, v7, lr
        sub    v3, v3, a3
        add    v5, v5, a3
        smultt lr, ip, a4
        smulbt a3, ip, a4
        add    v2, v2, lr
        sub    fp, fp, lr
        sub    v4, v4, a3
        add    v6, v6, a3

        stmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp}

        ldr    ip, [pc, #(w13-.-8)]
        ldr    a4, [a1, #(16*1)]
        ldr    lr, [pc, #(w57-.-8)]
        smulbb v1, ip, a4
        smultb v3, ip, a4
        smulbb v5, lr, a4
        smultb v7, lr, a4
        smulbt v2, ip, a4
        smultt v4, ip, a4
        smulbt v6, lr, a4
        smultt fp, lr, a4
        rsb    v4, v4, #0
        ldr    a4, [a1, #(16*3)]
        rsb    v3, v3, #0

        smlatb v1, ip, a4, v1
        smlatb v3, lr, a4, v3
        smulbb a3, ip, a4
        smulbb a2, lr, a4
        sub    v5, v5, a3
        sub    v7, v7, a2
        smlatt v2, ip, a4, v2
        smlatt v4, lr, a4, v4
        smulbt a3, ip, a4
        smulbt a2, lr, a4
        sub    v6, v6, a3
        ldr    a4, [a1, #(16*5)]
        sub    fp, fp, a2

        smlabb v1, lr, a4, v1
        smlabb v3, ip, a4, v3
        smlatb v5, lr, a4, v5
        smlatb v7, ip, a4, v7
        smlabt v2, lr, a4, v2
        smlabt v4, ip, a4, v4
        smlatt v6, lr, a4, v6
        ldr    a3, [a1, #(16*7)]
        smlatt fp, ip, a4, fp

        smlatb v1, lr, a3, v1
        smlabb v3, lr, a3, v3
        smlatb v5, ip, a3, v5
        smulbb a4, ip, a3
        smlatt v2, lr, a3, v2
        sub    v7, v7, a4
        smlabt v4, lr, a3, v4
        smulbt a4, ip, a3
        smlatt v6, ip, a3, v6
        sub    fp, fp, a4
.endm

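/*
 * Editorial note: idct_col leaves the odd-coefficient accumulators in
 * v1..fp and pushes the even-coefficient ones; each block below pops
 * one even pair (a3, a4), forms even+odd and even-odd, shifts by
 * COL_SHIFT and repacks two 16-bit results per word.  The
 * "lsr #20 / orrmi #0xf000" pair is a sign-correct >>20 for the low
 * halfword: orrmi restores the sign bits that lsr discarded, without
 * letting them smear into the high halfword as asr would.
 */
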
function idct_col_armv5te
        str    lr, [sp, #-4]!

        idct_col

        ldmfd  sp!, {a3, a4}
        adds   a2, a3, v1
        mov    a2, a2, lsr #20
        orrmi  a2, a2, #0xf000
        add    ip, a4, v2
        mov    ip, ip, asr #20
        orr    a2, a2, ip, lsl #16
        str    a2, [a1]
        subs   a3, a3, v1
        mov    a2, a3, lsr #20
        orrmi  a2, a2, #0xf000
        sub    a4, a4, v2
        mov    a4, a4, asr #20
        orr    a2, a2, a4, lsl #16
        ldmfd  sp!, {a3, a4}
        str    a2, [a1, #(16*7)]

        subs   a2, a3, v3
        mov    a2, a2, lsr #20
        orrmi  a2, a2, #0xf000
        sub    ip, a4, v4
        mov    ip, ip, asr #20
        orr    a2, a2, ip, lsl #16
        str    a2, [a1, #(16*1)]
        adds   a3, a3, v3
        mov    a2, a3, lsr #20
        orrmi  a2, a2, #0xf000
        add    a4, a4, v4
        mov    a4, a4, asr #20
        orr    a2, a2, a4, lsl #16
        ldmfd  sp!, {a3, a4}
        str    a2, [a1, #(16*6)]

        adds   a2, a3, v5
        mov    a2, a2, lsr #20
        orrmi  a2, a2, #0xf000
        add    ip, a4, v6
        mov    ip, ip, asr #20
        orr    a2, a2, ip, lsl #16
        str    a2, [a1, #(16*2)]
        subs   a3, a3, v5
        mov    a2, a3, lsr #20
        orrmi  a2, a2, #0xf000
        sub    a4, a4, v6
        mov    a4, a4, asr #20
        orr    a2, a2, a4, lsl #16
        ldmfd  sp!, {a3, a4}
        str    a2, [a1, #(16*5)]

        adds   a2, a3, v7
        mov    a2, a2, lsr #20
        orrmi  a2, a2, #0xf000
        add    ip, a4, fp
        mov    ip, ip, asr #20
        orr    a2, a2, ip, lsl #16
        str    a2, [a1, #(16*3)]
        subs   a3, a3, v7
        mov    a2, a3, lsr #20
        orrmi  a2, a2, #0xf000
        sub    a4, a4, fp
        mov    a4, a4, asr #20
        orr    a2, a2, a4, lsl #16
        str    a2, [a1, #(16*4)]

        ldr    pc, [sp], #4
.endfunc

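/*
 * Editorial note: same column pass, but the results are clamped to
 * 0..255 (movmi #0 for the negative side, cmp/movgt #255 for the
 * positive side) and written as byte pairs into the destination
 * picture.  The dest pointer and line_size saved by
 * simple_idct_put_armv5te are reached at [sp, #28] and [sp, #32] once
 * the first even pair has been popped; [sp, #28] is advanced by 2 so
 * the next call writes the next two pixel columns, and v2 = 7*line_size
 * (rsb v2, lr, lr, lsl #3) lets the stores walk the eight output rows
 * from both ends inward.
 */
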
function idct_col_put_armv5te
        str    lr, [sp, #-4]!

        idct_col

        ldmfd  sp!, {a3, a4}
        ldr    lr, [sp, #32]
        add    a2, a3, v1
        movs   a2, a2, asr #20
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        add    ip, a4, v2
        movs   ip, ip, asr #20
        movmi  ip, #0
        cmp    ip, #255
        movgt  ip, #255
        orr    a2, a2, ip, lsl #8
        sub    a3, a3, v1
        movs   a3, a3, asr #20
        movmi  a3, #0
        cmp    a3, #255
        movgt  a3, #255
        sub    a4, a4, v2
        movs   a4, a4, asr #20
        movmi  a4, #0
        cmp    a4, #255
        ldr    v1, [sp, #28]
        movgt  a4, #255
        strh   a2, [v1]
        add    a2, v1, #2
        str    a2, [sp, #28]
        orr    a2, a3, a4, lsl #8
        rsb    v2, lr, lr, lsl #3
        ldmfd  sp!, {a3, a4}
        strh   a2, [v2, v1]!

        sub    a2, a3, v3
        movs   a2, a2, asr #20
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        sub    ip, a4, v4
        movs   ip, ip, asr #20
        movmi  ip, #0
        cmp    ip, #255
        movgt  ip, #255
        orr    a2, a2, ip, lsl #8
        strh   a2, [v1, lr]!
        add    a3, a3, v3
        movs   a2, a3, asr #20
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        add    a4, a4, v4
        movs   a4, a4, asr #20
        movmi  a4, #0
        cmp    a4, #255
        movgt  a4, #255
        orr    a2, a2, a4, lsl #8
        ldmfd  sp!, {a3, a4}
        strh   a2, [v2, -lr]!

        add    a2, a3, v5
        movs   a2, a2, asr #20
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        add    ip, a4, v6
        movs   ip, ip, asr #20
        movmi  ip, #0
        cmp    ip, #255
        movgt  ip, #255
        orr    a2, a2, ip, lsl #8
        strh   a2, [v1, lr]!
        sub    a3, a3, v5
        movs   a2, a3, asr #20
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        sub    a4, a4, v6
        movs   a4, a4, asr #20
        movmi  a4, #0
        cmp    a4, #255
        movgt  a4, #255
        orr    a2, a2, a4, lsl #8
        ldmfd  sp!, {a3, a4}
        strh   a2, [v2, -lr]!

        add    a2, a3, v7
        movs   a2, a2, asr #20
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        add    ip, a4, fp
        movs   ip, ip, asr #20
        movmi  ip, #0
        cmp    ip, #255
        movgt  ip, #255
        orr    a2, a2, ip, lsl #8
        strh   a2, [v1, lr]
        sub    a3, a3, v7
        movs   a2, a3, asr #20
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        sub    a4, a4, fp
        movs   a4, a4, asr #20
        movmi  a4, #0
        cmp    a4, #255
        movgt  a4, #255
        orr    a2, a2, a4, lsl #8
        strh   a2, [v2, -lr]

        ldr    pc, [sp], #4
.endfunc

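/*
 * Editorial note: the add variant additionally loads the existing
 * pixel pair (ldrh), unpacks it with "and #255" and "lsr #8", adds it
 * to the shifted IDCT result before the 0..255 clamp, and repacks the
 * two bytes with "orr ..., lsl #8".
 */
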
function idct_col_add_armv5te
        str    lr, [sp, #-4]!

        idct_col

        ldr    lr, [sp, #36]

        ldmfd  sp!, {a3, a4}
        ldrh   ip, [lr]
        add    a2, a3, v1
        mov    a2, a2, asr #20
        sub    a3, a3, v1
        and    v1, ip, #255
        adds   a2, a2, v1
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        add    v1, a4, v2
        mov    v1, v1, asr #20
        adds   v1, v1, ip, lsr #8
        movmi  v1, #0
        cmp    v1, #255
        movgt  v1, #255
        orr    a2, a2, v1, lsl #8
        ldr    v1, [sp, #32]
        sub    a4, a4, v2
        rsb    v2, v1, v1, lsl #3
        ldrh   ip, [v2, lr]!
        strh   a2, [lr]
        mov    a3, a3, asr #20
        and    a2, ip, #255
        adds   a3, a3, a2
        movmi  a3, #0
        cmp    a3, #255
        movgt  a3, #255
        mov    a4, a4, asr #20
        adds   a4, a4, ip, lsr #8
        movmi  a4, #0
        cmp    a4, #255
        movgt  a4, #255
        add    a2, lr, #2
        str    a2, [sp, #28]
        orr    a2, a3, a4, lsl #8
        strh   a2, [v2]

        ldmfd  sp!, {a3, a4}
        ldrh   ip, [lr, v1]!
        sub    a2, a3, v3
        mov    a2, a2, asr #20
        add    a3, a3, v3
        and    v3, ip, #255
        adds   a2, a2, v3
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        sub    v3, a4, v4
        mov    v3, v3, asr #20
        adds   v3, v3, ip, lsr #8
        movmi  v3, #0
        cmp    v3, #255
        movgt  v3, #255
        orr    a2, a2, v3, lsl #8
        add    a4, a4, v4
        ldrh   ip, [v2, -v1]!
        strh   a2, [lr]
        mov    a3, a3, asr #20
        and    a2, ip, #255
        adds   a3, a3, a2
        movmi  a3, #0
        cmp    a3, #255
        movgt  a3, #255
        mov    a4, a4, asr #20
        adds   a4, a4, ip, lsr #8
        movmi  a4, #0
        cmp    a4, #255
        movgt  a4, #255
        orr    a2, a3, a4, lsl #8
        strh   a2, [v2]

        ldmfd  sp!, {a3, a4}
        ldrh   ip, [lr, v1]!
        add    a2, a3, v5
        mov    a2, a2, asr #20
        sub    a3, a3, v5
        and    v3, ip, #255
        adds   a2, a2, v3
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        add    v3, a4, v6
        mov    v3, v3, asr #20
        adds   v3, v3, ip, lsr #8
        movmi  v3, #0
        cmp    v3, #255
        movgt  v3, #255
        orr    a2, a2, v3, lsl #8
        sub    a4, a4, v6
        ldrh   ip, [v2, -v1]!
        strh   a2, [lr]
        mov    a3, a3, asr #20
        and    a2, ip, #255
        adds   a3, a3, a2
        movmi  a3, #0
        cmp    a3, #255
        movgt  a3, #255
        mov    a4, a4, asr #20
        adds   a4, a4, ip, lsr #8
        movmi  a4, #0
        cmp    a4, #255
        movgt  a4, #255
        orr    a2, a3, a4, lsl #8
        strh   a2, [v2]

        ldmfd  sp!, {a3, a4}
        ldrh   ip, [lr, v1]!
        add    a2, a3, v7
        mov    a2, a2, asr #20
        sub    a3, a3, v7
        and    v3, ip, #255
        adds   a2, a2, v3
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        add    v3, a4, fp
        mov    v3, v3, asr #20
        adds   v3, v3, ip, lsr #8
        movmi  v3, #0
        cmp    v3, #255
        movgt  v3, #255
        orr    a2, a2, v3, lsl #8
        sub    a4, a4, fp
        ldrh   ip, [v2, -v1]!
        strh   a2, [lr]
        mov    a3, a3, asr #20
        and    a2, ip, #255
        adds   a3, a3, a2
        movmi  a3, #0
        cmp    a3, #255
        movgt  a3, #255
        mov    a4, a4, asr #20
        adds   a4, a4, ip, lsr #8
        movmi  a4, #0
        cmp    a4, #255
        movgt  a4, #255
        orr    a2, a3, a4, lsl #8
        strh   a2, [v2]

        ldr    pc, [sp], #4
.endfunc

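/*
 * Editorial note: each entry point below runs eight row passes (rows
 * are 16 bytes apart) followed by four column passes (each column call
 * covers two 16-bit columns, i.e. one 32-bit word).  As used from the
 * ARM dsputil code these correspond to prototypes of roughly this
 * shape (an assumption for illustration, not part of this file):
 *
 *     void simple_idct_armv5te(DCTELEM *data);
 *     void simple_idct_put_armv5te(uint8_t *dest, int line_size, DCTELEM *data);
 *     void simple_idct_add_armv5te(uint8_t *dest, int line_size, DCTELEM *data);
 *
 * The put/add variants save dest (a1) and line_size (a2) on the stack
 * for the column routines, move the coefficient pointer (a3) into a1,
 * and discard the two saved words with "add sp, sp, #8" on exit.
 */
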
function simple_idct_armv5te, export=1
        stmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp, lr}

        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te

        sub    a1, a1, #(16*7)

        bl     idct_col_armv5te
        add    a1, a1, #4
        bl     idct_col_armv5te
        add    a1, a1, #4
        bl     idct_col_armv5te
        add    a1, a1, #4
        bl     idct_col_armv5te

        ldmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
.endfunc

function simple_idct_add_armv5te, export=1
        stmfd  sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr}

        mov    a1, a3

        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te

        sub    a1, a1, #(16*7)

        bl     idct_col_add_armv5te
        add    a1, a1, #4
        bl     idct_col_add_armv5te
        add    a1, a1, #4
        bl     idct_col_add_armv5te
        add    a1, a1, #4
        bl     idct_col_add_armv5te

        add    sp, sp, #8
        ldmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
.endfunc

function simple_idct_put_armv5te, export=1
        stmfd  sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr}

        mov    a1, a3

        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te

        sub    a1, a1, #(16*7)

        bl     idct_col_put_armv5te
        add    a1, a1, #4
        bl     idct_col_put_armv5te
        add    a1, a1, #4
        bl     idct_col_put_armv5te
        add    a1, a1, #4
        bl     idct_col_put_armv5te

        add    sp, sp, #8
        ldmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
.endfunc