annotate armv4l/simple_idct_armv5te.S @ 3990:746a60ba3177 libavcodec

enable CMOV_IS_FAST as its faster or equal speed on every cpu (duron, athlon, PM, P3) from which ive seen benchmarks, it might be slower on P4 but noone has posted benchmarks ...
author michael
date Wed, 11 Oct 2006 12:23:40 +0000
parents c8c591fe26f8
children 97d82c7585b4
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3769
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
1 /*
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
2 * Simple IDCT
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
3 *
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
4 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
5 * Copyright (c) 2006 Mans Rullgard <mru@inprovide.com>
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
6 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3769
diff changeset
7 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3769
diff changeset
8 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3769
diff changeset
9 * FFmpeg is free software; you can redistribute it and/or
3769
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
10 * modify it under the terms of the GNU Lesser General Public
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
11 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3769
diff changeset
12 * version 2.1 of the License, or (at your option) any later version.
3769
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
13 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3769
diff changeset
14 * FFmpeg is distributed in the hope that it will be useful,
3769
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
17 * Lesser General Public License for more details.
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
18 *
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
19 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3769
diff changeset
20 * License along with FFmpeg; if not, write to the Free Software
3769
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
22 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
23
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
24 #define W1 22725 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
25 #define W2 21407 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
26 #define W3 19266 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
27 #define W4 16383 /* cos(4*M_PI/16)*sqrt(2)*(1<<14) - 1; the rounding formula used for the other Wi would give 16384 here */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
28 #define W5 12873 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
29 #define W6 8867 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
30 #define W7 4520 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
31 #define ROW_SHIFT 11
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
32 #define COL_SHIFT 20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
33
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
34 #define W13 (W1 | (W3 << 16))
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
35 #define W26 (W2 | (W6 << 16))
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
36 #define W57 (W5 | (W7 << 16))
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
37
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
38 .text
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
39 .align
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
40 w13: .long W13
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
41 w26: .long W26
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
42 w57: .long W57
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
43
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
44 .align
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
/*
 * idct_row_armv5te: in-place pass over one row of eight 16-bit values.
 *
 * In:   a1 = address of row[0..7]; accessed with ldrd/strd as four words,
 *       each word holding two values (even index in the bottom halfword).
 * Out:  row[0..7] overwritten with the results; a1 is not modified.
 * Regs: a2-a4, v1-v7, fp, ip, lr and the flags are written and not
 *       restored; only the incoming lr is pushed and popped (into pc).
 *
 * When row[1..7] are all zero the row_dc_only shortcut stores row[0] << 3
 * (truncated to 16 bits) into all eight positions.
 * Otherwise v1-v4 accumulate the row[0]/[2]/[4]/[6] products, v5, v6, v7
 * and fp accumulate the row[1]/[3]/[5]/[7] products, and the outputs are
 * formed as sums/differences of those, shifted right by 11 (the value of
 * ROW_SHIFT, written as a literal in the shift instructions).
 */
45 .func idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
46 idct_row_armv5te:
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
47 str lr, [sp, #-4]! /* push return address; popped straight into pc on exit */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
48
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
49 ldrd v1, [a1, #8] /* v1 = row[5:4], v2 = row[7:6] (only tested for zero here) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
50 ldrd a3, [a1] /* a3 = row[1:0], a4 = row[3:2] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
51 orrs v1, v1, v2 /* Z set if row[4..7] are all zero */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
52 cmpeq v1, a4 /* v1 is 0 whenever this executes: Z stays set only if row[3:2] == 0 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
53 cmpeq v1, a3, lsr #16 /* ...and only if row[1] == 0 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
54 beq row_dc_only /* row[1..7] are all zero */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
55
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
56 mov v1, #(1<<(ROW_SHIFT-1)) /* rounding term for the final shift */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
57 mov ip, #16384
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
58 sub ip, ip, #1 /* ip = W4 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
59 smlabb v1, ip, a3, v1 /* v1 = W4*row[0]+(1<<(RS-1)) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
60 ldr ip, [pc, #(w26-.-8)] /* ip = W2 | (W6 << 16) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
61 smultb a2, ip, a4 /* a2 = W6*row[2] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
62 smulbb lr, ip, a4 /* lr = W2*row[2] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
63 add v2, v1, a2 /* v2 = v1 + W6*row[2] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
64 sub v3, v1, a2 /* v3 = v1 - W6*row[2] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
65 sub v4, v1, lr /* v4 = v1 - W2*row[2] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
66 add v1, v1, lr /* v1 = v1 + W2*row[2] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
67
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
68 ldr ip, [pc, #(w13-.-8)] /* ip = W1 | (W3 << 16) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
69 ldr lr, [pc, #(w57-.-8)] /* lr = W5 | (W7 << 16) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
70 smulbt v5, ip, a3 /* v5 = W1*row[1] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
71 smultt v6, lr, a4 /* v6 = W7*row[3] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
72 smlatt v5, ip, a4, v5 /* v5 += W3*row[3] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
73 smultt a2, ip, a3 /* a2 = W3*row[1] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
74 smulbt v7, lr, a3 /* v7 = W5*row[1] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
75 sub v6, v6, a2 /* v6 = W7*row[3] - W3*row[1] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
76 smulbt a2, ip, a4 /* a2 = W1*row[3] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
77 smultt fp, lr, a3 /* fp = W7*row[1] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
78 sub v7, v7, a2 /* v7 = W5*row[1] - W1*row[3] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
79 smulbt a2, lr, a4 /* a2 = W5*row[3] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
80 ldrd a3, [a1, #8] /* a3=row[5:4] a4=row[7:6] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
81 sub fp, fp, a2 /* fp = W7*row[1] - W5*row[3] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
82
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
83 orrs a2, a3, a4 /* Z set if row[4..7] are all zero */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
84 beq 1f /* nothing to add from row[4..7] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
85
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
86 smlabt v5, lr, a3, v5 /* v5 += W5*row[5] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
87 smlabt v6, ip, a3, v6 /* v6 += W1*row[5] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
88 smlatt v5, lr, a4, v5 /* v5 += W7*row[7] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
89 smlabt v6, lr, a4, v6 /* v6 += W5*row[7] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
90 smlatt v7, lr, a3, v7 /* v7 += W7*row[5] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
91 smlatt fp, ip, a3, fp /* fp += W3*row[5] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
92 smulbt a2, ip, a4 /* a2 = W1*row[7] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
93 smlatt v7, ip, a4, v7 /* v7 += W3*row[7] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
94 sub fp, fp, a2 /* fp -= W1*row[7] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
95
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
96 ldr ip, [pc, #(w26-.-8)] /* ip = W2 | (W6 << 16) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
97 mov a2, #16384
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
98 sub a2, a2, #1 /* a2 = W4 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
99 smulbb a2, a2, a3 /* a2 = W4*row[4] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
100 smultb lr, ip, a4 /* lr = W6*row[6] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
101 add v1, v1, a2 /* v1 += W4*row[4] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
102 add v1, v1, lr /* v1 += W6*row[6] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
103 add v4, v4, a2 /* v4 += W4*row[4] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
104 sub v4, v4, lr /* v4 -= W6*row[6] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
105 smulbb lr, ip, a4 /* lr = W2*row[6] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
106 sub v2, v2, a2 /* v2 -= W4*row[4] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
107 sub v2, v2, lr /* v2 -= W2*row[6] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
108 sub v3, v3, a2 /* v3 -= W4*row[4] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
109 add v3, v3, lr /* v3 += W2*row[6] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
110
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
111 1: add a2, v1, v5 /* new row[0] = (v1 + v5) >> 11 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
112 mov a3, a2, lsr #11 /* logical shift: result occupies bits 0-20 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
113 bic a3, a3, #0x1f0000 /* clear bits 16-20, keeping a clean 16-bit bottom halfword */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
114 sub a2, v2, v6 /* new row[1] = (v2 - v6) >> 11 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
115 mov a2, a2, lsr #11
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
116 add a3, a3, a2, lsl #16 /* a3 = new row[1:0]; excess high bits fall off in the lsl */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
117 add a2, v3, v7 /* new row[2] = (v3 + v7) >> 11 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
118 mov a4, a2, lsr #11
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
119 bic a4, a4, #0x1f0000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
120 add a2, v4, fp /* new row[3] = (v4 + fp) >> 11 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
121 mov a2, a2, lsr #11
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
122 add a4, a4, a2, lsl #16 /* a4 = new row[3:2] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
123 strd a3, [a1] /* store new row[0..3] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
124
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
125 sub a2, v4, fp /* new row[4] = (v4 - fp) >> 11 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
126 mov a3, a2, lsr #11
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
127 bic a3, a3, #0x1f0000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
128 sub a2, v3, v7 /* new row[5] = (v3 - v7) >> 11 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
129 mov a2, a2, lsr #11
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
130 add a3, a3, a2, lsl #16 /* a3 = new row[5:4] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
131 add a2, v2, v6 /* new row[6] = (v2 + v6) >> 11 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
132 mov a4, a2, lsr #11
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
133 bic a4, a4, #0x1f0000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
134 sub a2, v1, v5 /* new row[7] = (v1 - v5) >> 11 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
135 mov a2, a2, lsr #11
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
136 add a4, a4, a2, lsl #16 /* a4 = new row[7:6] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
137 strd a3, [a1, #8] /* store new row[4..7] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
138
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
139 ldr pc, [sp], #4 /* return: pop the saved lr into pc */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
140
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
141 row_dc_only:
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
142 orr a3, a3, a3, lsl #16 /* row[1] is 0 here, so both halfwords become row[0] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
143 bic a3, a3, #0xe000 /* drop bits 13-15 so the shift below cannot spill into the top halfword */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
144 mov a3, a3, lsl #3 /* each halfword = row[0] << 3, truncated to 16 bits */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
145 mov a4, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
146 strd a3, [a1] /* row[0..3] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
147 strd a3, [a1, #8] /* row[4..7] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
148
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
149 ldr pc, [sp], #4 /* return: pop the saved lr into pc */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
150 .endfunc
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
151
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
/*
 * idct_col: shared front end of the column routines. Processes two columns
 * at once: every load fetches one word holding two 16-bit values, and the
 * bottom and top halfwords are treated as two independent columns.
 * Successive rows are read at byte offsets 16*k from a1, k = 0..7.
 *
 * In:   a1 = address of the first word (row 0) of the column pair.
 * Out:  eight words pushed on the stack (sp is left 32 bytes lower), in
 *       ascending address order v1, v2, v3, v4, v5, v6, v7, fp, holding
 *       the sums built from rows 0, 4, 2 and 6;
 *       v1, v3, v5, v7 (bottom-halfword column) and v2, v4, v6, fp
 *       (top-halfword column) hold the sums built from rows 1, 3, 5 and 7.
 *       Note that v3 and v4 are accumulated with the opposite sign of the
 *       other registers; the code following the macro subtracts them where
 *       it adds the others.
 * Regs: a2, a3, a4, ip, lr are overwritten as scratch; a1 is unchanged.
 *       Flags are not modified by the macro itself.
 */
152 .macro idct_col
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
153 ldr a4, [a1] /* a4 = col[1:0]: row 0 of the two columns, one per halfword */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
154 mov ip, #16384
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
155 sub ip, ip, #1 /* ip = W4 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
156 #if 0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
157 mov v1, #(1<<(COL_SHIFT-1))
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
158 smlabt v2, ip, a4, v1 /* v2 = W4*col[1] + (1<<(COL_SHIFT-1)) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
159 smlabb v1, ip, a4, v1 /* v1 = W4*col[0] + (1<<(COL_SHIFT-1)) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
160 ldr a4, [a1, #(16*4)]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
161 #else
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
162 mov v1, #((1<<(COL_SHIFT-1))/W4) /* this matches the C version */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
163 add v2, v1, a4, asr #16 /* v2 = top halfword (sign-extended) + rounding constant */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
164 rsb v2, v2, v2, lsl #14 /* v2 = v2*16384 - v2 = v2*W4 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
165 mov a4, a4, lsl #16 /* move the bottom halfword up so asr can sign-extend it */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
166 add v1, v1, a4, asr #16 /* v1 = bottom halfword (sign-extended) + rounding constant */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
167 ldr a4, [a1, #(16*4)] /* a4 = row 4 of both columns */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
168 rsb v1, v1, v1, lsl #14 /* v1 = v1*16384 - v1 = v1*W4 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
169 #endif
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
170
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
171 smulbb lr, ip, a4 /* lr = W4*row4, bottom column */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
172 smulbt a3, ip, a4 /* a3 = W4*row4, top column */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
173 sub v3, v1, lr /* bottom column: v3 = v5 = v1 - lr, v7 = v1 = v1 + lr */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
174 sub v5, v1, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
175 add v7, v1, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
176 add v1, v1, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
177 sub v4, v2, a3 /* top column: v4 = v6 = v2 - a3, fp = v2 = v2 + a3 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
178 sub v6, v2, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
179 add fp, v2, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
180 ldr ip, [pc, #(w26-.-8)] /* ip = W2 | (W6 << 16) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
181 ldr a4, [a1, #(16*2)] /* a4 = row 2 of both columns */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
182 add v2, v2, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
183
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
184 smulbb lr, ip, a4 /* lr = W2*row2, bottom column */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
185 smultb a3, ip, a4 /* a3 = W6*row2, bottom column */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
186 add v1, v1, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
187 sub v7, v7, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
188 add v3, v3, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
189 sub v5, v5, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
190 smulbt lr, ip, a4 /* lr = W2*row2, top column */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
191 smultt a3, ip, a4 /* a3 = W6*row2, top column */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
192 add v2, v2, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
193 sub fp, fp, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
194 add v4, v4, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
195 ldr a4, [a1, #(16*6)] /* a4 = row 6 of both columns */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
196 sub v6, v6, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
197
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
198 smultb lr, ip, a4 /* lr = W6*row6, bottom column */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
199 smulbb a3, ip, a4 /* a3 = W2*row6, bottom column */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
200 add v1, v1, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
201 sub v7, v7, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
202 sub v3, v3, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
203 add v5, v5, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
204 smultt lr, ip, a4 /* lr = W6*row6, top column */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
205 smulbt a3, ip, a4 /* a3 = W2*row6, top column */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
206 add v2, v2, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
207 sub fp, fp, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
208 sub v4, v4, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
209 add v6, v6, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
210
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
211 stmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp} /* park the row 0/2/4/6 sums; the registers are reused below */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
212
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
213 ldr ip, [pc, #(w13-.-8)] /* ip = W1 | (W3 << 16) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
214 ldr a4, [a1, #(16*1)] /* a4 = row 1 of both columns */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
215 ldr lr, [pc, #(w57-.-8)] /* lr = W5 | (W7 << 16) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
216 smulbb v1, ip, a4 /* bottom column: v1 = W1*row1 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
217 smultb v3, ip, a4 /* v3 = W3*row1 (negated below) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
218 smulbb v5, lr, a4 /* v5 = W5*row1 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
219 smultb v7, lr, a4 /* v7 = W7*row1 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
220 smulbt v2, ip, a4 /* top column: v2 = W1*row1 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
221 smultt v4, ip, a4 /* v4 = W3*row1 (negated below) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
222 smulbt v6, lr, a4 /* v6 = W5*row1 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
223 smultt fp, lr, a4 /* fp = W7*row1 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
224 rsb v4, v4, #0 /* v4 = -W3*row1 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
225 ldr a4, [a1, #(16*3)] /* a4 = row 3 of both columns */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
226 rsb v3, v3, #0 /* v3 = -W3*row1 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
227
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
228 smlatb v1, ip, a4, v1 /* v1 += W3*row3 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
229 smlatb v3, lr, a4, v3 /* v3 += W7*row3 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
230 smulbb a3, ip, a4 /* a3 = W1*row3 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
231 smulbb a2, lr, a4 /* a2 = W5*row3 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
232 sub v5, v5, a3 /* v5 -= W1*row3 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
233 sub v7, v7, a2 /* v7 -= W5*row3 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
234 smlatt v2, ip, a4, v2 /* same four updates for the top column */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
235 smlatt v4, lr, a4, v4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
236 smulbt a3, ip, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
237 smulbt a2, lr, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
238 sub v6, v6, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
239 ldr a4, [a1, #(16*5)] /* a4 = row 5 of both columns */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
240 sub fp, fp, a2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
241
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
242 smlabb v1, lr, a4, v1 /* v1 += W5*row5 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
243 smlabb v3, ip, a4, v3 /* v3 += W1*row5 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
244 smlatb v5, lr, a4, v5 /* v5 += W7*row5 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
245 smlatb v7, ip, a4, v7 /* v7 += W3*row5 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
246 smlabt v2, lr, a4, v2 /* same four updates for the top column */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
247 smlabt v4, ip, a4, v4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
248 smlatt v6, lr, a4, v6
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
249 ldr a3, [a1, #(16*7)] /* a3 = row 7 of both columns */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
250 smlatt fp, ip, a4, fp
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
251
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
252 smlatb v1, lr, a3, v1 /* v1 += W7*row7 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
253 smlabb v3, lr, a3, v3 /* v3 += W5*row7 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
254 smlatb v5, ip, a3, v5 /* v5 += W3*row7 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
255 smulbb a4, ip, a3 /* a4 = W1*row7, bottom column */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
256 smlatt v2, lr, a3, v2 /* v2 += W7*row7 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
257 sub v7, v7, a4 /* v7 -= W1*row7 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
258 smlabt v4, lr, a3, v4 /* v4 += W5*row7 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
259 smulbt a4, ip, a3 /* a4 = W1*row7, top column */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
260 smlatt v6, ip, a3, v6 /* v6 += W3*row7 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
261 sub fp, fp, a4 /* fp -= W1*row7 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
262 .endm
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
263
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
264 .align
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
/*
 * idct_col_armv5te: in-place column pass over two columns at once.
 *
 * In:   a1 = address of the row-0 word of the column pair; rows are 16
 *       bytes apart and each word holds one 16-bit value per column.
 * Out:  the eight words at a1 + 16*k (k = 0..7) are overwritten with the
 *       results, each combined value shifted right by 20 (the value of
 *       COL_SHIFT, written as a literal) and truncated to 16 bits.
 * Regs: a2-a4, v1-v7, fp, ip, lr and the flags are written and not
 *       restored; only the incoming lr is pushed and popped (into pc).
 *
 * The idct_col macro leaves four register pairs on the stack; each
 * ldmfd below pops one pair (bottom-halfword column in a3, top-halfword
 * column in a4) and combines it with the matching pair still held in
 * v1/v2, v3/v4, v5/v6 or v7/fp to produce two output rows.
 */
265 .func idct_col_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
266 idct_col_armv5te:
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
267 str lr, [sp, #-4]! /* push return address; popped straight into pc on exit */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
268
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
269 idct_col
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
270
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
271 ldmfd sp!, {a3, a4} /* a3, a4 = first pair pushed by idct_col (its v1, v2) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
272 adds a2, a3, v1 /* bottom column, row 0; N flag records the sign */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
273 mov a2, a2, lsr #20 /* logical shift leaves the result in bits 0-11; flags untouched */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
274 orrmi a2, a2, #0xf000 /* negative: set bits 12-15 to complete the 16-bit sign extension */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
275 add ip, a4, v2 /* top column, row 0 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
276 mov ip, ip, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
277 orr a2, a2, ip, lsl #16 /* pack: top halfword = top column */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
278 str a2, [a1] /* row 0 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
279 subs a3, a3, v1 /* same pair with the opposite sign gives row 7 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
280 mov a2, a3, lsr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
281 orrmi a2, a2, #0xf000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
282 sub a4, a4, v2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
283 mov a4, a4, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
284 orr a2, a2, a4, lsl #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
285 ldmfd sp!, {a3, a4} /* a3, a4 = second pair pushed by idct_col (its v3, v4) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
286 str a2, [a1, #(16*7)] /* row 7 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
287
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
288 subs a2, a3, v3 /* v3/v4 were accumulated negated by idct_col, hence sub for row 1 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
289 mov a2, a2, lsr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
290 orrmi a2, a2, #0xf000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
291 sub ip, a4, v4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
292 mov ip, ip, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
293 orr a2, a2, ip, lsl #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
294 str a2, [a1, #(16*1)] /* row 1 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
295 adds a3, a3, v3 /* ...and add for row 6 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
296 mov a2, a3, lsr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
297 orrmi a2, a2, #0xf000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
298 add a4, a4, v4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
299 mov a4, a4, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
300 orr a2, a2, a4, lsl #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
301 ldmfd sp!, {a3, a4} /* a3, a4 = third pair pushed by idct_col (its v5, v6) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
302 str a2, [a1, #(16*6)] /* row 6 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
303
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
304 adds a2, a3, v5 /* sum gives row 2 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
305 mov a2, a2, lsr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
306 orrmi a2, a2, #0xf000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
307 add ip, a4, v6
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
308 mov ip, ip, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
309 orr a2, a2, ip, lsl #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
310 str a2, [a1, #(16*2)] /* row 2 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
311 subs a3, a3, v5 /* difference gives row 5 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
312 mov a2, a3, lsr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
313 orrmi a2, a2, #0xf000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
314 sub a4, a4, v6
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
315 mov a4, a4, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
316 orr a2, a2, a4, lsl #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
317 ldmfd sp!, {a3, a4} /* a3, a4 = fourth pair pushed by idct_col (its v7, fp) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
318 str a2, [a1, #(16*5)] /* row 5 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
319
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
320 adds a2, a3, v7 /* sum gives row 3 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
321 mov a2, a2, lsr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
322 orrmi a2, a2, #0xf000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
323 add ip, a4, fp
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
324 mov ip, ip, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
325 orr a2, a2, ip, lsl #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
326 str a2, [a1, #(16*3)] /* row 3 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
327 subs a3, a3, v7 /* difference gives row 4 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
328 mov a2, a3, lsr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
329 orrmi a2, a2, #0xf000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
330 sub a4, a4, fp
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
331 mov a4, a4, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
332 orr a2, a2, a4, lsl #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
333 str a2, [a1, #(16*4)] /* row 4 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
334
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
335 ldr pc, [sp], #4 /* return: all 8 macro words are popped, so this is the saved lr */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
336 .endfunc
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
337
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
/*
 * idct_col_put_armv5te
 *
 * Column pass that overwrites the destination.  The idct_col macro (defined
 * earlier in this file) is expanded first; this routine then pops four
 * {a3, a4} word pairs from the stack and combines each pair with one of the
 * register pairs v1/v2, v3/v4, v5/v6, v7/fp.  Every sum and difference is
 * shifted right arithmetically by 20 bits, clamped to 0..255 and packed two
 * bytes to a halfword (a3-derived value in the low byte, a4-derived value in
 * the high byte) before it is stored.
 *
 * Stack slots as addressed after the first pop:
 *   [sp, #28]  destination pointer; read, advanced by 2 and written back
 *   [sp, #32]  byte offset between destination rows
 * These are the a1/a2 words pushed by simple_idct_put_armv5te.
 *
 * v1 walks forward from the first row and v2 walks backward from the row
 * seven strides further on, so rows are written in the order
 * 0, 7, 1, 6, 2, 5, 3, 4.
 *
 * The helper macros below expand to straight-line code; nothing is called.
 */

        /* dst = clamp(src >> 20, 0, 255) */
        .macro  colput_sat dst, src
        movs    \dst, \src, asr #20
        movmi   \dst, #0
        cmp     \dst, #255
        movgt   \dst, #255
        .endm

        /* a2 = packed pair built from (a3 opr ca) and (a4 opr cb);
           a3 and a4 are left untouched for the opposite row */
        .macro  colput_top opr, ca, cb
        \opr    a2, a3, \ca
        colput_sat a2, a2
        \opr    ip, a4, \cb
        colput_sat ip, ip
        orr     a2, a2, ip, lsl #8
        .endm

        /* a2 = packed pair built from (a3 opr ca) and (a4 opr cb);
           a3 and a4 are consumed */
        .macro  colput_bottom opr, ca, cb
        \opr    a3, a3, \ca
        colput_sat a2, a3
        \opr    a4, a4, \cb
        colput_sat a4, a4
        orr     a2, a2, a4, lsl #8
        .endm

        .align
        .func   idct_col_put_armv5te
idct_col_put_armv5te:
        str     lr, [sp, #-4]!                  /* push return address */

        idct_col

        /* ---- first pair: rows 0 and 7; also sets up the row pointers ---- */
        ldmfd   sp!, {a3, a4}
        ldr     lr, [sp, #32]                   /* lr = row stride */
        colput_top add, v1, v2
        sub     a3, a3, v1
        colput_sat a3, a3
        sub     a4, a4, v2
        movs    a4, a4, asr #20
        movmi   a4, #0
        cmp     a4, #255
        ldr     v1, [sp, #28]                   /* v1 = destination; flags from cmp survive the load */
        movgt   a4, #255
        strh    a2, [v1]                        /* row 0 */
        add     a2, v1, #2
        str     a2, [sp, #28]                   /* destination += 2 for the next call */
        orr     a2, a3, a4, lsl #8
        rsb     v2, lr, lr, lsl #3              /* v2 = 7 * stride */
        ldmfd   sp!, {a3, a4}
        strh    a2, [v2, v1]!                   /* v2 += v1; row 7 */

        /* ---- second pair: rows 1 and 6 ---- */
        colput_top sub, v3, v4
        strh    a2, [v1, lr]!
        colput_bottom add, v3, v4
        ldmfd   sp!, {a3, a4}
        strh    a2, [v2, -lr]!

        /* ---- third pair: rows 2 and 5 ---- */
        colput_top add, v5, v6
        strh    a2, [v1, lr]!
        colput_bottom sub, v5, v6
        ldmfd   sp!, {a3, a4}
        strh    a2, [v2, -lr]!

        /* ---- fourth pair: rows 3 and 4 (no pointer write-back needed) ---- */
        colput_top add, v7, fp
        strh    a2, [v1, lr]
        colput_bottom sub, v7, fp
        strh    a2, [v2, -lr]

        ldr     pc, [sp], #4                    /* pop return address into pc */
        .endfunc

        .purgem colput_bottom
        .purgem colput_top
        .purgem colput_sat
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
456
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
/*
 * idct_col_add_armv5te
 *
 * Column pass that adds to the existing destination bytes.  The idct_col
 * macro (defined earlier in this file) is expanded first; this routine then
 * pops four {a3, a4} word pairs from the stack and combines each pair with
 * one of the register pairs v1/v2, v3/v4, v5/v6, v7/fp.  Every sum and
 * difference is shifted right arithmetically by 20 bits, added to the byte
 * already present at the destination, clamped to 0..255 and written back.
 * Two bytes are handled per halfword access: the a3-derived value goes with
 * the low byte, the a4-derived value with the high byte.
 *
 * Stack slots:
 *   [sp, #36] before the first pop / [sp, #28] after it
 *              destination pointer; advanced by 2 and written back
 *   [sp, #32] after the first pop
 *              byte offset between destination rows
 * These are the a1/a2 words pushed by simple_idct_add_armv5te.
 *
 * lr walks forward from the first row and v2 walks backward from the row
 * seven strides further on, so rows are updated in the order
 * 0, 7, 1, 6, 2, 5, 3, 4.  v1 and v3 are reused as scratch once their
 * incoming values have been consumed.
 *
 * The helper macros below expand to straight-line code; nothing is called.
 */

        /* clamp reg to 0..255; must directly follow a flag-setting adds on reg */
        .macro  coladd_clip reg
        movmi   \reg, #0
        cmp     \reg, #255
        movgt   \reg, #255
        .endm

        /* a3 = clamp((a3 >> 20) + low byte of ip),
           a4 = clamp((a4 >> 20) + high byte of ip); a2 is scratch */
        .macro  coladd_blend
        mov     a3, a3, asr #20
        and     a2, ip, #255
        adds    a3, a3, a2
        coladd_clip a3
        mov     a4, a4, asr #20
        adds    a4, a4, ip, lsr #8
        coladd_clip a4
        .endm

        /* Pop the next pair, step lr to the next row and build in a2 the
           updated halfword for that row from (a3 opa ca) and (a4 opa cb).
           a3/a4 are left holding (a3 opb ca) and (a4 opb cb) for the
           opposite row.  v3 is scratch. */
        .macro  coladd_top opa, opb, ca, cb
        ldmfd   sp!, {a3, a4}
        ldrh    ip, [lr, v1]!
        \opa    a2, a3, \ca
        mov     a2, a2, asr #20
        \opb    a3, a3, \ca
        and     v3, ip, #255
        adds    a2, a2, v3
        coladd_clip a2
        \opa    v3, a4, \cb
        mov     v3, v3, asr #20
        adds    v3, v3, ip, lsr #8
        coladd_clip v3
        orr     a2, a2, v3, lsl #8
        \opb    a4, a4, \cb
        .endm

        /* Step v2 back one row, store the pending halfword in a2 at lr,
           then blend a3/a4 into the row at v2 and store it. */
        .macro  coladd_bottom
        ldrh    ip, [v2, -v1]!
        strh    a2, [lr]
        coladd_blend
        orr     a2, a3, a4, lsl #8
        strh    a2, [v2]
        .endm

        .align
        .func   idct_col_add_armv5te
idct_col_add_armv5te:
        str     lr, [sp, #-4]!                  /* push return address */

        idct_col

        ldr     lr, [sp, #36]                   /* lr = destination pointer */

        /* ---- first pair: rows 0 and 7; also sets up stride and v2 ---- */
        ldmfd   sp!, {a3, a4}
        ldrh    ip, [lr]                        /* current row 0 bytes */
        add     a2, a3, v1
        mov     a2, a2, asr #20
        sub     a3, a3, v1
        and     v1, ip, #255                    /* v1 consumed above, now scratch */
        adds    a2, a2, v1
        coladd_clip a2
        add     v1, a4, v2
        mov     v1, v1, asr #20
        adds    v1, v1, ip, lsr #8
        coladd_clip v1
        orr     a2, a2, v1, lsl #8
        ldr     v1, [sp, #32]                   /* v1 = row stride from here on */
        sub     a4, a4, v2
        rsb     v2, v1, v1, lsl #3              /* v2 = 7 * stride */
        ldrh    ip, [v2, lr]!                   /* v2 += lr; current row 7 bytes */
        strh    a2, [lr]                        /* row 0 */
        coladd_blend
        add     a2, lr, #2
        str     a2, [sp, #28]                   /* destination += 2 for the next call */
        orr     a2, a3, a4, lsl #8
        strh    a2, [v2]                        /* row 7 */

        /* ---- second pair: rows 1 and 6 ---- */
        coladd_top sub, add, v3, v4
        coladd_bottom

        /* ---- third pair: rows 2 and 5 ---- */
        coladd_top add, sub, v5, v6
        coladd_bottom

        /* ---- fourth pair: rows 3 and 4 ---- */
        coladd_top add, sub, v7, fp
        coladd_bottom

        ldr     pc, [sp], #4                    /* pop return address into pc */
        .endfunc

        .purgem coladd_bottom
        .purgem coladd_top
        .purgem coladd_blend
        .purgem coladd_clip
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
608
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
/*
 * simple_idct_armv5te
 *
 * In:  a1 = pointer handed unchanged to the row and column routines
 *
 * Calls idct_row_armv5te eight times with a1 advanced by 16 bytes between
 * calls, rewinds a1 to its entry value, then calls idct_col_armv5te four
 * times with a1 advanced by 4 bytes between calls.  v1-v7, fp are saved on
 * entry and restored on exit; the return is done by popping the saved lr
 * straight into pc.  a1 is not rewound after the column calls.
 *
 * NOTE(review): the 16-byte step presumably corresponds to one row of eight
 * 16-bit coefficients - confirm against idct_row_armv5te.
 */
        .align
        .global simple_idct_armv5te
        .func   simple_idct_armv5te
simple_idct_armv5te:
        stmfd   sp!, {v1-v7, fp, lr}

        /* row pass: 8 calls, pointer stepped 16 bytes in between */
        bl      idct_row_armv5te
        .rept   7
        add     a1, a1, #16
        bl      idct_row_armv5te
        .endr

        sub     a1, a1, #(16*7)                 /* back to the first row */

        /* column pass: 4 calls, pointer stepped 4 bytes in between */
        bl      idct_col_armv5te
        .rept   3
        add     a1, a1, #4
        bl      idct_col_armv5te
        .endr

        ldmfd   sp!, {v1-v7, fp, pc}
        .endfunc
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
643
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
/*
 * simple_idct_add_armv5te
 *
 * In:  a1, a2 = saved at the bottom of the stack frame, where
 *               idct_col_add_armv5te reads them as the destination pointer
 *               and the byte offset between destination rows
 *      a3     = pointer handed to the row and column routines (moved to a1)
 *
 * Calls idct_row_armv5te eight times with a1 advanced by 16 bytes between
 * calls, rewinds a1, then calls idct_col_add_armv5te four times with a1
 * advanced by 4 bytes between calls.  The two saved argument words are
 * discarded before v1-v7, fp are restored and the saved lr is popped into pc.
 */
        .align
        .global simple_idct_add_armv5te
        .func   simple_idct_add_armv5te
simple_idct_add_armv5te:
        stmfd   sp!, {a1, a2, v1-v7, fp, lr}

        mov     a1, a3

        /* row pass: 8 calls, pointer stepped 16 bytes in between */
        bl      idct_row_armv5te
        .rept   7
        add     a1, a1, #16
        bl      idct_row_armv5te
        .endr

        sub     a1, a1, #(16*7)                 /* back to the first row */

        /* column pass: 4 calls, pointer stepped 4 bytes in between */
        bl      idct_col_add_armv5te
        .rept   3
        add     a1, a1, #4
        bl      idct_col_add_armv5te
        .endr

        add     sp, sp, #8                      /* drop the saved a1/a2 words */
        ldmfd   sp!, {v1-v7, fp, pc}
        .endfunc
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
681
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
/*
 * simple_idct_put_armv5te
 *
 * In:  a1, a2 = saved at the bottom of the stack frame, where
 *               idct_col_put_armv5te reads them as the destination pointer
 *               and the byte offset between destination rows
 *      a3     = pointer handed to the row and column routines (moved to a1)
 *
 * Calls idct_row_armv5te eight times with a1 advanced by 16 bytes between
 * calls, rewinds a1, then calls idct_col_put_armv5te four times with a1
 * advanced by 4 bytes between calls.  The two saved argument words are
 * discarded before v1-v7, fp are restored and the saved lr is popped into pc.
 */
        .align
        .global simple_idct_put_armv5te
        .func   simple_idct_put_armv5te
simple_idct_put_armv5te:
        stmfd   sp!, {a1, a2, v1-v7, fp, lr}

        mov     a1, a3

        /* row pass: 8 calls, pointer stepped 16 bytes in between */
        bl      idct_row_armv5te
        .rept   7
        add     a1, a1, #16
        bl      idct_row_armv5te
        .endr

        sub     a1, a1, #(16*7)                 /* back to the first row */

        /* column pass: 4 calls, pointer stepped 4 bytes in between */
        bl      idct_col_put_armv5te
        .rept   3
        add     a1, a1, #4
        bl      idct_col_put_armv5te
        .endr

        add     sp, sp, #8                      /* drop the saved a1/a2 words */
        ldmfd   sp!, {v1-v7, fp, pc}
        .endfunc