annotate armv4l/simple_idct_armv5te.S @ 5229:604a09d6cf2e libavcodec

move ff_copy_bits to bitstream.c
author aurel
date Fri, 06 Jul 2007 14:13:25 +0000
parents 744e91a36a23
children 316762ae96a7
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3769
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
1 /*
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
2 * Simple IDCT
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
3 *
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
4 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
5220
744e91a36a23 update my email address
mru
parents: 4867
diff changeset
5 * Copyright (c) 2006 Mans Rullgard <mans@mansr.com>
3769
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
6 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3769
diff changeset
7 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3769
diff changeset
8 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3769
diff changeset
9 * FFmpeg is free software; you can redistribute it and/or
3769
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
10 * modify it under the terms of the GNU Lesser General Public
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
11 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3769
diff changeset
12 * version 2.1 of the License, or (at your option) any later version.
3769
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
13 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3769
diff changeset
14 * FFmpeg is distributed in the hope that it will be useful,
3769
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
17 * Lesser General Public License for more details.
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
18 *
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
19 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3769
diff changeset
20 * License along with FFmpeg; if not, write to the Free Software
3769
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
22 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
23
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
24 #define W1 22725 /* cos(1*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
25 #define W2 21407 /* cos(2*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
26 #define W3 19266 /* cos(3*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
27 #define W4 16383 /* cos(4*M_PI/16)*sqrt(2)*(1<<14) + 0.5 evaluates to 16384; the value used is one less. NOTE(review): presumably deliberate, to match the C version -- confirm */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
28 #define W5 12873 /* cos(5*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
29 #define W6 8867 /* cos(6*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
30 #define W7 4520 /* cos(7*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
31 #define ROW_SHIFT 11 /* row pass: rounding term is 1<<(ROW_SHIFT-1); the code shifts results by a literal 11 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
32 #define COL_SHIFT 20 /* column pass: rounding term derives from 1<<(COL_SHIFT-1); the code shifts results by a literal 20 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
33
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
34 #define W13 (W1 | (W3 << 16)) /* W1 in the low halfword, W3 in the high halfword */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
35 #define W26 (W2 | (W6 << 16)) /* W2 in the low halfword, W6 in the high halfword */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
36 #define W57 (W5 | (W7 << 16)) /* W5 in the low halfword, W7 in the high halfword */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
37
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
38 .text
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
39 .align
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
40 w13: .long W13 /* packed coefficient pairs kept in .text so the routines below can load them pc-relative: ldr rX, [pc, #(label-.-8)] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
41 w26: .long W26 /* W2 | (W6 << 16) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
42 w57: .long W57 /* W5 | (W7 << 16) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
43
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
44 .align
4867
97d82c7585b4 add .type foo, %function directives for the benefit of debuggers
mru
parents: 3947
diff changeset
45 .type idct_row_armv5te, %function
3769
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
46 .func idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
47 idct_row_armv5te: /* in-place 8-point IDCT of one row; a1 = pointer to eight 16-bit coefficients. Writes a2-a4, v1-v7, fp, ip without saving them (only lr is stacked) -- NOTE(review): caller presumably preserves v1-v7/fp, confirm */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
48 str lr, [sp, #-4]! /* push return address; lr is used as scratch below */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
49
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
50 ldrd v1, [a1, #8] /* v1 = row[5:4], v2 = row[7:6] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
51 ldrd a3, [a1] /* a3 = row[1:0], a4 = row[3:2] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
52 orrs v1, v1, v2 /* Z set when row[4..7] are all zero */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
53 cmpeq v1, a4 /* ...then also require row[3:2] == 0 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
54 cmpeq v1, a3, lsr #16 /* ...and row[1] == 0 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
55 beq row_dc_only /* row[1..7] all zero: take the DC-only shortcut */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
56
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
57 mov v1, #(1<<(ROW_SHIFT-1)) /* rounding term */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
58 mov ip, #16384
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
59 sub ip, ip, #1 /* ip = W4 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
60 smlabb v1, ip, a3, v1 /* v1 = W4*row[0]+(1<<(RS-1)) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
61 ldr ip, [pc, #(w26-.-8)] /* ip = W2 | (W6 << 16) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
62 smultb a2, ip, a4 /* a2 = W6*row[2] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
63 smulbb lr, ip, a4 /* lr = W2*row[2] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
64 add v2, v1, a2 /* v2 = v1 + W6*row[2] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
65 sub v3, v1, a2 /* v3 = v1 - W6*row[2] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
66 sub v4, v1, lr /* v4 = v1 - W2*row[2] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
67 add v1, v1, lr /* v1 = v1 + W2*row[2] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
68
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
69 ldr ip, [pc, #(w13-.-8)] /* ip = W1 | (W3 << 16) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
70 ldr lr, [pc, #(w57-.-8)] /* lr = W5 | (W7 << 16) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
71 smulbt v5, ip, a3 /* v5 = W1*row[1] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
72 smultt v6, lr, a4 /* v6 = W7*row[3] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
73 smlatt v5, ip, a4, v5 /* v5 += W3*row[3] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
74 smultt a2, ip, a3 /* a2 = W3*row[1] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
75 smulbt v7, lr, a3 /* v7 = W5*row[1] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
76 sub v6, v6, a2 /* v6 = W7*row[3] - W3*row[1] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
77 smulbt a2, ip, a4 /* a2 = W1*row[3] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
78 smultt fp, lr, a3 /* fp = W7*row[1] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
79 sub v7, v7, a2 /* v7 = W5*row[1] - W1*row[3] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
80 smulbt a2, lr, a4 /* a2 = W5*row[3] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
81 ldrd a3, [a1, #8] /* a3=row[5:4] a4=row[7:6] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
82 sub fp, fp, a2 /* fp = W7*row[1] - W5*row[3] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
83
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
84 orrs a2, a3, a4 /* are row[4..7] all zero? */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
85 beq 1f /* yes: skip their contributions */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
86
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
87 smlabt v5, lr, a3, v5 /* v5 += W5*row[5] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
88 smlabt v6, ip, a3, v6 /* v6 += W1*row[5] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
89 smlatt v5, lr, a4, v5 /* v5 += W7*row[7] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
90 smlabt v6, lr, a4, v6 /* v6 += W5*row[7] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
91 smlatt v7, lr, a3, v7 /* v7 += W7*row[5] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
92 smlatt fp, ip, a3, fp /* fp += W3*row[5] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
93 smulbt a2, ip, a4 /* a2 = W1*row[7] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
94 smlatt v7, ip, a4, v7 /* v7 += W3*row[7] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
95 sub fp, fp, a2 /* fp -= W1*row[7] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
96
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
97 ldr ip, [pc, #(w26-.-8)] /* ip = W2 | (W6 << 16) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
98 mov a2, #16384
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
99 sub a2, a2, #1 /* a2 = W4 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
100 smulbb a2, a2, a3 /* a2 = W4*row[4] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
101 smultb lr, ip, a4 /* lr = W6*row[6] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
102 add v1, v1, a2 /* v1 += W4*row[4] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
103 add v1, v1, lr /* v1 += W6*row[6] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
104 add v4, v4, a2 /* v4 += W4*row[4] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
105 sub v4, v4, lr /* v4 -= W6*row[6] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
106 smulbb lr, ip, a4 /* lr = W2*row[6] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
107 sub v2, v2, a2 /* v2 -= W4*row[4] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
108 sub v2, v2, lr /* v2 -= W2*row[6] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
109 sub v3, v3, a2 /* v3 -= W4*row[4] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
110 add v3, v3, lr /* v3 += W2*row[6] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
111
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
112 1: add a2, v1, v5 /* output 0 = (v1 + v5) >> 11 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
113 mov a3, a2, lsr #11
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
114 bic a3, a3, #0x1f0000 /* clear bits 16-20 so the upper halfword is empty before packing */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
115 sub a2, v2, v6 /* output 1 = (v2 - v6) >> 11 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
116 mov a2, a2, lsr #11
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
117 add a3, a3, a2, lsl #16 /* a3 = outputs [1:0] packed as two halfwords */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
118 add a2, v3, v7 /* output 2 = (v3 + v7) >> 11 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
119 mov a4, a2, lsr #11
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
120 bic a4, a4, #0x1f0000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
121 add a2, v4, fp /* output 3 = (v4 + fp) >> 11 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
122 mov a2, a2, lsr #11
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
123 add a4, a4, a2, lsl #16 /* a4 = outputs [3:2] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
124 strd a3, [a1] /* store outputs 0..3 over the input row */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
125
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
126 sub a2, v4, fp /* output 4 = (v4 - fp) >> 11 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
127 mov a3, a2, lsr #11
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
128 bic a3, a3, #0x1f0000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
129 sub a2, v3, v7 /* output 5 = (v3 - v7) >> 11 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
130 mov a2, a2, lsr #11
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
131 add a3, a3, a2, lsl #16 /* a3 = outputs [5:4] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
132 add a2, v2, v6 /* output 6 = (v2 + v6) >> 11 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
133 mov a4, a2, lsr #11
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
134 bic a4, a4, #0x1f0000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
135 sub a2, v1, v5 /* output 7 = (v1 - v5) >> 11 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
136 mov a2, a2, lsr #11
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
137 add a4, a4, a2, lsl #16 /* a4 = outputs [7:6] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
138 strd a3, [a1, #8] /* store outputs 4..7 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
139
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
140 ldr pc, [sp], #4 /* return: pop the saved lr straight into pc */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
141
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
142 row_dc_only: /* reached with a3 = row[1:0] and row[1..7] all zero */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
143 orr a3, a3, a3, lsl #16 /* duplicate row[0] into both halfwords (upper half was zero) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
144 bic a3, a3, #0xe000 /* drop bits 13-15 of the low half so the shift below cannot spill into the high half */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
145 mov a3, a3, lsl #3 /* each halfword = row[0] << 3, truncated to 16 bits */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
146 mov a4, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
147 strd a3, [a1] /* fill outputs 0..3 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
148 strd a3, [a1, #8] /* fill outputs 4..7 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
149
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
150 ldr pc, [sp], #4 /* return */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
151 .endfunc
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
152
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
153 .macro idct_col /* shared column-pass body for two adjacent columns (packed as the low/high halfword of each word); a1 = column base, rows are 16 bytes apart. On exit: odd-row sums in v1,v3,v5,v7 (low-halfword column) and v2,v4,v6,fp (high-halfword column); eight even-row sums pushed on the stack */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
154 ldr a4, [a1] /* a4 = col[1:0] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
155 mov ip, #16384
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
156 sub ip, ip, #1 /* ip = W4 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
157 #if 0 /* disabled variant: adds the full rounding term 1<<(COL_SHIFT-1) after the multiply */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
158 mov v1, #(1<<(COL_SHIFT-1))
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
159 smlabt v2, ip, a4, v1 /* v2 = W4*col[1] + (1<<(COL_SHIFT-1)) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
160 smlabb v1, ip, a4, v1 /* v1 = W4*col[0] + (1<<(COL_SHIFT-1)) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
161 ldr a4, [a1, #(16*4)]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
162 #else /* active variant: add round/W4 to the coefficient first, then multiply by W4 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
163 mov v1, #((1<<(COL_SHIFT-1))/W4) /* this matches the C version */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
164 add v2, v1, a4, asr #16 /* v2 = col[1] + round/W4 (asr sign-extends the high halfword) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
165 rsb v2, v2, v2, lsl #14 /* v2 = v2*16384 - v2 = v2*W4 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
166 mov a4, a4, lsl #16 /* move col[0] to the top so the asr below sign-extends it */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
167 add v1, v1, a4, asr #16 /* v1 = col[0] + round/W4 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
168 ldr a4, [a1, #(16*4)] /* a4 = row 4 of both columns */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
169 rsb v1, v1, v1, lsl #14 /* v1 = v1*W4 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
170 #endif
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
171
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
172 smulbb lr, ip, a4 /* lr = W4*row4 (low-halfword column) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
173 smulbt a3, ip, a4 /* a3 = W4*row4 (high-halfword column) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
174 sub v3, v1, lr /* seed the four even sums of the low column: v1, v3, v5, v7 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
175 sub v5, v1, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
176 add v7, v1, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
177 add v1, v1, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
178 sub v4, v2, a3 /* seed the four even sums of the high column: v2, v4, v6, fp */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
179 sub v6, v2, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
180 add fp, v2, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
181 ldr ip, [pc, #(w26-.-8)] /* ip = W2 | (W6 << 16) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
182 ldr a4, [a1, #(16*2)] /* a4 = row 2 of both columns */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
183 add v2, v2, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
184
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
185 smulbb lr, ip, a4 /* lr = W2*row2 (low column) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
186 smultb a3, ip, a4 /* a3 = W6*row2 (low column) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
187 add v1, v1, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
188 sub v7, v7, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
189 add v3, v3, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
190 sub v5, v5, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
191 smulbt lr, ip, a4 /* lr = W2*row2 (high column) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
192 smultt a3, ip, a4 /* a3 = W6*row2 (high column) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
193 add v2, v2, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
194 sub fp, fp, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
195 add v4, v4, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
196 ldr a4, [a1, #(16*6)] /* a4 = row 6 of both columns */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
197 sub v6, v6, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
198
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
199 smultb lr, ip, a4 /* lr = W6*row6 (low column) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
200 smulbb a3, ip, a4 /* a3 = W2*row6 (low column) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
201 add v1, v1, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
202 sub v7, v7, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
203 sub v3, v3, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
204 add v5, v5, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
205 smultt lr, ip, a4 /* lr = W6*row6 (high column) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
206 smulbt a3, ip, a4 /* a3 = W2*row6 (high column) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
207 add v2, v2, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
208 sub fp, fp, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
209 sub v4, v4, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
210 add v6, v6, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
211
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
212 stmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp} /* push the eight even sums (32 bytes, v1 at the lowest address); the registers are reused for the odd part */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
213
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
214 ldr ip, [pc, #(w13-.-8)] /* ip = W1 | (W3 << 16) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
215 ldr a4, [a1, #(16*1)] /* a4 = row 1 of both columns */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
216 ldr lr, [pc, #(w57-.-8)] /* lr = W5 | (W7 << 16) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
217 smulbb v1, ip, a4 /* low column: v1 = W1*row1 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
218 smultb v3, ip, a4 /* v3 = W3*row1 (negated below) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
219 smulbb v5, lr, a4 /* v5 = W5*row1 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
220 smultb v7, lr, a4 /* v7 = W7*row1 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
221 smulbt v2, ip, a4 /* high column: v2 = W1*row1 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
222 smultt v4, ip, a4 /* v4 = W3*row1 (negated below) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
223 smulbt v6, lr, a4 /* v6 = W5*row1 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
224 smultt fp, lr, a4 /* fp = W7*row1 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
225 rsb v4, v4, #0 /* v4 = -W3*row1: this sum is kept with inverted sign */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
226 ldr a4, [a1, #(16*3)] /* a4 = row 3 of both columns */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
227 rsb v3, v3, #0 /* v3 = -W3*row1 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
228
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
229 smlatb v1, ip, a4, v1 /* v1 += W3*row3 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
230 smlatb v3, lr, a4, v3 /* v3 += W7*row3 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
231 smulbb a3, ip, a4 /* a3 = W1*row3 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
232 smulbb a2, lr, a4 /* a2 = W5*row3 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
233 sub v5, v5, a3 /* v5 -= W1*row3 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
234 sub v7, v7, a2 /* v7 -= W5*row3 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
235 smlatt v2, ip, a4, v2 /* same four updates for the high column */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
236 smlatt v4, lr, a4, v4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
237 smulbt a3, ip, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
238 smulbt a2, lr, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
239 sub v6, v6, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
240 ldr a4, [a1, #(16*5)] /* a4 = row 5 of both columns */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
241 sub fp, fp, a2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
242
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
243 smlabb v1, lr, a4, v1 /* v1 += W5*row5 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
244 smlabb v3, ip, a4, v3 /* v3 += W1*row5 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
245 smlatb v5, lr, a4, v5 /* v5 += W7*row5 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
246 smlatb v7, ip, a4, v7 /* v7 += W3*row5 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
247 smlabt v2, lr, a4, v2 /* same four updates for the high column */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
248 smlabt v4, ip, a4, v4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
249 smlatt v6, lr, a4, v6
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
250 ldr a3, [a1, #(16*7)] /* a3 = row 7 of both columns */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
251 smlatt fp, ip, a4, fp
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
252
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
253 smlatb v1, lr, a3, v1 /* v1 += W7*row7 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
254 smlabb v3, lr, a3, v3 /* v3 += W5*row7 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
255 smlatb v5, ip, a3, v5 /* v5 += W3*row7 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
256 smulbb a4, ip, a3 /* a4 = W1*row7 (low column) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
257 smlatt v2, lr, a3, v2 /* v2 += W7*row7 (high column) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
258 sub v7, v7, a4 /* v7 -= W1*row7 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
259 smlabt v4, lr, a3, v4 /* v4 += W5*row7 (high column) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
260 smulbt a4, ip, a3 /* a4 = W1*row7 (high column) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
261 smlatt v6, ip, a3, v6 /* v6 += W3*row7 (high column) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
262 sub fp, fp, a4 /* fp -= W1*row7 (high column) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
263 .endm
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
264
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
265 .align
4867
97d82c7585b4 add .type foo, %function directives for the benefit of debuggers
mru
parents: 3947
diff changeset
266 .type idct_col_armv5te, %function
3769
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
267 .func idct_col_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
268 idct_col_armv5te: /* column IDCT of two adjacent columns, written back in place as 16-bit values; a1 = column base. Writes a2-a4, v1-v7, fp, ip without saving them -- NOTE(review): caller presumably preserves v1-v7/fp, confirm */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
269 str lr, [sp, #-4]! /* push return address; the macro uses lr as scratch */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
270
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
271 idct_col /* odd sums now in v1-v7/fp, eight even sums on the stack */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
272
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
273 ldmfd sp!, {a3, a4} /* pop 1st even pair (pushed from v1, v2): a3 = low column, a4 = high column */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
274 adds a2, a3, v1 /* row 0, low column: even + odd; N flag records the sign */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
275 mov a2, a2, lsr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
276 orrmi a2, a2, #0xf000 /* negative sum: set bits 12-15 so the low halfword is sign-extended */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
277 add ip, a4, v2 /* row 0, high column */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
278 mov ip, ip, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
279 orr a2, a2, ip, lsl #16 /* pack the high column into the upper halfword */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
280 str a2, [a1] /* store row 0 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
281 subs a3, a3, v1 /* row 7: even - odd */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
282 mov a2, a3, lsr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
283 orrmi a2, a2, #0xf000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
284 sub a4, a4, v2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
285 mov a4, a4, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
286 orr a2, a2, a4, lsl #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
287 ldmfd sp!, {a3, a4} /* pop 2nd even pair (pushed from v3, v4) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
288 str a2, [a1, #(16*7)] /* store row 7 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
289
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
290 subs a2, a3, v3 /* row 1: even - v3 (the macro keeps v3/v4 with inverted sign) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
291 mov a2, a2, lsr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
292 orrmi a2, a2, #0xf000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
293 sub ip, a4, v4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
294 mov ip, ip, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
295 orr a2, a2, ip, lsl #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
296 str a2, [a1, #(16*1)] /* store row 1 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
297 adds a3, a3, v3 /* row 6: even + v3 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
298 mov a2, a3, lsr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
299 orrmi a2, a2, #0xf000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
300 add a4, a4, v4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
301 mov a4, a4, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
302 orr a2, a2, a4, lsl #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
303 ldmfd sp!, {a3, a4} /* pop 3rd even pair (pushed from v5, v6) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
304 str a2, [a1, #(16*6)] /* store row 6 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
305
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
306 adds a2, a3, v5 /* row 2: even + odd */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
307 mov a2, a2, lsr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
308 orrmi a2, a2, #0xf000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
309 add ip, a4, v6
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
310 mov ip, ip, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
311 orr a2, a2, ip, lsl #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
312 str a2, [a1, #(16*2)] /* store row 2 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
313 subs a3, a3, v5 /* row 5: even - odd */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
314 mov a2, a3, lsr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
315 orrmi a2, a2, #0xf000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
316 sub a4, a4, v6
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
317 mov a4, a4, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
318 orr a2, a2, a4, lsl #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
319 ldmfd sp!, {a3, a4} /* pop 4th even pair (pushed from v7, fp) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
320 str a2, [a1, #(16*5)] /* store row 5 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
321
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
322 adds a2, a3, v7 /* row 3: even + odd */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
323 mov a2, a2, lsr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
324 orrmi a2, a2, #0xf000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
325 add ip, a4, fp
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
326 mov ip, ip, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
327 orr a2, a2, ip, lsl #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
328 str a2, [a1, #(16*3)] /* store row 3 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
329 subs a3, a3, v7 /* row 4: even - odd */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
330 mov a2, a3, lsr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
331 orrmi a2, a2, #0xf000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
332 sub a4, a4, fp
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
333 mov a4, a4, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
334 orr a2, a2, a4, lsl #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
335 str a2, [a1, #(16*4)] /* store row 4 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
336
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
337 ldr pc, [sp], #4 /* all eight pushed words are popped; return via the saved lr */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
338 .endfunc
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
339
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
340 .align
4867
97d82c7585b4 add .type foo, %function directives for the benefit of debuggers
mru
parents: 3947
diff changeset
341 .type idct_col_put_armv5te, %function
3769
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
342 .func idct_col_put_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
343 idct_col_put_armv5te:
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
344 str lr, [sp, #-4]!
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
345
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
346 idct_col
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
347
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
348 ldmfd sp!, {a3, a4}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
349 ldr lr, [sp, #32]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
350 add a2, a3, v1
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
351 movs a2, a2, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
352 movmi a2, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
353 cmp a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
354 movgt a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
355 add ip, a4, v2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
356 movs ip, ip, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
357 movmi ip, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
358 cmp ip, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
359 movgt ip, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
360 orr a2, a2, ip, lsl #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
361 sub a3, a3, v1
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
362 movs a3, a3, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
363 movmi a3, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
364 cmp a3, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
365 movgt a3, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
366 sub a4, a4, v2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
367 movs a4, a4, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
368 movmi a4, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
369 cmp a4, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
370 ldr v1, [sp, #28]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
371 movgt a4, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
372 strh a2, [v1]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
373 add a2, v1, #2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
374 str a2, [sp, #28]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
375 orr a2, a3, a4, lsl #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
376 rsb v2, lr, lr, lsl #3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
377 ldmfd sp!, {a3, a4}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
378 strh a2, [v2, v1]!
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
379
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
380 sub a2, a3, v3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
381 movs a2, a2, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
382 movmi a2, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
383 cmp a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
384 movgt a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
385 sub ip, a4, v4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
386 movs ip, ip, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
387 movmi ip, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
388 cmp ip, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
389 movgt ip, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
390 orr a2, a2, ip, lsl #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
391 strh a2, [v1, lr]!
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
392 add a3, a3, v3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
393 movs a2, a3, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
394 movmi a2, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
395 cmp a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
396 movgt a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
397 add a4, a4, v4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
398 movs a4, a4, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
399 movmi a4, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
400 cmp a4, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
401 movgt a4, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
402 orr a2, a2, a4, lsl #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
403 ldmfd sp!, {a3, a4}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
404 strh a2, [v2, -lr]!
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
405
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
406 add a2, a3, v5
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
407 movs a2, a2, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
408 movmi a2, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
409 cmp a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
410 movgt a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
411 add ip, a4, v6
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
412 movs ip, ip, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
413 movmi ip, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
414 cmp ip, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
415 movgt ip, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
416 orr a2, a2, ip, lsl #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
417 strh a2, [v1, lr]!
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
418 sub a3, a3, v5
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
419 movs a2, a3, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
420 movmi a2, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
421 cmp a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
422 movgt a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
423 sub a4, a4, v6
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
424 movs a4, a4, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
425 movmi a4, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
426 cmp a4, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
427 movgt a4, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
428 orr a2, a2, a4, lsl #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
429 ldmfd sp!, {a3, a4}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
430 strh a2, [v2, -lr]!
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
431
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
432 add a2, a3, v7
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
433 movs a2, a2, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
434 movmi a2, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
435 cmp a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
436 movgt a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
437 add ip, a4, fp
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
438 movs ip, ip, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
439 movmi ip, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
440 cmp ip, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
441 movgt ip, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
442 orr a2, a2, ip, lsl #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
443 strh a2, [v1, lr]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
444 sub a3, a3, v7
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
445 movs a2, a3, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
446 movmi a2, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
447 cmp a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
448 movgt a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
449 sub a4, a4, fp
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
450 movs a4, a4, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
451 movmi a4, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
452 cmp a4, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
453 movgt a4, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
454 orr a2, a2, a4, lsl #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
455 strh a2, [v2, -lr]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
456
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
457 ldr pc, [sp], #4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
458 .endfunc
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
459
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
460 .align
4867
97d82c7585b4 add .type foo, %function directives for the benefit of debuggers
mru
parents: 3947
diff changeset
461 .type idct_col_add_armv5te, %function
3769
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
462 .func idct_col_add_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
463 idct_col_add_armv5te:
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
464 str lr, [sp, #-4]!
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
465
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
466 idct_col
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
467
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
468 ldr lr, [sp, #36]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
469
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
470 ldmfd sp!, {a3, a4}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
471 ldrh ip, [lr]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
472 add a2, a3, v1
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
473 mov a2, a2, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
474 sub a3, a3, v1
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
475 and v1, ip, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
476 adds a2, a2, v1
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
477 movmi a2, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
478 cmp a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
479 movgt a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
480 add v1, a4, v2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
481 mov v1, v1, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
482 adds v1, v1, ip, lsr #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
483 movmi v1, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
484 cmp v1, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
485 movgt v1, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
486 orr a2, a2, v1, lsl #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
487 ldr v1, [sp, #32]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
488 sub a4, a4, v2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
489 rsb v2, v1, v1, lsl #3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
490 ldrh ip, [v2, lr]!
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
491 strh a2, [lr]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
492 mov a3, a3, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
493 and a2, ip, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
494 adds a3, a3, a2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
495 movmi a3, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
496 cmp a3, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
497 movgt a3, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
498 mov a4, a4, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
499 adds a4, a4, ip, lsr #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
500 movmi a4, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
501 cmp a4, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
502 movgt a4, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
503 add a2, lr, #2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
504 str a2, [sp, #28]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
505 orr a2, a3, a4, lsl #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
506 strh a2, [v2]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
507
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
508 ldmfd sp!, {a3, a4}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
509 ldrh ip, [lr, v1]!
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
510 sub a2, a3, v3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
511 mov a2, a2, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
512 add a3, a3, v3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
513 and v3, ip, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
514 adds a2, a2, v3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
515 movmi a2, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
516 cmp a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
517 movgt a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
518 sub v3, a4, v4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
519 mov v3, v3, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
520 adds v3, v3, ip, lsr #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
521 movmi v3, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
522 cmp v3, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
523 movgt v3, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
524 orr a2, a2, v3, lsl #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
525 add a4, a4, v4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
526 ldrh ip, [v2, -v1]!
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
527 strh a2, [lr]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
528 mov a3, a3, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
529 and a2, ip, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
530 adds a3, a3, a2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
531 movmi a3, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
532 cmp a3, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
533 movgt a3, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
534 mov a4, a4, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
535 adds a4, a4, ip, lsr #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
536 movmi a4, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
537 cmp a4, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
538 movgt a4, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
539 orr a2, a3, a4, lsl #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
540 strh a2, [v2]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
541
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
542 ldmfd sp!, {a3, a4}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
543 ldrh ip, [lr, v1]!
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
544 add a2, a3, v5
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
545 mov a2, a2, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
546 sub a3, a3, v5
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
547 and v3, ip, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
548 adds a2, a2, v3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
549 movmi a2, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
550 cmp a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
551 movgt a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
552 add v3, a4, v6
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
553 mov v3, v3, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
554 adds v3, v3, ip, lsr #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
555 movmi v3, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
556 cmp v3, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
557 movgt v3, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
558 orr a2, a2, v3, lsl #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
559 sub a4, a4, v6
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
560 ldrh ip, [v2, -v1]!
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
561 strh a2, [lr]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
562 mov a3, a3, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
563 and a2, ip, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
564 adds a3, a3, a2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
565 movmi a3, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
566 cmp a3, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
567 movgt a3, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
568 mov a4, a4, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
569 adds a4, a4, ip, lsr #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
570 movmi a4, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
571 cmp a4, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
572 movgt a4, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
573 orr a2, a3, a4, lsl #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
574 strh a2, [v2]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
575
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
576 ldmfd sp!, {a3, a4}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
577 ldrh ip, [lr, v1]!
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
578 add a2, a3, v7
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
579 mov a2, a2, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
580 sub a3, a3, v7
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
581 and v3, ip, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
582 adds a2, a2, v3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
583 movmi a2, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
584 cmp a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
585 movgt a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
586 add v3, a4, fp
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
587 mov v3, v3, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
588 adds v3, v3, ip, lsr #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
589 movmi v3, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
590 cmp v3, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
591 movgt v3, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
592 orr a2, a2, v3, lsl #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
593 sub a4, a4, fp
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
594 ldrh ip, [v2, -v1]!
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
595 strh a2, [lr]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
596 mov a3, a3, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
597 and a2, ip, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
598 adds a3, a3, a2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
599 movmi a3, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
600 cmp a3, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
601 movgt a3, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
602 mov a4, a4, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
603 adds a4, a4, ip, lsr #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
604 movmi a4, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
605 cmp a4, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
606 movgt a4, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
607 orr a2, a3, a4, lsl #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
608 strh a2, [v2]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
609
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
610 ldr pc, [sp], #4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
611 .endfunc
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
612
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
613 .align
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
614 .global simple_idct_armv5te
4867
97d82c7585b4 add .type foo, %function directives for the benefit of debuggers
mru
parents: 3947
diff changeset
615 .type simple_idct_armv5te, %function
3769
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
616 .func simple_idct_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
617 simple_idct_armv5te:
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
618 stmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, lr}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
619
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
620 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
621 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
622 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
623 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
624 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
625 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
626 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
627 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
628 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
629 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
630 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
631 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
632 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
633 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
634 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
635
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
636 sub a1, a1, #(16*7)
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
637
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
638 bl idct_col_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
639 add a1, a1, #4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
640 bl idct_col_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
641 add a1, a1, #4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
642 bl idct_col_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
643 add a1, a1, #4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
644 bl idct_col_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
645
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
646 ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
647 .endfunc
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
648
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
649 .align
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
650 .global simple_idct_add_armv5te
4867
97d82c7585b4 add .type foo, %function directives for the benefit of debuggers
mru
parents: 3947
diff changeset
651 .type simple_idct_add_armv5te, %function
3769
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
652 .func simple_idct_add_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
653 simple_idct_add_armv5te:
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
654 stmfd sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
655
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
656 mov a1, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
657
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
658 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
659 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
660 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
661 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
662 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
663 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
664 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
665 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
666 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
667 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
668 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
669 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
670 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
671 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
672 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
673
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
674 sub a1, a1, #(16*7)
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
675
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
676 bl idct_col_add_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
677 add a1, a1, #4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
678 bl idct_col_add_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
679 add a1, a1, #4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
680 bl idct_col_add_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
681 add a1, a1, #4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
682 bl idct_col_add_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
683
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
684 add sp, sp, #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
685 ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
686 .endfunc
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
687
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
688 .align
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
689 .global simple_idct_put_armv5te
4867
97d82c7585b4 add .type foo, %function directives for the benefit of debuggers
mru
parents: 3947
diff changeset
690 .type simple_idct_put_armv5te, %function
3769
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
691 .func simple_idct_put_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
692 simple_idct_put_armv5te:
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
693 stmfd sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
694
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
695 mov a1, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
696
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
697 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
698 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
699 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
700 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
701 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
702 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
703 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
704 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
705 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
706 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
707 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
708 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
709 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
710 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
711 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
712
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
713 sub a1, a1, #(16*7)
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
714
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
715 bl idct_col_put_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
716 add a1, a1, #4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
717 bl idct_col_put_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
718 add a1, a1, #4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
719 bl idct_col_put_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
720 add a1, a1, #4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
721 bl idct_col_put_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
722
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
723 add sp, sp, #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
724 ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
725 .endfunc