annotate armv4l/simple_idct_armv5te.S @ 5757:ace63c809071 libavcodec

Remove uses of SIGILL for CPU extension detection, that method is not acceptable in a library. Should not change anything for PPC, the autodetection is currently pointless due to other code being compiled with -maltivec as well (and detection for OSX and AmigaOS remains in place). SPARC binaries built with VIS support can now only run on systems with VIS.
author reimar
date Tue, 02 Oct 2007 18:18:35 +0000
parents 744e91a36a23
children 316762ae96a7
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3769
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
1 /*
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
2 * Simple IDCT
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
3 *
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
4 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
5220
744e91a36a23 update my email address
mru
parents: 4867
diff changeset
5 * Copyright (c) 2006 Mans Rullgard <mans@mansr.com>
3769
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
6 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3769
diff changeset
7 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3769
diff changeset
8 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3769
diff changeset
9 * FFmpeg is free software; you can redistribute it and/or
3769
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
10 * modify it under the terms of the GNU Lesser General Public
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
11 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3769
diff changeset
12 * version 2.1 of the License, or (at your option) any later version.
3769
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
13 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3769
diff changeset
14 * FFmpeg is distributed in the hope that it will be useful,
3769
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
17 * Lesser General Public License for more details.
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
18 *
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
19 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3769
diff changeset
20 * License along with FFmpeg; if not, write to the Free Software
3769
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
22 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
23
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
24 #define W1 22725 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
25 #define W2 21407 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
26 #define W3 19266 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
27 #define W4 16383 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
28 #define W5 12873 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
29 #define W6 8867 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
30 #define W7 4520 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
31 #define ROW_SHIFT 11
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
32 #define COL_SHIFT 20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
33
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
34 #define W13 (W1 | (W3 << 16))
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
35 #define W26 (W2 | (W6 << 16))
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
36 #define W57 (W5 | (W7 << 16))
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
37
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
38 .text
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
39 .align
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
40 w13: .long W13
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
41 w26: .long W26
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
42 w57: .long W57
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
43
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
44 .align
4867
97d82c7585b4 add .type foo, %function directives for the benefit of debuggers
mru
parents: 3947
diff changeset
45 .type idct_row_armv5te, %function
3769
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
46 .func idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
47 idct_row_armv5te:
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
48 str lr, [sp, #-4]!
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
49
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
50 ldrd v1, [a1, #8]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
51 ldrd a3, [a1] /* a3 = row[1:0], a4 = row[3:2] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
52 orrs v1, v1, v2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
53 cmpeq v1, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
54 cmpeq v1, a3, lsr #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
55 beq row_dc_only
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
56
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
57 mov v1, #(1<<(ROW_SHIFT-1))
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
58 mov ip, #16384
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
59 sub ip, ip, #1 /* ip = W4 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
60 smlabb v1, ip, a3, v1 /* v1 = W4*row[0]+(1<<(RS-1)) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
61 ldr ip, [pc, #(w26-.-8)] /* ip = W2 | (W6 << 16) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
62 smultb a2, ip, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
63 smulbb lr, ip, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
64 add v2, v1, a2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
65 sub v3, v1, a2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
66 sub v4, v1, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
67 add v1, v1, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
68
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
69 ldr ip, [pc, #(w13-.-8)] /* ip = W1 | (W3 << 16) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
70 ldr lr, [pc, #(w57-.-8)] /* lr = W5 | (W7 << 16) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
71 smulbt v5, ip, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
72 smultt v6, lr, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
73 smlatt v5, ip, a4, v5
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
74 smultt a2, ip, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
75 smulbt v7, lr, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
76 sub v6, v6, a2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
77 smulbt a2, ip, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
78 smultt fp, lr, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
79 sub v7, v7, a2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
80 smulbt a2, lr, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
81 ldrd a3, [a1, #8] /* a3=row[5:4] a4=row[7:6] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
82 sub fp, fp, a2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
83
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
84 orrs a2, a3, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
85 beq 1f
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
86
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
87 smlabt v5, lr, a3, v5
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
88 smlabt v6, ip, a3, v6
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
89 smlatt v5, lr, a4, v5
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
90 smlabt v6, lr, a4, v6
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
91 smlatt v7, lr, a3, v7
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
92 smlatt fp, ip, a3, fp
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
93 smulbt a2, ip, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
94 smlatt v7, ip, a4, v7
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
95 sub fp, fp, a2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
96
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
97 ldr ip, [pc, #(w26-.-8)] /* ip = W2 | (W6 << 16) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
98 mov a2, #16384
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
99 sub a2, a2, #1 /* a2 = W4 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
100 smulbb a2, a2, a3 /* a2 = W4*row[4] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
101 smultb lr, ip, a4 /* lr = W6*row[6] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
102 add v1, v1, a2 /* v1 += W4*row[4] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
103 add v1, v1, lr /* v1 += W6*row[6] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
104 add v4, v4, a2 /* v4 += W4*row[4] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
105 sub v4, v4, lr /* v4 -= W6*row[6] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
106 smulbb lr, ip, a4 /* lr = W2*row[6] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
107 sub v2, v2, a2 /* v2 -= W4*row[4] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
108 sub v2, v2, lr /* v2 -= W2*row[6] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
109 sub v3, v3, a2 /* v3 -= W4*row[4] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
110 add v3, v3, lr /* v3 += W2*row[6] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
111
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
112 1: add a2, v1, v5
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
113 mov a3, a2, lsr #11
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
114 bic a3, a3, #0x1f0000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
115 sub a2, v2, v6
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
116 mov a2, a2, lsr #11
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
117 add a3, a3, a2, lsl #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
118 add a2, v3, v7
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
119 mov a4, a2, lsr #11
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
120 bic a4, a4, #0x1f0000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
121 add a2, v4, fp
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
122 mov a2, a2, lsr #11
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
123 add a4, a4, a2, lsl #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
124 strd a3, [a1]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
125
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
126 sub a2, v4, fp
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
127 mov a3, a2, lsr #11
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
128 bic a3, a3, #0x1f0000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
129 sub a2, v3, v7
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
130 mov a2, a2, lsr #11
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
131 add a3, a3, a2, lsl #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
132 add a2, v2, v6
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
133 mov a4, a2, lsr #11
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
134 bic a4, a4, #0x1f0000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
135 sub a2, v1, v5
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
136 mov a2, a2, lsr #11
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
137 add a4, a4, a2, lsl #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
138 strd a3, [a1, #8]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
139
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
140 ldr pc, [sp], #4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
141
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
142 row_dc_only:
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
143 orr a3, a3, a3, lsl #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
144 bic a3, a3, #0xe000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
145 mov a3, a3, lsl #3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
146 mov a4, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
147 strd a3, [a1]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
148 strd a3, [a1, #8]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
149
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
150 ldr pc, [sp], #4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
151 .endfunc
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
152
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
153 .macro idct_col
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
154 ldr a4, [a1] /* a4 = col[1:0] */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
155 mov ip, #16384
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
156 sub ip, ip, #1 /* ip = W4 */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
157 #if 0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
158 mov v1, #(1<<(COL_SHIFT-1))
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
159 smlabt v2, ip, a4, v1 /* v2 = W4*col[1] + (1<<(COL_SHIFT-1)) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
160 smlabb v1, ip, a4, v1 /* v1 = W4*col[0] + (1<<(COL_SHIFT-1)) */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
161 ldr a4, [a1, #(16*4)]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
162 #else
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
163 mov v1, #((1<<(COL_SHIFT-1))/W4) /* this matches the C version */
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
164 add v2, v1, a4, asr #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
165 rsb v2, v2, v2, lsl #14
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
166 mov a4, a4, lsl #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
167 add v1, v1, a4, asr #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
168 ldr a4, [a1, #(16*4)]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
169 rsb v1, v1, v1, lsl #14
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
170 #endif
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
171
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
172 smulbb lr, ip, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
173 smulbt a3, ip, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
174 sub v3, v1, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
175 sub v5, v1, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
176 add v7, v1, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
177 add v1, v1, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
178 sub v4, v2, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
179 sub v6, v2, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
180 add fp, v2, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
181 ldr ip, [pc, #(w26-.-8)]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
182 ldr a4, [a1, #(16*2)]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
183 add v2, v2, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
184
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
185 smulbb lr, ip, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
186 smultb a3, ip, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
187 add v1, v1, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
188 sub v7, v7, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
189 add v3, v3, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
190 sub v5, v5, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
191 smulbt lr, ip, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
192 smultt a3, ip, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
193 add v2, v2, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
194 sub fp, fp, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
195 add v4, v4, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
196 ldr a4, [a1, #(16*6)]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
197 sub v6, v6, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
198
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
199 smultb lr, ip, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
200 smulbb a3, ip, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
201 add v1, v1, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
202 sub v7, v7, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
203 sub v3, v3, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
204 add v5, v5, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
205 smultt lr, ip, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
206 smulbt a3, ip, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
207 add v2, v2, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
208 sub fp, fp, lr
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
209 sub v4, v4, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
210 add v6, v6, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
211
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
212 stmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
213
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
214 ldr ip, [pc, #(w13-.-8)]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
215 ldr a4, [a1, #(16*1)]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
216 ldr lr, [pc, #(w57-.-8)]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
217 smulbb v1, ip, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
218 smultb v3, ip, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
219 smulbb v5, lr, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
220 smultb v7, lr, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
221 smulbt v2, ip, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
222 smultt v4, ip, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
223 smulbt v6, lr, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
224 smultt fp, lr, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
225 rsb v4, v4, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
226 ldr a4, [a1, #(16*3)]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
227 rsb v3, v3, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
228
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
229 smlatb v1, ip, a4, v1
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
230 smlatb v3, lr, a4, v3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
231 smulbb a3, ip, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
232 smulbb a2, lr, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
233 sub v5, v5, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
234 sub v7, v7, a2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
235 smlatt v2, ip, a4, v2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
236 smlatt v4, lr, a4, v4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
237 smulbt a3, ip, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
238 smulbt a2, lr, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
239 sub v6, v6, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
240 ldr a4, [a1, #(16*5)]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
241 sub fp, fp, a2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
242
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
243 smlabb v1, lr, a4, v1
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
244 smlabb v3, ip, a4, v3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
245 smlatb v5, lr, a4, v5
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
246 smlatb v7, ip, a4, v7
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
247 smlabt v2, lr, a4, v2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
248 smlabt v4, ip, a4, v4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
249 smlatt v6, lr, a4, v6
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
250 ldr a3, [a1, #(16*7)]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
251 smlatt fp, ip, a4, fp
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
252
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
253 smlatb v1, lr, a3, v1
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
254 smlabb v3, lr, a3, v3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
255 smlatb v5, ip, a3, v5
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
256 smulbb a4, ip, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
257 smlatt v2, lr, a3, v2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
258 sub v7, v7, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
259 smlabt v4, lr, a3, v4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
260 smulbt a4, ip, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
261 smlatt v6, ip, a3, v6
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
262 sub fp, fp, a4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
263 .endm
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
264
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
265 .align
4867
97d82c7585b4 add .type foo, %function directives for the benefit of debuggers
mru
parents: 3947
diff changeset
266 .type idct_col_armv5te, %function
3769
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
267 .func idct_col_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
268 idct_col_armv5te:
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
269 str lr, [sp, #-4]!
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
270
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
271 idct_col
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
272
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
273 ldmfd sp!, {a3, a4}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
274 adds a2, a3, v1
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
275 mov a2, a2, lsr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
276 orrmi a2, a2, #0xf000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
277 add ip, a4, v2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
278 mov ip, ip, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
279 orr a2, a2, ip, lsl #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
280 str a2, [a1]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
281 subs a3, a3, v1
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
282 mov a2, a3, lsr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
283 orrmi a2, a2, #0xf000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
284 sub a4, a4, v2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
285 mov a4, a4, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
286 orr a2, a2, a4, lsl #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
287 ldmfd sp!, {a3, a4}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
288 str a2, [a1, #(16*7)]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
289
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
290 subs a2, a3, v3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
291 mov a2, a2, lsr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
292 orrmi a2, a2, #0xf000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
293 sub ip, a4, v4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
294 mov ip, ip, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
295 orr a2, a2, ip, lsl #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
296 str a2, [a1, #(16*1)]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
297 adds a3, a3, v3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
298 mov a2, a3, lsr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
299 orrmi a2, a2, #0xf000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
300 add a4, a4, v4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
301 mov a4, a4, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
302 orr a2, a2, a4, lsl #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
303 ldmfd sp!, {a3, a4}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
304 str a2, [a1, #(16*6)]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
305
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
306 adds a2, a3, v5
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
307 mov a2, a2, lsr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
308 orrmi a2, a2, #0xf000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
309 add ip, a4, v6
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
310 mov ip, ip, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
311 orr a2, a2, ip, lsl #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
312 str a2, [a1, #(16*2)]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
313 subs a3, a3, v5
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
314 mov a2, a3, lsr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
315 orrmi a2, a2, #0xf000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
316 sub a4, a4, v6
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
317 mov a4, a4, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
318 orr a2, a2, a4, lsl #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
319 ldmfd sp!, {a3, a4}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
320 str a2, [a1, #(16*5)]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
321
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
322 adds a2, a3, v7
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
323 mov a2, a2, lsr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
324 orrmi a2, a2, #0xf000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
325 add ip, a4, fp
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
326 mov ip, ip, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
327 orr a2, a2, ip, lsl #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
328 str a2, [a1, #(16*3)]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
329 subs a3, a3, v7
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
330 mov a2, a3, lsr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
331 orrmi a2, a2, #0xf000
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
332 sub a4, a4, fp
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
333 mov a4, a4, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
334 orr a2, a2, a4, lsl #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
335 str a2, [a1, #(16*4)]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
336
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
337 ldr pc, [sp], #4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
338 .endfunc
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
339
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
340 .align
4867
97d82c7585b4 add .type foo, %function directives for the benefit of debuggers
mru
parents: 3947
diff changeset
341 .type idct_col_put_armv5te, %function
3769
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
342 .func idct_col_put_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
343 idct_col_put_armv5te:
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
344 str lr, [sp, #-4]!
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
345
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
346 idct_col
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
347
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
348 ldmfd sp!, {a3, a4}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
349 ldr lr, [sp, #32]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
350 add a2, a3, v1
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
351 movs a2, a2, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
352 movmi a2, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
353 cmp a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
354 movgt a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
355 add ip, a4, v2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
356 movs ip, ip, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
357 movmi ip, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
358 cmp ip, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
359 movgt ip, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
360 orr a2, a2, ip, lsl #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
361 sub a3, a3, v1
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
362 movs a3, a3, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
363 movmi a3, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
364 cmp a3, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
365 movgt a3, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
366 sub a4, a4, v2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
367 movs a4, a4, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
368 movmi a4, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
369 cmp a4, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
370 ldr v1, [sp, #28]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
371 movgt a4, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
372 strh a2, [v1]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
373 add a2, v1, #2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
374 str a2, [sp, #28]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
375 orr a2, a3, a4, lsl #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
376 rsb v2, lr, lr, lsl #3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
377 ldmfd sp!, {a3, a4}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
378 strh a2, [v2, v1]!
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
379
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
380 sub a2, a3, v3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
381 movs a2, a2, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
382 movmi a2, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
383 cmp a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
384 movgt a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
385 sub ip, a4, v4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
386 movs ip, ip, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
387 movmi ip, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
388 cmp ip, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
389 movgt ip, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
390 orr a2, a2, ip, lsl #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
391 strh a2, [v1, lr]!
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
392 add a3, a3, v3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
393 movs a2, a3, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
394 movmi a2, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
395 cmp a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
396 movgt a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
397 add a4, a4, v4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
398 movs a4, a4, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
399 movmi a4, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
400 cmp a4, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
401 movgt a4, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
402 orr a2, a2, a4, lsl #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
403 ldmfd sp!, {a3, a4}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
404 strh a2, [v2, -lr]!
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
405
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
406 add a2, a3, v5
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
407 movs a2, a2, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
408 movmi a2, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
409 cmp a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
410 movgt a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
411 add ip, a4, v6
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
412 movs ip, ip, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
413 movmi ip, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
414 cmp ip, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
415 movgt ip, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
416 orr a2, a2, ip, lsl #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
417 strh a2, [v1, lr]!
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
418 sub a3, a3, v5
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
419 movs a2, a3, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
420 movmi a2, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
421 cmp a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
422 movgt a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
423 sub a4, a4, v6
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
424 movs a4, a4, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
425 movmi a4, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
426 cmp a4, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
427 movgt a4, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
428 orr a2, a2, a4, lsl #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
429 ldmfd sp!, {a3, a4}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
430 strh a2, [v2, -lr]!
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
431
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
432 add a2, a3, v7
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
433 movs a2, a2, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
434 movmi a2, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
435 cmp a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
436 movgt a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
437 add ip, a4, fp
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
438 movs ip, ip, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
439 movmi ip, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
440 cmp ip, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
441 movgt ip, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
442 orr a2, a2, ip, lsl #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
443 strh a2, [v1, lr]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
444 sub a3, a3, v7
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
445 movs a2, a3, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
446 movmi a2, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
447 cmp a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
448 movgt a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
449 sub a4, a4, fp
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
450 movs a4, a4, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
451 movmi a4, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
452 cmp a4, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
453 movgt a4, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
454 orr a2, a2, a4, lsl #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
455 strh a2, [v2, -lr]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
456
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
457 ldr pc, [sp], #4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
458 .endfunc
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
459
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
460 .align
4867
97d82c7585b4 add .type foo, %function directives for the benefit of debuggers
mru
parents: 3947
diff changeset
461 .type idct_col_add_armv5te, %function
3769
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
462 .func idct_col_add_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
463 idct_col_add_armv5te:
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
464 str lr, [sp, #-4]!
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
465
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
466 idct_col
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
467
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
468 ldr lr, [sp, #36]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
469
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
470 ldmfd sp!, {a3, a4}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
471 ldrh ip, [lr]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
472 add a2, a3, v1
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
473 mov a2, a2, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
474 sub a3, a3, v1
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
475 and v1, ip, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
476 adds a2, a2, v1
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
477 movmi a2, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
478 cmp a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
479 movgt a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
480 add v1, a4, v2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
481 mov v1, v1, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
482 adds v1, v1, ip, lsr #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
483 movmi v1, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
484 cmp v1, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
485 movgt v1, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
486 orr a2, a2, v1, lsl #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
487 ldr v1, [sp, #32]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
488 sub a4, a4, v2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
489 rsb v2, v1, v1, lsl #3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
490 ldrh ip, [v2, lr]!
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
491 strh a2, [lr]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
492 mov a3, a3, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
493 and a2, ip, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
494 adds a3, a3, a2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
495 movmi a3, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
496 cmp a3, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
497 movgt a3, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
498 mov a4, a4, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
499 adds a4, a4, ip, lsr #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
500 movmi a4, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
501 cmp a4, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
502 movgt a4, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
503 add a2, lr, #2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
504 str a2, [sp, #28]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
505 orr a2, a3, a4, lsl #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
506 strh a2, [v2]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
507
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
508 ldmfd sp!, {a3, a4}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
509 ldrh ip, [lr, v1]!
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
510 sub a2, a3, v3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
511 mov a2, a2, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
512 add a3, a3, v3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
513 and v3, ip, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
514 adds a2, a2, v3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
515 movmi a2, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
516 cmp a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
517 movgt a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
518 sub v3, a4, v4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
519 mov v3, v3, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
520 adds v3, v3, ip, lsr #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
521 movmi v3, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
522 cmp v3, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
523 movgt v3, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
524 orr a2, a2, v3, lsl #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
525 add a4, a4, v4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
526 ldrh ip, [v2, -v1]!
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
527 strh a2, [lr]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
528 mov a3, a3, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
529 and a2, ip, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
530 adds a3, a3, a2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
531 movmi a3, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
532 cmp a3, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
533 movgt a3, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
534 mov a4, a4, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
535 adds a4, a4, ip, lsr #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
536 movmi a4, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
537 cmp a4, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
538 movgt a4, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
539 orr a2, a3, a4, lsl #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
540 strh a2, [v2]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
541
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
542 ldmfd sp!, {a3, a4}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
543 ldrh ip, [lr, v1]!
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
544 add a2, a3, v5
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
545 mov a2, a2, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
546 sub a3, a3, v5
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
547 and v3, ip, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
548 adds a2, a2, v3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
549 movmi a2, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
550 cmp a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
551 movgt a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
552 add v3, a4, v6
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
553 mov v3, v3, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
554 adds v3, v3, ip, lsr #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
555 movmi v3, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
556 cmp v3, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
557 movgt v3, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
558 orr a2, a2, v3, lsl #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
559 sub a4, a4, v6
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
560 ldrh ip, [v2, -v1]!
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
561 strh a2, [lr]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
562 mov a3, a3, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
563 and a2, ip, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
564 adds a3, a3, a2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
565 movmi a3, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
566 cmp a3, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
567 movgt a3, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
568 mov a4, a4, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
569 adds a4, a4, ip, lsr #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
570 movmi a4, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
571 cmp a4, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
572 movgt a4, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
573 orr a2, a3, a4, lsl #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
574 strh a2, [v2]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
575
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
576 ldmfd sp!, {a3, a4}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
577 ldrh ip, [lr, v1]!
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
578 add a2, a3, v7
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
579 mov a2, a2, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
580 sub a3, a3, v7
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
581 and v3, ip, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
582 adds a2, a2, v3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
583 movmi a2, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
584 cmp a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
585 movgt a2, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
586 add v3, a4, fp
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
587 mov v3, v3, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
588 adds v3, v3, ip, lsr #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
589 movmi v3, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
590 cmp v3, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
591 movgt v3, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
592 orr a2, a2, v3, lsl #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
593 sub a4, a4, fp
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
594 ldrh ip, [v2, -v1]!
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
595 strh a2, [lr]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
596 mov a3, a3, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
597 and a2, ip, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
598 adds a3, a3, a2
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
599 movmi a3, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
600 cmp a3, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
601 movgt a3, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
602 mov a4, a4, asr #20
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
603 adds a4, a4, ip, lsr #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
604 movmi a4, #0
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
605 cmp a4, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
606 movgt a4, #255
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
607 orr a2, a3, a4, lsl #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
608 strh a2, [v2]
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
609
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
610 ldr pc, [sp], #4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
611 .endfunc
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
612
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
613 .align
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
614 .global simple_idct_armv5te
4867
97d82c7585b4 add .type foo, %function directives for the benefit of debuggers
mru
parents: 3947
diff changeset
615 .type simple_idct_armv5te, %function
3769
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
616 .func simple_idct_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
617 simple_idct_armv5te:
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
618 stmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, lr}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
619
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
620 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
621 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
622 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
623 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
624 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
625 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
626 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
627 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
628 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
629 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
630 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
631 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
632 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
633 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
634 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
635
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
636 sub a1, a1, #(16*7)
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
637
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
638 bl idct_col_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
639 add a1, a1, #4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
640 bl idct_col_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
641 add a1, a1, #4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
642 bl idct_col_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
643 add a1, a1, #4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
644 bl idct_col_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
645
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
646 ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
647 .endfunc
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
648
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
649 .align
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
650 .global simple_idct_add_armv5te
4867
97d82c7585b4 add .type foo, %function directives for the benefit of debuggers
mru
parents: 3947
diff changeset
651 .type simple_idct_add_armv5te, %function
3769
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
652 .func simple_idct_add_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
653 simple_idct_add_armv5te:
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
654 stmfd sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
655
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
656 mov a1, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
657
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
658 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
659 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
660 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
661 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
662 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
663 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
664 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
665 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
666 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
667 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
668 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
669 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
670 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
671 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
672 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
673
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
674 sub a1, a1, #(16*7)
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
675
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
676 bl idct_col_add_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
677 add a1, a1, #4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
678 bl idct_col_add_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
679 add a1, a1, #4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
680 bl idct_col_add_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
681 add a1, a1, #4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
682 bl idct_col_add_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
683
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
684 add sp, sp, #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
685 ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
686 .endfunc
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
687
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
688 .align
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
689 .global simple_idct_put_armv5te
4867
97d82c7585b4 add .type foo, %function directives for the benefit of debuggers
mru
parents: 3947
diff changeset
690 .type simple_idct_put_armv5te, %function
3769
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
691 .func simple_idct_put_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
692 simple_idct_put_armv5te:
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
693 stmfd sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
694
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
695 mov a1, a3
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
696
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
697 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
698 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
699 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
700 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
701 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
702 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
703 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
704 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
705 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
706 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
707 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
708 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
709 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
710 add a1, a1, #16
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
711 bl idct_row_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
712
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
713 sub a1, a1, #(16*7)
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
714
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
715 bl idct_col_put_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
716 add a1, a1, #4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
717 bl idct_col_put_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
718 add a1, a1, #4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
719 bl idct_col_put_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
720 add a1, a1, #4
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
721 bl idct_col_put_armv5te
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
722
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
723 add sp, sp, #8
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
724 ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
cf04e15a72ac ARMv5TE optimized IDCT
mru
parents:
diff changeset
725 .endfunc