annotate armv4l/jrevdct_arm.S @ 509:cab79946302f libavcodec

Implement put_pixels_clamped and add_pixels_clamped in Assembler. This allows better scheduling of the memory accesses, and is portable among all compilers.
author mellum
date Mon, 01 Jul 2002 04:26:07 +0000
parents fefaa96def6e
children ef2149182f1c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
61
fefaa96def6e arm specific code
glantau
parents:
diff changeset
1 /*
fefaa96def6e arm specific code
glantau
parents:
diff changeset
2 C-like prototype :
fefaa96def6e arm specific code
glantau
parents:
diff changeset
3 void j_rev_dct_ARM(DCTBLOCK data)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
4
fefaa96def6e arm specific code
glantau
parents:
diff changeset
5 With DCTBLOCK being a pointer to an array of 64 'signed shorts'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
6
fefaa96def6e arm specific code
glantau
parents:
diff changeset
7 Copyright (c) 2001 Lionel Ulmer (lionel.ulmer@free.fr / bbrox@bbrox.org)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
8
fefaa96def6e arm specific code
glantau
parents:
diff changeset
9 Permission is hereby granted, free of charge, to any person obtaining a copy
fefaa96def6e arm specific code
glantau
parents:
diff changeset
10 of this software and associated documentation files (the "Software"), to deal
fefaa96def6e arm specific code
glantau
parents:
diff changeset
11 in the Software without restriction, including without limitation the rights
fefaa96def6e arm specific code
glantau
parents:
diff changeset
12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
fefaa96def6e arm specific code
glantau
parents:
diff changeset
13 copies of the Software, and to permit persons to whom the Software is
fefaa96def6e arm specific code
glantau
parents:
diff changeset
14 furnished to do so, subject to the following conditions:
fefaa96def6e arm specific code
glantau
parents:
diff changeset
15
fefaa96def6e arm specific code
glantau
parents:
diff changeset
16 The above copyright notice and this permission notice shall be included in
fefaa96def6e arm specific code
glantau
parents:
diff changeset
17 all copies or substantial portions of the Software.
fefaa96def6e arm specific code
glantau
parents:
diff changeset
18
fefaa96def6e arm specific code
glantau
parents:
diff changeset
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
fefaa96def6e arm specific code
glantau
parents:
diff changeset
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
fefaa96def6e arm specific code
glantau
parents:
diff changeset
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
fefaa96def6e arm specific code
glantau
parents:
diff changeset
22 COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
fefaa96def6e arm specific code
glantau
parents:
diff changeset
23 IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
fefaa96def6e arm specific code
glantau
parents:
diff changeset
24 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
fefaa96def6e arm specific code
glantau
parents:
diff changeset
25
fefaa96def6e arm specific code
glantau
parents:
diff changeset
26 */
fefaa96def6e arm specific code
glantau
parents:
diff changeset
27 #define FIX_0_298631336 2446
fefaa96def6e arm specific code
glantau
parents:
diff changeset
28 #define FIX_0_541196100 4433
fefaa96def6e arm specific code
glantau
parents:
diff changeset
29 #define FIX_0_765366865 6270
fefaa96def6e arm specific code
glantau
parents:
diff changeset
30 #define FIX_1_175875602 9633
fefaa96def6e arm specific code
glantau
parents:
diff changeset
31 #define FIX_1_501321110 12299
fefaa96def6e arm specific code
glantau
parents:
diff changeset
32 #define FIX_2_053119869 16819
fefaa96def6e arm specific code
glantau
parents:
diff changeset
33 #define FIX_3_072711026 25172
fefaa96def6e arm specific code
glantau
parents:
diff changeset
34 #define FIX_M_0_390180644 -3196
fefaa96def6e arm specific code
glantau
parents:
diff changeset
35 #define FIX_M_0_899976223 -7373
fefaa96def6e arm specific code
glantau
parents:
diff changeset
36 #define FIX_M_1_847759065 -15137
fefaa96def6e arm specific code
glantau
parents:
diff changeset
37 #define FIX_M_1_961570560 -16069
fefaa96def6e arm specific code
glantau
parents:
diff changeset
38 #define FIX_M_2_562915447 -20995
fefaa96def6e arm specific code
glantau
parents:
diff changeset
39 #define FIX_0xFFFF 0xFFFF
fefaa96def6e arm specific code
glantau
parents:
diff changeset
40
fefaa96def6e arm specific code
glantau
parents:
diff changeset
41 #define FIX_0_298631336_ID 0
fefaa96def6e arm specific code
glantau
parents:
diff changeset
42 #define FIX_0_541196100_ID 4
fefaa96def6e arm specific code
glantau
parents:
diff changeset
43 #define FIX_0_765366865_ID 8
fefaa96def6e arm specific code
glantau
parents:
diff changeset
44 #define FIX_1_175875602_ID 12
fefaa96def6e arm specific code
glantau
parents:
diff changeset
45 #define FIX_1_501321110_ID 16
fefaa96def6e arm specific code
glantau
parents:
diff changeset
46 #define FIX_2_053119869_ID 20
fefaa96def6e arm specific code
glantau
parents:
diff changeset
47 #define FIX_3_072711026_ID 24
fefaa96def6e arm specific code
glantau
parents:
diff changeset
48 #define FIX_M_0_390180644_ID 28
fefaa96def6e arm specific code
glantau
parents:
diff changeset
49 #define FIX_M_0_899976223_ID 32
fefaa96def6e arm specific code
glantau
parents:
diff changeset
50 #define FIX_M_1_847759065_ID 36
fefaa96def6e arm specific code
glantau
parents:
diff changeset
51 #define FIX_M_1_961570560_ID 40
fefaa96def6e arm specific code
glantau
parents:
diff changeset
52 #define FIX_M_2_562915447_ID 44
fefaa96def6e arm specific code
glantau
parents:
diff changeset
53 #define FIX_0xFFFF_ID 48
fefaa96def6e arm specific code
glantau
parents:
diff changeset
54 .text
fefaa96def6e arm specific code
glantau
parents:
diff changeset
55 .align
fefaa96def6e arm specific code
glantau
parents:
diff changeset
56
fefaa96def6e arm specific code
glantau
parents:
diff changeset
57 .global j_rev_dct_ARM
fefaa96def6e arm specific code
glantau
parents:
diff changeset
58 j_rev_dct_ARM:
fefaa96def6e arm specific code
glantau
parents:
diff changeset
59 stmdb sp!, { r4 - r12, lr } @ all callee saved regs
fefaa96def6e arm specific code
glantau
parents:
diff changeset
60
fefaa96def6e arm specific code
glantau
parents:
diff changeset
61 sub sp, sp, #4 @ reserve some space on the stack
fefaa96def6e arm specific code
glantau
parents:
diff changeset
62 str r0, [ sp ] @ save the DCT pointer to the stack
fefaa96def6e arm specific code
glantau
parents:
diff changeset
63
fefaa96def6e arm specific code
glantau
parents:
diff changeset
64 mov lr, r0 @ lr = pointer to the current row
fefaa96def6e arm specific code
glantau
parents:
diff changeset
65 mov r12, #8 @ r12 = row-counter
fefaa96def6e arm specific code
glantau
parents:
diff changeset
66 add r11, pc, #(const_array-.-8) @ r11 = base pointer to the constants array
fefaa96def6e arm specific code
glantau
parents:
diff changeset
67 row_loop:
fefaa96def6e arm specific code
glantau
parents:
diff changeset
68 ldrsh r0, [lr, # 0] @ r0 = 'd0'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
69 ldrsh r1, [lr, # 8] @ r1 = 'd1'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
70
fefaa96def6e arm specific code
glantau
parents:
diff changeset
71 @ Optimization for rows that have all items except the first set to 0
fefaa96def6e arm specific code
glantau
parents:
diff changeset
72 @ (this works as the DCTELEMS are always 4-byte aligned)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
73 ldr r5, [lr, # 0]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
74 ldr r2, [lr, # 4]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
75 ldr r3, [lr, # 8]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
76 ldr r4, [lr, #12]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
77 orr r3, r3, r4
fefaa96def6e arm specific code
glantau
parents:
diff changeset
78 orr r3, r3, r2
fefaa96def6e arm specific code
glantau
parents:
diff changeset
79 orrs r5, r3, r5
fefaa96def6e arm specific code
glantau
parents:
diff changeset
80 beq end_of_row_loop @ nothing to be done as ALL of them are '0'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
81 orrs r2, r3, r1
fefaa96def6e arm specific code
glantau
parents:
diff changeset
82 beq empty_row
fefaa96def6e arm specific code
glantau
parents:
diff changeset
83
fefaa96def6e arm specific code
glantau
parents:
diff changeset
84 ldrsh r2, [lr, # 2] @ r2 = 'd2'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
85 ldrsh r4, [lr, # 4] @ r4 = 'd4'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
86 ldrsh r6, [lr, # 6] @ r6 = 'd6'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
87
fefaa96def6e arm specific code
glantau
parents:
diff changeset
88 ldr r3, [r11, #FIX_0_541196100_ID]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
89 add r7, r2, r6
fefaa96def6e arm specific code
glantau
parents:
diff changeset
90 ldr r5, [r11, #FIX_M_1_847759065_ID]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
91 mul r7, r3, r7 @ r7 = z1
fefaa96def6e arm specific code
glantau
parents:
diff changeset
92 ldr r3, [r11, #FIX_0_765366865_ID]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
93 mla r6, r5, r6, r7 @ r6 = tmp2
fefaa96def6e arm specific code
glantau
parents:
diff changeset
94 add r5, r0, r4 @ r5 = tmp0
fefaa96def6e arm specific code
glantau
parents:
diff changeset
95 mla r2, r3, r2, r7 @ r2 = tmp3
fefaa96def6e arm specific code
glantau
parents:
diff changeset
96 sub r3, r0, r4 @ r3 = tmp1
fefaa96def6e arm specific code
glantau
parents:
diff changeset
97
fefaa96def6e arm specific code
glantau
parents:
diff changeset
98 add r0, r2, r5, lsl #13 @ r0 = tmp10
fefaa96def6e arm specific code
glantau
parents:
diff changeset
99 rsb r2, r2, r5, lsl #13 @ r2 = tmp13
fefaa96def6e arm specific code
glantau
parents:
diff changeset
100 add r4, r6, r3, lsl #13 @ r4 = tmp11
fefaa96def6e arm specific code
glantau
parents:
diff changeset
101 rsb r3, r6, r3, lsl #13 @ r3 = tmp12
fefaa96def6e arm specific code
glantau
parents:
diff changeset
102
fefaa96def6e arm specific code
glantau
parents:
diff changeset
103 stmdb sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11
fefaa96def6e arm specific code
glantau
parents:
diff changeset
104
fefaa96def6e arm specific code
glantau
parents:
diff changeset
105 ldrsh r3, [lr, #10] @ r3 = 'd3'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
106 ldrsh r5, [lr, #12] @ r5 = 'd5'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
107 ldrsh r7, [lr, #14] @ r7 = 'd7'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
108
fefaa96def6e arm specific code
glantau
parents:
diff changeset
109 add r0, r3, r5 @ r0 = 'z2'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
110 add r2, r1, r7 @ r2 = 'z1'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
111 add r4, r3, r7 @ r4 = 'z3'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
112 add r6, r1, r5 @ r6 = 'z4'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
113 ldr r9, [r11, #FIX_1_175875602_ID]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
114 add r8, r4, r6 @ r8 = z3 + z4
fefaa96def6e arm specific code
glantau
parents:
diff changeset
115 ldr r10, [r11, #FIX_M_0_899976223_ID]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
116 mul r8, r9, r8 @ r8 = 'z5'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
117 ldr r9, [r11, #FIX_M_2_562915447_ID]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
118 mul r2, r10, r2 @ r2 = 'z1'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
119 ldr r10, [r11, #FIX_M_1_961570560_ID]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
120 mul r0, r9, r0 @ r0 = 'z2'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
121 ldr r9, [r11, #FIX_M_0_390180644_ID]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
122 mla r4, r10, r4, r8 @ r4 = 'z3'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
123 ldr r10, [r11, #FIX_0_298631336_ID]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
124 mla r6, r9, r6, r8 @ r6 = 'z4'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
125 ldr r9, [r11, #FIX_2_053119869_ID]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
126 mla r7, r10, r7, r2 @ r7 = tmp0 + z1
fefaa96def6e arm specific code
glantau
parents:
diff changeset
127 ldr r10, [r11, #FIX_3_072711026_ID]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
128 mla r5, r9, r5, r0 @ r5 = tmp1 + z2
fefaa96def6e arm specific code
glantau
parents:
diff changeset
129 ldr r9, [r11, #FIX_1_501321110_ID]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
130 mla r3, r10, r3, r0 @ r3 = tmp2 + z2
fefaa96def6e arm specific code
glantau
parents:
diff changeset
131 add r7, r7, r4 @ r7 = tmp0
fefaa96def6e arm specific code
glantau
parents:
diff changeset
132 mla r1, r9, r1, r2 @ r1 = tmp3 + z1
fefaa96def6e arm specific code
glantau
parents:
diff changeset
133 add r5, r5, r6 @ r5 = tmp1
fefaa96def6e arm specific code
glantau
parents:
diff changeset
134 add r3, r3, r4 @ r3 = tmp2
fefaa96def6e arm specific code
glantau
parents:
diff changeset
135 add r1, r1, r6 @ r1 = tmp3
fefaa96def6e arm specific code
glantau
parents:
diff changeset
136
fefaa96def6e arm specific code
glantau
parents:
diff changeset
137 ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
fefaa96def6e arm specific code
glantau
parents:
diff changeset
138 @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
fefaa96def6e arm specific code
glantau
parents:
diff changeset
139
fefaa96def6e arm specific code
glantau
parents:
diff changeset
140 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
141 add r8, r0, r1
fefaa96def6e arm specific code
glantau
parents:
diff changeset
142 add r8, r8, #(1<<10)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
143 mov r8, r8, asr #11
fefaa96def6e arm specific code
glantau
parents:
diff changeset
144 strh r8, [lr, # 0]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
145
fefaa96def6e arm specific code
glantau
parents:
diff changeset
146 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
147 sub r8, r0, r1
fefaa96def6e arm specific code
glantau
parents:
diff changeset
148 add r8, r8, #(1<<10)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
149 mov r8, r8, asr #11
fefaa96def6e arm specific code
glantau
parents:
diff changeset
150 strh r8, [lr, #14]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
151
fefaa96def6e arm specific code
glantau
parents:
diff changeset
152 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
153 add r8, r6, r3
fefaa96def6e arm specific code
glantau
parents:
diff changeset
154 add r8, r8, #(1<<10)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
155 mov r8, r8, asr #11
fefaa96def6e arm specific code
glantau
parents:
diff changeset
156 strh r8, [lr, # 2]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
157
fefaa96def6e arm specific code
glantau
parents:
diff changeset
158 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
159 sub r8, r6, r3
fefaa96def6e arm specific code
glantau
parents:
diff changeset
160 add r8, r8, #(1<<10)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
161 mov r8, r8, asr #11
fefaa96def6e arm specific code
glantau
parents:
diff changeset
162 strh r8, [lr, #12]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
163
fefaa96def6e arm specific code
glantau
parents:
diff changeset
164 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
165 add r8, r4, r5
fefaa96def6e arm specific code
glantau
parents:
diff changeset
166 add r8, r8, #(1<<10)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
167 mov r8, r8, asr #11
fefaa96def6e arm specific code
glantau
parents:
diff changeset
168 strh r8, [lr, # 4]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
169
fefaa96def6e arm specific code
glantau
parents:
diff changeset
170 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
171 sub r8, r4, r5
fefaa96def6e arm specific code
glantau
parents:
diff changeset
172 add r8, r8, #(1<<10)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
173 mov r8, r8, asr #11
fefaa96def6e arm specific code
glantau
parents:
diff changeset
174 strh r8, [lr, #10]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
175
fefaa96def6e arm specific code
glantau
parents:
diff changeset
176 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
177 add r8, r2, r7
fefaa96def6e arm specific code
glantau
parents:
diff changeset
178 add r8, r8, #(1<<10)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
179 mov r8, r8, asr #11
fefaa96def6e arm specific code
glantau
parents:
diff changeset
180 strh r8, [lr, # 6]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
181
fefaa96def6e arm specific code
glantau
parents:
diff changeset
182 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
183 sub r8, r2, r7
fefaa96def6e arm specific code
glantau
parents:
diff changeset
184 add r8, r8, #(1<<10)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
185 mov r8, r8, asr #11
fefaa96def6e arm specific code
glantau
parents:
diff changeset
186 strh r8, [lr, # 8]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
187
fefaa96def6e arm specific code
glantau
parents:
diff changeset
188 @ End of row loop
fefaa96def6e arm specific code
glantau
parents:
diff changeset
189 add lr, lr, #16
fefaa96def6e arm specific code
glantau
parents:
diff changeset
190 subs r12, r12, #1
fefaa96def6e arm specific code
glantau
parents:
diff changeset
191 bne row_loop
fefaa96def6e arm specific code
glantau
parents:
diff changeset
192 beq start_column_loop
fefaa96def6e arm specific code
glantau
parents:
diff changeset
193
fefaa96def6e arm specific code
glantau
parents:
diff changeset
194 empty_row:
fefaa96def6e arm specific code
glantau
parents:
diff changeset
195 ldr r1, [r11, #FIX_0xFFFF_ID]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
196 mov r0, r0, lsl #2
fefaa96def6e arm specific code
glantau
parents:
diff changeset
197 and r0, r0, r1
fefaa96def6e arm specific code
glantau
parents:
diff changeset
198 add r0, r0, r0, lsl #16
fefaa96def6e arm specific code
glantau
parents:
diff changeset
199 str r0, [lr, # 0]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
200 str r0, [lr, # 4]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
201 str r0, [lr, # 8]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
202 str r0, [lr, #12]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
203
fefaa96def6e arm specific code
glantau
parents:
diff changeset
204 end_of_row_loop:
fefaa96def6e arm specific code
glantau
parents:
diff changeset
205 @ End of loop
fefaa96def6e arm specific code
glantau
parents:
diff changeset
206 add lr, lr, #16
fefaa96def6e arm specific code
glantau
parents:
diff changeset
207 subs r12, r12, #1
fefaa96def6e arm specific code
glantau
parents:
diff changeset
208 bne row_loop
fefaa96def6e arm specific code
glantau
parents:
diff changeset
209
fefaa96def6e arm specific code
glantau
parents:
diff changeset
210 start_column_loop:
fefaa96def6e arm specific code
glantau
parents:
diff changeset
211 @ Start of column loop
fefaa96def6e arm specific code
glantau
parents:
diff changeset
212 ldr lr, [ sp ]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
213 mov r12, #8
fefaa96def6e arm specific code
glantau
parents:
diff changeset
214 column_loop:
fefaa96def6e arm specific code
glantau
parents:
diff changeset
215 ldrsh r0, [lr, #( 0*8)] @ r0 = 'd0'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
216 ldrsh r2, [lr, #( 4*8)] @ r2 = 'd2'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
217 ldrsh r4, [lr, #( 8*8)] @ r4 = 'd4'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
218 ldrsh r6, [lr, #(12*8)] @ r6 = 'd6'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
219
fefaa96def6e arm specific code
glantau
parents:
diff changeset
220 ldr r3, [r11, #FIX_0_541196100_ID]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
221 add r1, r2, r6
fefaa96def6e arm specific code
glantau
parents:
diff changeset
222 ldr r5, [r11, #FIX_M_1_847759065_ID]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
223 mul r1, r3, r1 @ r1 = z1
fefaa96def6e arm specific code
glantau
parents:
diff changeset
224 ldr r3, [r11, #FIX_0_765366865_ID]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
225 mla r6, r5, r6, r1 @ r6 = tmp2
fefaa96def6e arm specific code
glantau
parents:
diff changeset
226 add r5, r0, r4 @ r5 = tmp0
fefaa96def6e arm specific code
glantau
parents:
diff changeset
227 mla r2, r3, r2, r1 @ r2 = tmp3
fefaa96def6e arm specific code
glantau
parents:
diff changeset
228 sub r3, r0, r4 @ r3 = tmp1
fefaa96def6e arm specific code
glantau
parents:
diff changeset
229
fefaa96def6e arm specific code
glantau
parents:
diff changeset
230 add r0, r2, r5, lsl #13 @ r0 = tmp10
fefaa96def6e arm specific code
glantau
parents:
diff changeset
231 rsb r2, r2, r5, lsl #13 @ r2 = tmp13
fefaa96def6e arm specific code
glantau
parents:
diff changeset
232 add r4, r6, r3, lsl #13 @ r4 = tmp11
fefaa96def6e arm specific code
glantau
parents:
diff changeset
233 rsb r6, r6, r3, lsl #13 @ r6 = tmp12
fefaa96def6e arm specific code
glantau
parents:
diff changeset
234
fefaa96def6e arm specific code
glantau
parents:
diff changeset
235 ldrsh r1, [lr, #( 2*8)] @ r1 = 'd1'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
236 ldrsh r3, [lr, #( 6*8)] @ r3 = 'd3'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
237 ldrsh r5, [lr, #(10*8)] @ r5 = 'd5'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
238 ldrsh r7, [lr, #(14*8)] @ r7 = 'd7'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
239
fefaa96def6e arm specific code
glantau
parents:
diff changeset
240 @ Check for empty odd column (happens about 20 to 25 % of the time according to my stats)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
241 orr r9, r1, r3
fefaa96def6e arm specific code
glantau
parents:
diff changeset
242 orr r10, r5, r7
fefaa96def6e arm specific code
glantau
parents:
diff changeset
243 orrs r10, r9, r10
fefaa96def6e arm specific code
glantau
parents:
diff changeset
244 beq empty_odd_column
fefaa96def6e arm specific code
glantau
parents:
diff changeset
245
fefaa96def6e arm specific code
glantau
parents:
diff changeset
246 stmdb sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp11, tmp12
fefaa96def6e arm specific code
glantau
parents:
diff changeset
247
fefaa96def6e arm specific code
glantau
parents:
diff changeset
248 add r0, r3, r5 @ r0 = 'z2'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
249 add r2, r1, r7 @ r2 = 'z1'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
250 add r4, r3, r7 @ r4 = 'z3'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
251 add r6, r1, r5 @ r6 = 'z4'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
252 ldr r9, [r11, #FIX_1_175875602_ID]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
253 add r8, r4, r6
fefaa96def6e arm specific code
glantau
parents:
diff changeset
254 ldr r10, [r11, #FIX_M_0_899976223_ID]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
255 mul r8, r9, r8 @ r8 = 'z5'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
256 ldr r9, [r11, #FIX_M_2_562915447_ID]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
257 mul r2, r10, r2 @ r2 = 'z1'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
258 ldr r10, [r11, #FIX_M_1_961570560_ID]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
259 mul r0, r9, r0 @ r0 = 'z2'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
260 ldr r9, [r11, #FIX_M_0_390180644_ID]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
261 mla r4, r10, r4, r8 @ r4 = 'z3'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
262 ldr r10, [r11, #FIX_0_298631336_ID]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
263 mla r6, r9, r6, r8 @ r6 = 'z4'
fefaa96def6e arm specific code
glantau
parents:
diff changeset
264 ldr r9, [r11, #FIX_2_053119869_ID]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
265 mla r7, r10, r7, r2 @ r7 = tmp0 + z1
fefaa96def6e arm specific code
glantau
parents:
diff changeset
266 ldr r10, [r11, #FIX_3_072711026_ID]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
267 mla r5, r9, r5, r0 @ r5 = tmp1 + z2
fefaa96def6e arm specific code
glantau
parents:
diff changeset
268 ldr r9, [r11, #FIX_1_501321110_ID]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
269 mla r3, r10, r3, r0 @ r3 = tmp2 + z2
fefaa96def6e arm specific code
glantau
parents:
diff changeset
270 add r7, r7, r4 @ r7 = tmp0
fefaa96def6e arm specific code
glantau
parents:
diff changeset
271 mla r1, r9, r1, r2 @ r1 = tmp3 + z1
fefaa96def6e arm specific code
glantau
parents:
diff changeset
272 add r5, r5, r6 @ r5 = tmp1
fefaa96def6e arm specific code
glantau
parents:
diff changeset
273 add r3, r3, r4 @ r3 = tmp2
fefaa96def6e arm specific code
glantau
parents:
diff changeset
274 add r1, r1, r6 @ r1 = tmp3
fefaa96def6e arm specific code
glantau
parents:
diff changeset
275
fefaa96def6e arm specific code
glantau
parents:
diff changeset
276 ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
fefaa96def6e arm specific code
glantau
parents:
diff changeset
277 @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
fefaa96def6e arm specific code
glantau
parents:
diff changeset
278
fefaa96def6e arm specific code
glantau
parents:
diff changeset
279 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
280 add r8, r0, r1
fefaa96def6e arm specific code
glantau
parents:
diff changeset
281 add r8, r8, #(1<<17)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
282 mov r8, r8, asr #18
fefaa96def6e arm specific code
glantau
parents:
diff changeset
283 strh r8, [lr, #( 0*8)]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
284
fefaa96def6e arm specific code
glantau
parents:
diff changeset
285 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
286 sub r8, r0, r1
fefaa96def6e arm specific code
glantau
parents:
diff changeset
287 add r8, r8, #(1<<17)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
288 mov r8, r8, asr #18
fefaa96def6e arm specific code
glantau
parents:
diff changeset
289 strh r8, [lr, #(14*8)]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
290
fefaa96def6e arm specific code
glantau
parents:
diff changeset
291 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
292 add r8, r4, r3
fefaa96def6e arm specific code
glantau
parents:
diff changeset
293 add r8, r8, #(1<<17)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
294 mov r8, r8, asr #18
fefaa96def6e arm specific code
glantau
parents:
diff changeset
295 strh r8, [lr, #( 2*8)]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
296
fefaa96def6e arm specific code
glantau
parents:
diff changeset
297 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
298 sub r8, r4, r3
fefaa96def6e arm specific code
glantau
parents:
diff changeset
299 add r8, r8, #(1<<17)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
300 mov r8, r8, asr #18
fefaa96def6e arm specific code
glantau
parents:
diff changeset
301 strh r8, [lr, #(12*8)]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
302
fefaa96def6e arm specific code
glantau
parents:
diff changeset
303 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
304 add r8, r6, r5
fefaa96def6e arm specific code
glantau
parents:
diff changeset
305 add r8, r8, #(1<<17)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
306 mov r8, r8, asr #18
fefaa96def6e arm specific code
glantau
parents:
diff changeset
307 strh r8, [lr, #( 4*8)]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
308
fefaa96def6e arm specific code
glantau
parents:
diff changeset
309 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
310 sub r8, r6, r5
fefaa96def6e arm specific code
glantau
parents:
diff changeset
311 add r8, r8, #(1<<17)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
312 mov r8, r8, asr #18
fefaa96def6e arm specific code
glantau
parents:
diff changeset
313 strh r8, [lr, #(10*8)]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
314
fefaa96def6e arm specific code
glantau
parents:
diff changeset
315 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
316 add r8, r2, r7
fefaa96def6e arm specific code
glantau
parents:
diff changeset
317 add r8, r8, #(1<<17)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
318 mov r8, r8, asr #18
fefaa96def6e arm specific code
glantau
parents:
diff changeset
319 strh r8, [lr, #( 6*8)]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
320
fefaa96def6e arm specific code
glantau
parents:
diff changeset
321 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
322 sub r8, r2, r7
fefaa96def6e arm specific code
glantau
parents:
diff changeset
323 add r8, r8, #(1<<17)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
324 mov r8, r8, asr #18
fefaa96def6e arm specific code
glantau
parents:
diff changeset
325 strh r8, [lr, #( 8*8)]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
326
fefaa96def6e arm specific code
glantau
parents:
diff changeset
327 @ End of column loop
fefaa96def6e arm specific code
glantau
parents:
diff changeset
328 add lr, lr, #2
fefaa96def6e arm specific code
glantau
parents:
diff changeset
329 subs r12, r12, #1
fefaa96def6e arm specific code
glantau
parents:
diff changeset
330 bne column_loop
fefaa96def6e arm specific code
glantau
parents:
diff changeset
331 beq the_end
fefaa96def6e arm specific code
glantau
parents:
diff changeset
332
fefaa96def6e arm specific code
glantau
parents:
diff changeset
333 empty_odd_column:
fefaa96def6e arm specific code
glantau
parents:
diff changeset
334 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
335 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
336 add r0, r0, #(1<<17)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
337 mov r0, r0, asr #18
fefaa96def6e arm specific code
glantau
parents:
diff changeset
338 strh r0, [lr, #( 0*8)]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
339 strh r0, [lr, #(14*8)]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
340
fefaa96def6e arm specific code
glantau
parents:
diff changeset
341 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
342 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
343 add r4, r4, #(1<<17)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
344 mov r4, r4, asr #18
fefaa96def6e arm specific code
glantau
parents:
diff changeset
345 strh r4, [lr, #( 2*8)]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
346 strh r4, [lr, #(12*8)]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
347
fefaa96def6e arm specific code
glantau
parents:
diff changeset
348 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
349 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
350 add r6, r6, #(1<<17)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
351 mov r6, r6, asr #18
fefaa96def6e arm specific code
glantau
parents:
diff changeset
352 strh r6, [lr, #( 4*8)]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
353 strh r6, [lr, #(10*8)]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
354
fefaa96def6e arm specific code
glantau
parents:
diff changeset
355 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
356 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
357 add r2, r2, #(1<<17)
fefaa96def6e arm specific code
glantau
parents:
diff changeset
358 mov r2, r2, asr #18
fefaa96def6e arm specific code
glantau
parents:
diff changeset
359 strh r2, [lr, #( 6*8)]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
360 strh r2, [lr, #( 8*8)]
fefaa96def6e arm specific code
glantau
parents:
diff changeset
361
fefaa96def6e arm specific code
glantau
parents:
diff changeset
362 @ End of column loop
fefaa96def6e arm specific code
glantau
parents:
diff changeset
363 add lr, lr, #2
fefaa96def6e arm specific code
glantau
parents:
diff changeset
364 subs r12, r12, #1
fefaa96def6e arm specific code
glantau
parents:
diff changeset
365 bne column_loop
fefaa96def6e arm specific code
glantau
parents:
diff changeset
366
fefaa96def6e arm specific code
glantau
parents:
diff changeset
367 the_end:
fefaa96def6e arm specific code
glantau
parents:
diff changeset
368 @ The end....
fefaa96def6e arm specific code
glantau
parents:
diff changeset
369 add sp, sp, #4
fefaa96def6e arm specific code
glantau
parents:
diff changeset
370 ldmia sp!, { r4 - r12, pc } @ restore callee saved regs and return
fefaa96def6e arm specific code
glantau
parents:
diff changeset
371
fefaa96def6e arm specific code
glantau
parents:
diff changeset
372 const_array:
fefaa96def6e arm specific code
glantau
parents:
diff changeset
373 .align
fefaa96def6e arm specific code
glantau
parents:
diff changeset
374 .word FIX_0_298631336
fefaa96def6e arm specific code
glantau
parents:
diff changeset
375 .word FIX_0_541196100
fefaa96def6e arm specific code
glantau
parents:
diff changeset
376 .word FIX_0_765366865
fefaa96def6e arm specific code
glantau
parents:
diff changeset
377 .word FIX_1_175875602
fefaa96def6e arm specific code
glantau
parents:
diff changeset
378 .word FIX_1_501321110
fefaa96def6e arm specific code
glantau
parents:
diff changeset
379 .word FIX_2_053119869
fefaa96def6e arm specific code
glantau
parents:
diff changeset
380 .word FIX_3_072711026
fefaa96def6e arm specific code
glantau
parents:
diff changeset
381 .word FIX_M_0_390180644
fefaa96def6e arm specific code
glantau
parents:
diff changeset
382 .word FIX_M_0_899976223
fefaa96def6e arm specific code
glantau
parents:
diff changeset
383 .word FIX_M_1_847759065
fefaa96def6e arm specific code
glantau
parents:
diff changeset
384 .word FIX_M_1_961570560
fefaa96def6e arm specific code
glantau
parents:
diff changeset
385 .word FIX_M_2_562915447
fefaa96def6e arm specific code
glantau
parents:
diff changeset
386 .word FIX_0xFFFF