comparison armv4l/jrevdct_arm.S @ 2979:bfabfdf9ce55 libavcodec

COSMETICS: tabs --> spaces, some prettyprinting
author diego
date Thu, 22 Dec 2005 01:10:11 +0000
parents ef2149182f1c
children 316762ae96a7
comparison
equal deleted inserted replaced
2978:403183bbb505 2979:bfabfdf9ce55
1 /* 1 /*
2 C-like prototype : 2 C-like prototype :
3 void j_rev_dct_ARM(DCTBLOCK data) 3 void j_rev_dct_ARM(DCTBLOCK data)
4 4
5 With DCTBLOCK being a pointer to an array of 64 'signed shorts' 5 With DCTBLOCK being a pointer to an array of 64 'signed shorts'
6 6
7 Copyright (c) 2001 Lionel Ulmer (lionel.ulmer@free.fr / bbrox@bbrox.org) 7 Copyright (c) 2001 Lionel Ulmer (lionel.ulmer@free.fr / bbrox@bbrox.org)
8 8
/* Byte offsets of the fixed-point multiplier constants inside const_array
   (see the .word list at the end of the file — order must match). */
#define FIX_M_0_899976223_ID 32
#define FIX_M_1_847759065_ID 36
#define FIX_M_1_961570560_ID 40
#define FIX_M_2_562915447_ID 44
#define FIX_0xFFFF_ID        48
54 .text 54 .text
55 .align 55 .align
56 56
57 .global j_rev_dct_ARM 57 .global j_rev_dct_ARM
58 j_rev_dct_ARM: 58 j_rev_dct_ARM:
59 stmdb sp!, { r4 - r12, lr } @ all callee saved regs 59 stmdb sp!, { r4 - r12, lr } @ all callee saved regs
60 60
61 sub sp, sp, #4 @ reserve some space on the stack 61 sub sp, sp, #4 @ reserve some space on the stack
62 str r0, [ sp ] @ save the DCT pointer to the stack 62 str r0, [ sp ] @ save the DCT pointer to the stack
63 63
64 mov lr, r0 @ lr = pointer to the current row 64 mov lr, r0 @ lr = pointer to the current row
65 mov r12, #8 @ r12 = row-counter 65 mov r12, #8 @ r12 = row-counter
66 add r11, pc, #(const_array-.-8) @ r11 = base pointer to the constants array 66 add r11, pc, #(const_array-.-8) @ r11 = base pointer to the constants array
67 row_loop: 67 row_loop:
68 ldrsh r0, [lr, # 0] @ r0 = 'd0' 68 ldrsh r0, [lr, # 0] @ r0 = 'd0'
69 ldrsh r1, [lr, # 8] @ r1 = 'd1' 69 ldrsh r1, [lr, # 8] @ r1 = 'd1'
70 70
71 @ Optimization for row that have all items except the first set to 0 71 @ Optimization for row that have all items except the first set to 0
72 @ (this works as the DCTELEMS are always 4-byte aligned) 72 @ (this works as the DCTELEMS are always 4-byte aligned)
73 ldr r5, [lr, # 0] 73 ldr r5, [lr, # 0]
74 ldr r2, [lr, # 4] 74 ldr r2, [lr, # 4]
75 ldr r3, [lr, # 8] 75 ldr r3, [lr, # 8]
76 ldr r4, [lr, #12] 76 ldr r4, [lr, #12]
77 orr r3, r3, r4 77 orr r3, r3, r4
78 orr r3, r3, r2 78 orr r3, r3, r2
79 orrs r5, r3, r5 79 orrs r5, r3, r5
80 beq end_of_row_loop @ nothing to be done as ALL of them are '0' 80 beq end_of_row_loop @ nothing to be done as ALL of them are '0'
81 orrs r2, r3, r1 81 orrs r2, r3, r1
82 beq empty_row 82 beq empty_row
83 83
84 ldrsh r2, [lr, # 2] @ r2 = 'd2' 84 ldrsh r2, [lr, # 2] @ r2 = 'd2'
85 ldrsh r4, [lr, # 4] @ r4 = 'd4' 85 ldrsh r4, [lr, # 4] @ r4 = 'd4'
86 ldrsh r6, [lr, # 6] @ r6 = 'd6' 86 ldrsh r6, [lr, # 6] @ r6 = 'd6'
87 87
88 ldr r3, [r11, #FIX_0_541196100_ID] 88 ldr r3, [r11, #FIX_0_541196100_ID]
89 add r7, r2, r6 89 add r7, r2, r6
90 ldr r5, [r11, #FIX_M_1_847759065_ID] 90 ldr r5, [r11, #FIX_M_1_847759065_ID]
91 mul r7, r3, r7 @ r7 = z1 91 mul r7, r3, r7 @ r7 = z1
92 ldr r3, [r11, #FIX_0_765366865_ID] 92 ldr r3, [r11, #FIX_0_765366865_ID]
93 mla r6, r5, r6, r7 @ r6 = tmp2 93 mla r6, r5, r6, r7 @ r6 = tmp2
94 add r5, r0, r4 @ r5 = tmp0 94 add r5, r0, r4 @ r5 = tmp0
95 mla r2, r3, r2, r7 @ r2 = tmp3 95 mla r2, r3, r2, r7 @ r2 = tmp3
96 sub r3, r0, r4 @ r3 = tmp1 96 sub r3, r0, r4 @ r3 = tmp1
97 97
98 add r0, r2, r5, lsl #13 @ r0 = tmp10 98 add r0, r2, r5, lsl #13 @ r0 = tmp10
99 rsb r2, r2, r5, lsl #13 @ r2 = tmp13 99 rsb r2, r2, r5, lsl #13 @ r2 = tmp13
100 add r4, r6, r3, lsl #13 @ r4 = tmp11 100 add r4, r6, r3, lsl #13 @ r4 = tmp11
101 rsb r3, r6, r3, lsl #13 @ r3 = tmp12 101 rsb r3, r6, r3, lsl #13 @ r3 = tmp12
102 102
103 stmdb sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11 103 stmdb sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11
104 104
105 ldrsh r3, [lr, #10] @ r3 = 'd3' 105 ldrsh r3, [lr, #10] @ r3 = 'd3'
106 ldrsh r5, [lr, #12] @ r5 = 'd5' 106 ldrsh r5, [lr, #12] @ r5 = 'd5'
107 ldrsh r7, [lr, #14] @ r7 = 'd7' 107 ldrsh r7, [lr, #14] @ r7 = 'd7'
108 108
109 add r0, r3, r5 @ r0 = 'z2' 109 add r0, r3, r5 @ r0 = 'z2'
110 add r2, r1, r7 @ r2 = 'z1' 110 add r2, r1, r7 @ r2 = 'z1'
111 add r4, r3, r7 @ r4 = 'z3' 111 add r4, r3, r7 @ r4 = 'z3'
112 add r6, r1, r5 @ r6 = 'z4' 112 add r6, r1, r5 @ r6 = 'z4'
113 ldr r9, [r11, #FIX_1_175875602_ID] 113 ldr r9, [r11, #FIX_1_175875602_ID]
114 add r8, r4, r6 @ r8 = z3 + z4 114 add r8, r4, r6 @ r8 = z3 + z4
115 ldr r10, [r11, #FIX_M_0_899976223_ID] 115 ldr r10, [r11, #FIX_M_0_899976223_ID]
116 mul r8, r9, r8 @ r8 = 'z5' 116 mul r8, r9, r8 @ r8 = 'z5'
117 ldr r9, [r11, #FIX_M_2_562915447_ID] 117 ldr r9, [r11, #FIX_M_2_562915447_ID]
118 mul r2, r10, r2 @ r2 = 'z1' 118 mul r2, r10, r2 @ r2 = 'z1'
119 ldr r10, [r11, #FIX_M_1_961570560_ID] 119 ldr r10, [r11, #FIX_M_1_961570560_ID]
120 mul r0, r9, r0 @ r0 = 'z2' 120 mul r0, r9, r0 @ r0 = 'z2'
121 ldr r9, [r11, #FIX_M_0_390180644_ID] 121 ldr r9, [r11, #FIX_M_0_390180644_ID]
122 mla r4, r10, r4, r8 @ r4 = 'z3' 122 mla r4, r10, r4, r8 @ r4 = 'z3'
123 ldr r10, [r11, #FIX_0_298631336_ID] 123 ldr r10, [r11, #FIX_0_298631336_ID]
124 mla r6, r9, r6, r8 @ r6 = 'z4' 124 mla r6, r9, r6, r8 @ r6 = 'z4'
125 ldr r9, [r11, #FIX_2_053119869_ID] 125 ldr r9, [r11, #FIX_2_053119869_ID]
126 mla r7, r10, r7, r2 @ r7 = tmp0 + z1 126 mla r7, r10, r7, r2 @ r7 = tmp0 + z1
127 ldr r10, [r11, #FIX_3_072711026_ID] 127 ldr r10, [r11, #FIX_3_072711026_ID]
128 mla r5, r9, r5, r0 @ r5 = tmp1 + z2 128 mla r5, r9, r5, r0 @ r5 = tmp1 + z2
129 ldr r9, [r11, #FIX_1_501321110_ID] 129 ldr r9, [r11, #FIX_1_501321110_ID]
130 mla r3, r10, r3, r0 @ r3 = tmp2 + z2 130 mla r3, r10, r3, r0 @ r3 = tmp2 + z2
131 add r7, r7, r4 @ r7 = tmp0 131 add r7, r7, r4 @ r7 = tmp0
132 mla r1, r9, r1, r2 @ r1 = tmp3 + z1 132 mla r1, r9, r1, r2 @ r1 = tmp3 + z1
133 add r5, r5, r6 @ r5 = tmp1 133 add r5, r5, r6 @ r5 = tmp1
134 add r3, r3, r4 @ r3 = tmp2 134 add r3, r3, r4 @ r3 = tmp2
135 add r1, r1, r6 @ r1 = tmp3 135 add r1, r1, r6 @ r1 = tmp3
136 136
137 ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11 137 ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
138 @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0 138 @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
139 139
140 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) 140 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
141 add r8, r0, r1 141 add r8, r0, r1
142 add r8, r8, #(1<<10) 142 add r8, r8, #(1<<10)
143 mov r8, r8, asr #11 143 mov r8, r8, asr #11
144 strh r8, [lr, # 0] 144 strh r8, [lr, # 0]
145 145
146 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) 146 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS)
147 sub r8, r0, r1 147 sub r8, r0, r1
148 add r8, r8, #(1<<10) 148 add r8, r8, #(1<<10)
149 mov r8, r8, asr #11 149 mov r8, r8, asr #11
150 strh r8, [lr, #14] 150 strh r8, [lr, #14]
151 151
152 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) 152 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS)
153 add r8, r6, r3 153 add r8, r6, r3
154 add r8, r8, #(1<<10) 154 add r8, r8, #(1<<10)
155 mov r8, r8, asr #11 155 mov r8, r8, asr #11
156 strh r8, [lr, # 2] 156 strh r8, [lr, # 2]
157 157
158 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) 158 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS)
159 sub r8, r6, r3 159 sub r8, r6, r3
160 add r8, r8, #(1<<10) 160 add r8, r8, #(1<<10)
161 mov r8, r8, asr #11 161 mov r8, r8, asr #11
162 strh r8, [lr, #12] 162 strh r8, [lr, #12]
163 163
164 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) 164 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS)
165 add r8, r4, r5 165 add r8, r4, r5
166 add r8, r8, #(1<<10) 166 add r8, r8, #(1<<10)
167 mov r8, r8, asr #11 167 mov r8, r8, asr #11
168 strh r8, [lr, # 4] 168 strh r8, [lr, # 4]
169 169
170 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) 170 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS)
171 sub r8, r4, r5 171 sub r8, r4, r5
172 add r8, r8, #(1<<10) 172 add r8, r8, #(1<<10)
173 mov r8, r8, asr #11 173 mov r8, r8, asr #11
174 strh r8, [lr, #10] 174 strh r8, [lr, #10]
175 175
176 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) 176 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS)
177 add r8, r2, r7 177 add r8, r2, r7
178 add r8, r8, #(1<<10) 178 add r8, r8, #(1<<10)
179 mov r8, r8, asr #11 179 mov r8, r8, asr #11
180 strh r8, [lr, # 6] 180 strh r8, [lr, # 6]
181 181
182 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) 182 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS)
183 sub r8, r2, r7 183 sub r8, r2, r7
184 add r8, r8, #(1<<10) 184 add r8, r8, #(1<<10)
185 mov r8, r8, asr #11 185 mov r8, r8, asr #11
186 strh r8, [lr, # 8] 186 strh r8, [lr, # 8]
187 187
188 @ End of row loop 188 @ End of row loop
189 add lr, lr, #16 189 add lr, lr, #16
190 subs r12, r12, #1 190 subs r12, r12, #1
191 bne row_loop 191 bne row_loop
192 beq start_column_loop 192 beq start_column_loop
193 193
194 empty_row: 194 empty_row:
195 ldr r1, [r11, #FIX_0xFFFF_ID] 195 ldr r1, [r11, #FIX_0xFFFF_ID]
196 mov r0, r0, lsl #2 196 mov r0, r0, lsl #2
197 and r0, r0, r1 197 and r0, r0, r1
198 add r0, r0, r0, lsl #16 198 add r0, r0, r0, lsl #16
199 str r0, [lr, # 0] 199 str r0, [lr, # 0]
200 str r0, [lr, # 4] 200 str r0, [lr, # 4]
201 str r0, [lr, # 8] 201 str r0, [lr, # 8]
202 str r0, [lr, #12] 202 str r0, [lr, #12]
203 203
204 end_of_row_loop: 204 end_of_row_loop:
205 @ End of loop 205 @ End of loop
206 add lr, lr, #16 206 add lr, lr, #16
207 subs r12, r12, #1 207 subs r12, r12, #1
208 bne row_loop 208 bne row_loop
209 209
210 start_column_loop: 210 start_column_loop:
211 @ Start of column loop 211 @ Start of column loop
212 ldr lr, [ sp ] 212 ldr lr, [ sp ]
213 mov r12, #8 213 mov r12, #8
214 column_loop: 214 column_loop:
215 ldrsh r0, [lr, #( 0*8)] @ r0 = 'd0' 215 ldrsh r0, [lr, #( 0*8)] @ r0 = 'd0'
216 ldrsh r2, [lr, #( 4*8)] @ r2 = 'd2' 216 ldrsh r2, [lr, #( 4*8)] @ r2 = 'd2'
217 ldrsh r4, [lr, #( 8*8)] @ r4 = 'd4' 217 ldrsh r4, [lr, #( 8*8)] @ r4 = 'd4'
218 ldrsh r6, [lr, #(12*8)] @ r6 = 'd6' 218 ldrsh r6, [lr, #(12*8)] @ r6 = 'd6'
219 219
220 ldr r3, [r11, #FIX_0_541196100_ID] 220 ldr r3, [r11, #FIX_0_541196100_ID]
221 add r1, r2, r6 221 add r1, r2, r6
222 ldr r5, [r11, #FIX_M_1_847759065_ID] 222 ldr r5, [r11, #FIX_M_1_847759065_ID]
223 mul r1, r3, r1 @ r1 = z1 223 mul r1, r3, r1 @ r1 = z1
224 ldr r3, [r11, #FIX_0_765366865_ID] 224 ldr r3, [r11, #FIX_0_765366865_ID]
225 mla r6, r5, r6, r1 @ r6 = tmp2 225 mla r6, r5, r6, r1 @ r6 = tmp2
226 add r5, r0, r4 @ r5 = tmp0 226 add r5, r0, r4 @ r5 = tmp0
227 mla r2, r3, r2, r1 @ r2 = tmp3 227 mla r2, r3, r2, r1 @ r2 = tmp3
228 sub r3, r0, r4 @ r3 = tmp1 228 sub r3, r0, r4 @ r3 = tmp1
229 229
230 add r0, r2, r5, lsl #13 @ r0 = tmp10 230 add r0, r2, r5, lsl #13 @ r0 = tmp10
231 rsb r2, r2, r5, lsl #13 @ r2 = tmp13 231 rsb r2, r2, r5, lsl #13 @ r2 = tmp13
232 add r4, r6, r3, lsl #13 @ r4 = tmp11 232 add r4, r6, r3, lsl #13 @ r4 = tmp11
233 rsb r6, r6, r3, lsl #13 @ r6 = tmp12 233 rsb r6, r6, r3, lsl #13 @ r6 = tmp12
234 234
235 ldrsh r1, [lr, #( 2*8)] @ r1 = 'd1' 235 ldrsh r1, [lr, #( 2*8)] @ r1 = 'd1'
236 ldrsh r3, [lr, #( 6*8)] @ r3 = 'd3' 236 ldrsh r3, [lr, #( 6*8)] @ r3 = 'd3'
237 ldrsh r5, [lr, #(10*8)] @ r5 = 'd5' 237 ldrsh r5, [lr, #(10*8)] @ r5 = 'd5'
238 ldrsh r7, [lr, #(14*8)] @ r7 = 'd7' 238 ldrsh r7, [lr, #(14*8)] @ r7 = 'd7'
239 239
240 @ Check for empty odd column (happens about 20 to 25 % of the time according to my stats) 240 @ Check for empty odd column (happens about 20 to 25 % of the time according to my stats)
241 orr r9, r1, r3 241 orr r9, r1, r3
242 orr r10, r5, r7 242 orr r10, r5, r7
243 orrs r10, r9, r10 243 orrs r10, r9, r10
244 beq empty_odd_column 244 beq empty_odd_column
245 245
246 stmdb sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp12, tmp11 246 stmdb sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp12, tmp11
247 247
248 add r0, r3, r5 @ r0 = 'z2' 248 add r0, r3, r5 @ r0 = 'z2'
249 add r2, r1, r7 @ r2 = 'z1' 249 add r2, r1, r7 @ r2 = 'z1'
250 add r4, r3, r7 @ r4 = 'z3' 250 add r4, r3, r7 @ r4 = 'z3'
251 add r6, r1, r5 @ r6 = 'z4' 251 add r6, r1, r5 @ r6 = 'z4'
252 ldr r9, [r11, #FIX_1_175875602_ID] 252 ldr r9, [r11, #FIX_1_175875602_ID]
253 add r8, r4, r6 253 add r8, r4, r6
254 ldr r10, [r11, #FIX_M_0_899976223_ID] 254 ldr r10, [r11, #FIX_M_0_899976223_ID]
255 mul r8, r9, r8 @ r8 = 'z5' 255 mul r8, r9, r8 @ r8 = 'z5'
256 ldr r9, [r11, #FIX_M_2_562915447_ID] 256 ldr r9, [r11, #FIX_M_2_562915447_ID]
257 mul r2, r10, r2 @ r2 = 'z1' 257 mul r2, r10, r2 @ r2 = 'z1'
258 ldr r10, [r11, #FIX_M_1_961570560_ID] 258 ldr r10, [r11, #FIX_M_1_961570560_ID]
259 mul r0, r9, r0 @ r0 = 'z2' 259 mul r0, r9, r0 @ r0 = 'z2'
260 ldr r9, [r11, #FIX_M_0_390180644_ID] 260 ldr r9, [r11, #FIX_M_0_390180644_ID]
261 mla r4, r10, r4, r8 @ r4 = 'z3' 261 mla r4, r10, r4, r8 @ r4 = 'z3'
262 ldr r10, [r11, #FIX_0_298631336_ID] 262 ldr r10, [r11, #FIX_0_298631336_ID]
263 mla r6, r9, r6, r8 @ r6 = 'z4' 263 mla r6, r9, r6, r8 @ r6 = 'z4'
264 ldr r9, [r11, #FIX_2_053119869_ID] 264 ldr r9, [r11, #FIX_2_053119869_ID]
265 mla r7, r10, r7, r2 @ r7 = tmp0 + z1 265 mla r7, r10, r7, r2 @ r7 = tmp0 + z1
266 ldr r10, [r11, #FIX_3_072711026_ID] 266 ldr r10, [r11, #FIX_3_072711026_ID]
267 mla r5, r9, r5, r0 @ r5 = tmp1 + z2 267 mla r5, r9, r5, r0 @ r5 = tmp1 + z2
268 ldr r9, [r11, #FIX_1_501321110_ID] 268 ldr r9, [r11, #FIX_1_501321110_ID]
269 mla r3, r10, r3, r0 @ r3 = tmp2 + z2 269 mla r3, r10, r3, r0 @ r3 = tmp2 + z2
270 add r7, r7, r4 @ r7 = tmp0 270 add r7, r7, r4 @ r7 = tmp0
271 mla r1, r9, r1, r2 @ r1 = tmp3 + z1 271 mla r1, r9, r1, r2 @ r1 = tmp3 + z1
272 add r5, r5, r6 @ r5 = tmp1 272 add r5, r5, r6 @ r5 = tmp1
273 add r3, r3, r4 @ r3 = tmp2 273 add r3, r3, r4 @ r3 = tmp2
274 add r1, r1, r6 @ r1 = tmp3 274 add r1, r1, r6 @ r1 = tmp3
275 275
276 ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12 276 ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
277 @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0 277 @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
278 278
279 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3) 279 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
280 add r8, r0, r1 280 add r8, r0, r1
281 add r8, r8, #(1<<17) 281 add r8, r8, #(1<<17)
282 mov r8, r8, asr #18 282 mov r8, r8, asr #18
283 strh r8, [lr, #( 0*8)] 283 strh r8, [lr, #( 0*8)]
284 284
285 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3) 285 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
286 sub r8, r0, r1 286 sub r8, r0, r1
287 add r8, r8, #(1<<17) 287 add r8, r8, #(1<<17)
288 mov r8, r8, asr #18 288 mov r8, r8, asr #18
289 strh r8, [lr, #(14*8)] 289 strh r8, [lr, #(14*8)]
290 290
291 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3) 291 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
292 add r8, r4, r3 292 add r8, r4, r3
293 add r8, r8, #(1<<17) 293 add r8, r8, #(1<<17)
294 mov r8, r8, asr #18 294 mov r8, r8, asr #18
295 strh r8, [lr, #( 2*8)] 295 strh r8, [lr, #( 2*8)]
296 296
297 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3) 297 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
298 sub r8, r4, r3 298 sub r8, r4, r3
299 add r8, r8, #(1<<17) 299 add r8, r8, #(1<<17)
300 mov r8, r8, asr #18 300 mov r8, r8, asr #18
301 strh r8, [lr, #(12*8)] 301 strh r8, [lr, #(12*8)]
302 302
303 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3) 303 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
304 add r8, r6, r5 304 add r8, r6, r5
305 add r8, r8, #(1<<17) 305 add r8, r8, #(1<<17)
306 mov r8, r8, asr #18 306 mov r8, r8, asr #18
307 strh r8, [lr, #( 4*8)] 307 strh r8, [lr, #( 4*8)]
308 308
309 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3) 309 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
310 sub r8, r6, r5 310 sub r8, r6, r5
311 add r8, r8, #(1<<17) 311 add r8, r8, #(1<<17)
312 mov r8, r8, asr #18 312 mov r8, r8, asr #18
313 strh r8, [lr, #(10*8)] 313 strh r8, [lr, #(10*8)]
314 314
315 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3) 315 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
316 add r8, r2, r7 316 add r8, r2, r7
317 add r8, r8, #(1<<17) 317 add r8, r8, #(1<<17)
318 mov r8, r8, asr #18 318 mov r8, r8, asr #18
319 strh r8, [lr, #( 6*8)] 319 strh r8, [lr, #( 6*8)]
320 320
321 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3) 321 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
322 sub r8, r2, r7 322 sub r8, r2, r7
323 add r8, r8, #(1<<17) 323 add r8, r8, #(1<<17)
324 mov r8, r8, asr #18 324 mov r8, r8, asr #18
325 strh r8, [lr, #( 8*8)] 325 strh r8, [lr, #( 8*8)]
326 326
327 @ End of row loop 327 @ End of row loop
328 add lr, lr, #2 328 add lr, lr, #2
329 subs r12, r12, #1 329 subs r12, r12, #1
330 bne column_loop 330 bne column_loop
331 beq the_end 331 beq the_end
332 332
333 empty_odd_column: 333 empty_odd_column:
334 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3) 334 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
335 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3) 335 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
336 add r0, r0, #(1<<17) 336 add r0, r0, #(1<<17)
337 mov r0, r0, asr #18 337 mov r0, r0, asr #18
338 strh r0, [lr, #( 0*8)] 338 strh r0, [lr, #( 0*8)]
339 strh r0, [lr, #(14*8)] 339 strh r0, [lr, #(14*8)]
340 340
341 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3) 341 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
342 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3) 342 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
343 add r4, r4, #(1<<17) 343 add r4, r4, #(1<<17)
344 mov r4, r4, asr #18 344 mov r4, r4, asr #18
345 strh r4, [lr, #( 2*8)] 345 strh r4, [lr, #( 2*8)]
346 strh r4, [lr, #(12*8)] 346 strh r4, [lr, #(12*8)]
347 347
348 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3) 348 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
349 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3) 349 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
350 add r6, r6, #(1<<17) 350 add r6, r6, #(1<<17)
351 mov r6, r6, asr #18 351 mov r6, r6, asr #18
352 strh r6, [lr, #( 4*8)] 352 strh r6, [lr, #( 4*8)]
353 strh r6, [lr, #(10*8)] 353 strh r6, [lr, #(10*8)]
354 354
355 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3) 355 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
356 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3) 356 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
357 add r2, r2, #(1<<17) 357 add r2, r2, #(1<<17)
358 mov r2, r2, asr #18 358 mov r2, r2, asr #18
359 strh r2, [lr, #( 6*8)] 359 strh r2, [lr, #( 6*8)]
360 strh r2, [lr, #( 8*8)] 360 strh r2, [lr, #( 8*8)]
361 361
362 @ End of row loop 362 @ End of row loop
363 add lr, lr, #2 363 add lr, lr, #2
364 subs r12, r12, #1 364 subs r12, r12, #1
365 bne column_loop 365 bne column_loop
366 366
367 the_end: 367 the_end:
368 @ The end.... 368 @ The end....
369 add sp, sp, #4 369 add sp, sp, #4
370 ldmia sp!, { r4 - r12, pc } @ restore callee saved regs and return 370 ldmia sp!, { r4 - r12, pc } @ restore callee saved regs and return
371 371
@ Fixed-point multiplier table, loaded PC-relative via r11.
@ Word order must match the FIX_*_ID byte offsets defined at the top of
@ the file (FIX_0_298631336 at offset 0 ... FIX_0xFFFF at offset 48).
const_array:
        .align
        .word FIX_0_298631336
        .word FIX_0_541196100
        .word FIX_0_765366865
        .word FIX_1_175875602
        .word FIX_1_501321110
        .word FIX_2_053119869
        .word FIX_3_072711026
        .word FIX_M_0_390180644
        .word FIX_M_0_899976223
        .word FIX_M_1_847759065
        .word FIX_M_1_961570560
        .word FIX_M_2_562915447
        .word FIX_0xFFFF