comparison armv4l/jrevdct_arm.S @ 2967:ef2149182f1c libavcodec

COSMETICS: Remove all trailing whitespace.
author diego
date Sat, 17 Dec 2005 18:14:38 +0000
parents fefaa96def6e
children bfabfdf9ce55
comparison
equal deleted inserted replaced
2966:564788471dd4 2967:ef2149182f1c
1 /* 1 /*
2 C-like prototype : 2 C-like prototype :
3 void j_rev_dct_ARM(DCTBLOCK data) 3 void j_rev_dct_ARM(DCTBLOCK data)
4 4
5 With DCTBLOCK being a pointer to an array of 64 'signed shorts' 5 With DCTBLOCK being a pointer to an array of 64 'signed shorts'
6 6
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 22 COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
23 IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 23 IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 25
26 */ 26 */
27 #define FIX_0_298631336 2446 27 #define FIX_0_298631336 2446
28 #define FIX_0_541196100 4433 28 #define FIX_0_541196100 4433
29 #define FIX_0_765366865 6270 29 #define FIX_0_765366865 6270
30 #define FIX_1_175875602 9633 30 #define FIX_1_175875602 9633
34 #define FIX_M_0_390180644 -3196 34 #define FIX_M_0_390180644 -3196
35 #define FIX_M_0_899976223 -7373 35 #define FIX_M_0_899976223 -7373
36 #define FIX_M_1_847759065 -15137 36 #define FIX_M_1_847759065 -15137
37 #define FIX_M_1_961570560 -16069 37 #define FIX_M_1_961570560 -16069
38 #define FIX_M_2_562915447 -20995 38 #define FIX_M_2_562915447 -20995
39 #define FIX_0xFFFF 0xFFFF 39 #define FIX_0xFFFF 0xFFFF
40 40
41 #define FIX_0_298631336_ID 0 41 #define FIX_0_298631336_ID 0
42 #define FIX_0_541196100_ID 4 42 #define FIX_0_541196100_ID 4
43 #define FIX_0_765366865_ID 8 43 #define FIX_0_765366865_ID 8
44 #define FIX_1_175875602_ID 12 44 #define FIX_1_175875602_ID 12
45 #define FIX_1_501321110_ID 16 45 #define FIX_1_501321110_ID 16
51 #define FIX_M_1_961570560_ID 40 51 #define FIX_M_1_961570560_ID 40
52 #define FIX_M_2_562915447_ID 44 52 #define FIX_M_2_562915447_ID 44
53 #define FIX_0xFFFF_ID 48 53 #define FIX_0xFFFF_ID 48
54 .text 54 .text
55 .align 55 .align
56 56
57 .global j_rev_dct_ARM 57 .global j_rev_dct_ARM
58 j_rev_dct_ARM: 58 j_rev_dct_ARM:
59 stmdb sp!, { r4 - r12, lr } @ all callee saved regs 59 stmdb sp!, { r4 - r12, lr } @ all callee saved regs
60 60
61 sub sp, sp, #4 @ reserve some space on the stack 61 sub sp, sp, #4 @ reserve some space on the stack
62 str r0, [ sp ] @ save the DCT pointer to the stack 62 str r0, [ sp ] @ save the DCT pointer to the stack
63 63
64 mov lr, r0 @ lr = pointer to the current row 64 mov lr, r0 @ lr = pointer to the current row
65 mov r12, #8 @ r12 = row-counter 65 mov r12, #8 @ r12 = row-counter
66 add r11, pc, #(const_array-.-8) @ r11 = base pointer to the constants array 66 add r11, pc, #(const_array-.-8) @ r11 = base pointer to the constants array
67 row_loop: 67 row_loop:
68 ldrsh r0, [lr, # 0] @ r0 = 'd0' 68 ldrsh r0, [lr, # 0] @ r0 = 'd0'
69 ldrsh r1, [lr, # 8] @ r1 = 'd1' 69 ldrsh r1, [lr, # 8] @ r1 = 'd1'
70 70
71 @ Optimization for rows that have all items except the first set to 0 71 @ Optimization for rows that have all items except the first set to 0
78 orr r3, r3, r2 78 orr r3, r3, r2
79 orrs r5, r3, r5 79 orrs r5, r3, r5
80 beq end_of_row_loop @ nothing to be done as ALL of them are '0' 80 beq end_of_row_loop @ nothing to be done as ALL of them are '0'
81 orrs r2, r3, r1 81 orrs r2, r3, r1
82 beq empty_row 82 beq empty_row
83 83
84 ldrsh r2, [lr, # 2] @ r2 = 'd2' 84 ldrsh r2, [lr, # 2] @ r2 = 'd2'
85 ldrsh r4, [lr, # 4] @ r4 = 'd4' 85 ldrsh r4, [lr, # 4] @ r4 = 'd4'
86 ldrsh r6, [lr, # 6] @ r6 = 'd6' 86 ldrsh r6, [lr, # 6] @ r6 = 'd6'
87 87
88 ldr r3, [r11, #FIX_0_541196100_ID] 88 ldr r3, [r11, #FIX_0_541196100_ID]
89 add r7, r2, r6 89 add r7, r2, r6
90 ldr r5, [r11, #FIX_M_1_847759065_ID] 90 ldr r5, [r11, #FIX_M_1_847759065_ID]
91 mul r7, r3, r7 @ r7 = z1 91 mul r7, r3, r7 @ r7 = z1
92 ldr r3, [r11, #FIX_0_765366865_ID] 92 ldr r3, [r11, #FIX_0_765366865_ID]
99 rsb r2, r2, r5, lsl #13 @ r2 = tmp13 99 rsb r2, r2, r5, lsl #13 @ r2 = tmp13
100 add r4, r6, r3, lsl #13 @ r4 = tmp11 100 add r4, r6, r3, lsl #13 @ r4 = tmp11
101 rsb r3, r6, r3, lsl #13 @ r3 = tmp12 101 rsb r3, r6, r3, lsl #13 @ r3 = tmp12
102 102
103 stmdb sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11 103 stmdb sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11
104 104
105 ldrsh r3, [lr, #10] @ r3 = 'd3' 105 ldrsh r3, [lr, #10] @ r3 = 'd3'
106 ldrsh r5, [lr, #12] @ r5 = 'd5' 106 ldrsh r5, [lr, #12] @ r5 = 'd5'
107 ldrsh r7, [lr, #14] @ r7 = 'd7' 107 ldrsh r7, [lr, #14] @ r7 = 'd7'
108 108
109 add r0, r3, r5 @ r0 = 'z2' 109 add r0, r3, r5 @ r0 = 'z2'
134 add r3, r3, r4 @ r3 = tmp2 134 add r3, r3, r4 @ r3 = tmp2
135 add r1, r1, r6 @ r1 = tmp3 135 add r1, r1, r6 @ r1 = tmp3
136 136
137 ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11 137 ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
138 @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0 138 @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
139 139
140 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) 140 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
141 add r8, r0, r1 141 add r8, r0, r1
142 add r8, r8, #(1<<10) 142 add r8, r8, #(1<<10)
143 mov r8, r8, asr #11 143 mov r8, r8, asr #11
144 strh r8, [lr, # 0] 144 strh r8, [lr, # 0]
145 145
146 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) 146 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS)
147 sub r8, r0, r1 147 sub r8, r0, r1
148 add r8, r8, #(1<<10) 148 add r8, r8, #(1<<10)
149 mov r8, r8, asr #11 149 mov r8, r8, asr #11
150 strh r8, [lr, #14] 150 strh r8, [lr, #14]
151 151
152 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) 152 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS)
153 add r8, r6, r3 153 add r8, r6, r3
154 add r8, r8, #(1<<10) 154 add r8, r8, #(1<<10)
155 mov r8, r8, asr #11 155 mov r8, r8, asr #11
156 strh r8, [lr, # 2] 156 strh r8, [lr, # 2]
157 157
158 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) 158 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS)
159 sub r8, r6, r3 159 sub r8, r6, r3
160 add r8, r8, #(1<<10) 160 add r8, r8, #(1<<10)
161 mov r8, r8, asr #11 161 mov r8, r8, asr #11
162 strh r8, [lr, #12] 162 strh r8, [lr, #12]
163 163
164 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) 164 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS)
165 add r8, r4, r5 165 add r8, r4, r5
166 add r8, r8, #(1<<10) 166 add r8, r8, #(1<<10)
167 mov r8, r8, asr #11 167 mov r8, r8, asr #11
168 strh r8, [lr, # 4] 168 strh r8, [lr, # 4]
169 169
170 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) 170 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS)
171 sub r8, r4, r5 171 sub r8, r4, r5
172 add r8, r8, #(1<<10) 172 add r8, r8, #(1<<10)
173 mov r8, r8, asr #11 173 mov r8, r8, asr #11
174 strh r8, [lr, #10] 174 strh r8, [lr, #10]
175 175
176 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) 176 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS)
177 add r8, r2, r7 177 add r8, r2, r7
178 add r8, r8, #(1<<10) 178 add r8, r8, #(1<<10)
179 mov r8, r8, asr #11 179 mov r8, r8, asr #11
180 strh r8, [lr, # 6] 180 strh r8, [lr, # 6]
181 181
182 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) 182 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS)
183 sub r8, r2, r7 183 sub r8, r2, r7
184 add r8, r8, #(1<<10) 184 add r8, r8, #(1<<10)
185 mov r8, r8, asr #11 185 mov r8, r8, asr #11
186 strh r8, [lr, # 8] 186 strh r8, [lr, # 8]
188 @ End of row loop 188 @ End of row loop
189 add lr, lr, #16 189 add lr, lr, #16
190 subs r12, r12, #1 190 subs r12, r12, #1
191 bne row_loop 191 bne row_loop
192 beq start_column_loop 192 beq start_column_loop
193 193
194 empty_row: 194 empty_row:
195 ldr r1, [r11, #FIX_0xFFFF_ID] 195 ldr r1, [r11, #FIX_0xFFFF_ID]
196 mov r0, r0, lsl #2 196 mov r0, r0, lsl #2
197 and r0, r0, r1 197 and r0, r0, r1
198 add r0, r0, r0, lsl #16 198 add r0, r0, r0, lsl #16
242 orr r10, r5, r7 242 orr r10, r5, r7
243 orrs r10, r9, r10 243 orrs r10, r9, r10
244 beq empty_odd_column 244 beq empty_odd_column
245 245
246 stmdb sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp11, tmp12 246 stmdb sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp11, tmp12
247 247
248 add r0, r3, r5 @ r0 = 'z2' 248 add r0, r3, r5 @ r0 = 'z2'
249 add r2, r1, r7 @ r2 = 'z1' 249 add r2, r1, r7 @ r2 = 'z1'
250 add r4, r3, r7 @ r4 = 'z3' 250 add r4, r3, r7 @ r4 = 'z3'
251 add r6, r1, r5 @ r6 = 'z4' 251 add r6, r1, r5 @ r6 = 'z4'
252 ldr r9, [r11, #FIX_1_175875602_ID] 252 ldr r9, [r11, #FIX_1_175875602_ID]
269 mla r3, r10, r3, r0 @ r3 = tmp2 + z2 269 mla r3, r10, r3, r0 @ r3 = tmp2 + z2
270 add r7, r7, r4 @ r7 = tmp0 270 add r7, r7, r4 @ r7 = tmp0
271 mla r1, r9, r1, r2 @ r1 = tmp3 + z1 271 mla r1, r9, r1, r2 @ r1 = tmp3 + z1
272 add r5, r5, r6 @ r5 = tmp1 272 add r5, r5, r6 @ r5 = tmp1
273 add r3, r3, r4 @ r3 = tmp2 273 add r3, r3, r4 @ r3 = tmp2
274 add r1, r1, r6 @ r1 = tmp3 274 add r1, r1, r6 @ r1 = tmp3
275 275
276 ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12 276 ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
277 @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0 277 @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
278 278
279 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3) 279 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
280 add r8, r0, r1 280 add r8, r0, r1
281 add r8, r8, #(1<<17) 281 add r8, r8, #(1<<17)
282 mov r8, r8, asr #18 282 mov r8, r8, asr #18
283 strh r8, [lr, #( 0*8)] 283 strh r8, [lr, #( 0*8)]
284 284
285 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3) 285 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
286 sub r8, r0, r1 286 sub r8, r0, r1
287 add r8, r8, #(1<<17) 287 add r8, r8, #(1<<17)
288 mov r8, r8, asr #18 288 mov r8, r8, asr #18
289 strh r8, [lr, #(14*8)] 289 strh r8, [lr, #(14*8)]
290 290
291 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3) 291 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
292 add r8, r4, r3 292 add r8, r4, r3
293 add r8, r8, #(1<<17) 293 add r8, r8, #(1<<17)
294 mov r8, r8, asr #18 294 mov r8, r8, asr #18
295 strh r8, [lr, #( 2*8)] 295 strh r8, [lr, #( 2*8)]
296 296
297 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3) 297 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
298 sub r8, r4, r3 298 sub r8, r4, r3
299 add r8, r8, #(1<<17) 299 add r8, r8, #(1<<17)
300 mov r8, r8, asr #18 300 mov r8, r8, asr #18
301 strh r8, [lr, #(12*8)] 301 strh r8, [lr, #(12*8)]
302 302
303 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3) 303 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
304 add r8, r6, r5 304 add r8, r6, r5
305 add r8, r8, #(1<<17) 305 add r8, r8, #(1<<17)
306 mov r8, r8, asr #18 306 mov r8, r8, asr #18
307 strh r8, [lr, #( 4*8)] 307 strh r8, [lr, #( 4*8)]
308 308
309 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3) 309 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
310 sub r8, r6, r5 310 sub r8, r6, r5
311 add r8, r8, #(1<<17) 311 add r8, r8, #(1<<17)
312 mov r8, r8, asr #18 312 mov r8, r8, asr #18
313 strh r8, [lr, #(10*8)] 313 strh r8, [lr, #(10*8)]
314 314
315 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3) 315 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
316 add r8, r2, r7 316 add r8, r2, r7
317 add r8, r8, #(1<<17) 317 add r8, r8, #(1<<17)
318 mov r8, r8, asr #18 318 mov r8, r8, asr #18
319 strh r8, [lr, #( 6*8)] 319 strh r8, [lr, #( 6*8)]
320 320
321 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3) 321 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
322 sub r8, r2, r7 322 sub r8, r2, r7
323 add r8, r8, #(1<<17) 323 add r8, r8, #(1<<17)
324 mov r8, r8, asr #18 324 mov r8, r8, asr #18
325 strh r8, [lr, #( 8*8)] 325 strh r8, [lr, #( 8*8)]
327 @ End of column loop 327 @ End of column loop
328 add lr, lr, #2 328 add lr, lr, #2
329 subs r12, r12, #1 329 subs r12, r12, #1
330 bne column_loop 330 bne column_loop
331 beq the_end 331 beq the_end
332 332
333 empty_odd_column: 333 empty_odd_column:
334 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3) 334 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
335 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3) 335 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
336 add r0, r0, #(1<<17) 336 add r0, r0, #(1<<17)
337 mov r0, r0, asr #18 337 mov r0, r0, asr #18
338 strh r0, [lr, #( 0*8)] 338 strh r0, [lr, #( 0*8)]
339 strh r0, [lr, #(14*8)] 339 strh r0, [lr, #(14*8)]
340 340
341 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3) 341 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
342 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3) 342 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
343 add r4, r4, #(1<<17) 343 add r4, r4, #(1<<17)
344 mov r4, r4, asr #18 344 mov r4, r4, asr #18
345 strh r4, [lr, #( 2*8)] 345 strh r4, [lr, #( 2*8)]
346 strh r4, [lr, #(12*8)] 346 strh r4, [lr, #(12*8)]
347 347
348 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3) 348 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
349 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3) 349 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
350 add r6, r6, #(1<<17) 350 add r6, r6, #(1<<17)
351 mov r6, r6, asr #18 351 mov r6, r6, asr #18
352 strh r6, [lr, #( 4*8)] 352 strh r6, [lr, #( 4*8)]
353 strh r6, [lr, #(10*8)] 353 strh r6, [lr, #(10*8)]
354 354
355 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3) 355 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
356 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3) 356 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
357 add r2, r2, #(1<<17) 357 add r2, r2, #(1<<17)
358 mov r2, r2, asr #18 358 mov r2, r2, asr #18
359 strh r2, [lr, #( 6*8)] 359 strh r2, [lr, #( 6*8)]
361 361
362 @ End of column loop 362 @ End of column loop
363 add lr, lr, #2 363 add lr, lr, #2
364 subs r12, r12, #1 364 subs r12, r12, #1
365 bne column_loop 365 bne column_loop
366 366
367 the_end: 367 the_end:
368 @ The end.... 368 @ The end....
369 add sp, sp, #4 369 add sp, sp, #4
370 ldmia sp!, { r4 - r12, pc } @ restore callee saved regs and return 370 ldmia sp!, { r4 - r12, pc } @ restore callee saved regs and return
371 371
372 const_array: 372 const_array: