Mercurial > libavcodec.hg
comparison arm/jrevdct_arm.S @ 8359:9281a8a9387a libavcodec
ARM: replace "armv4l" with "arm"
author | mru |
---|---|
date | Wed, 17 Dec 2008 00:54:54 +0000 |
parents | armv4l/jrevdct_arm.S@c45366b01126 |
children | 989ea69f6a4e |
comparison
equal
deleted
inserted
replaced
8358:c30b92cf446b | 8359:9281a8a9387a |
---|---|
1 /* | |
2 C-like prototype : | |
3 void j_rev_dct_ARM(DCTBLOCK data) | |
4 | |
5 With DCTBLOCK being a pointer to an array of 64 'signed shorts' | |
6 | |
7 Copyright (c) 2001 Lionel Ulmer (lionel.ulmer@free.fr / bbrox@bbrox.org) | |
8 | |
9 Permission is hereby granted, free of charge, to any person obtaining a copy | |
10 of this software and associated documentation files (the "Software"), to deal | |
11 in the Software without restriction, including without limitation the rights | |
12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
13 copies of the Software, and to permit persons to whom the Software is | |
14 furnished to do so, subject to the following conditions: | |
15 | |
16 The above copyright notice and this permission notice shall be included in | |
17 all copies or substantial portions of the Software. | |
18 | |
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
22 COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER | |
23 IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | |
24 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
25 | |
26 */ | |
27 | |
28 #include "asm.S" | |
29 | |
@ jpeglib jrevdct multipliers in signed 13-bit fixed point
@ (value * 2^13, i.e. CONST_BITS = 13): e.g. 0.541196100 * 8192 ~= 4433.
#define FIX_0_298631336 2446
#define FIX_0_541196100 4433
#define FIX_0_765366865 6270
#define FIX_1_175875602 9633
#define FIX_1_501321110 12299
#define FIX_2_053119869 16819
#define FIX_3_072711026 25172
#define FIX_M_0_390180644 -3196
#define FIX_M_0_899976223 -7373
#define FIX_M_1_847759065 -15137
#define FIX_M_1_961570560 -16069
#define FIX_M_2_562915447 -20995
#define FIX_0xFFFF 0xFFFF       /* mask for the low halfword of a register */

@ Byte offsets of each constant inside const_array (defined at the end of
@ this file).  The .word order of that table MUST match these offsets;
@ the code loads constants with  ldr rX, [r11, #FIX_..._ID].
#define FIX_0_298631336_ID   0
#define FIX_0_541196100_ID   4
#define FIX_0_765366865_ID   8
#define FIX_1_175875602_ID  12
#define FIX_1_501321110_ID  16
#define FIX_2_053119869_ID  20
#define FIX_3_072711026_ID  24
#define FIX_M_0_390180644_ID 28
#define FIX_M_0_899976223_ID 32
#define FIX_M_1_847759065_ID 36
#define FIX_M_1_961570560_ID 40
#define FIX_M_2_562915447_ID 44
#define FIX_0xFFFF_ID       48
        .text
        .align
59 | |
/*
 * void j_rev_dct_ARM(DCTBLOCK data)
 *
 * In-place 2-D inverse DCT of an 8x8 block of signed 16-bit coefficients,
 * following the jpeglib jrevdct algorithm (CONST_BITS = 13, PASS1_BITS = 2).
 * Pass 1 transforms each row, descaling by CONST_BITS - PASS1_BITS = 11;
 * pass 2 transforms each column, descaling by CONST_BITS + PASS1_BITS + 3 = 18.
 *
 * In:       r0  = pointer to the 64-element coefficient block
 * While running:
 *           r11 = base of const_array, lr = current row/column pointer,
 *           r12 = loop counter; r0-r10 are scratch.
 * All callee-saved registers used are preserved via the stmdb/ldmia pair.
 *
 * NOTE(review): the pc-relative  add r11, pc, #(const_array-.-8)  assumes
 * ARM (not Thumb) mode, where reading pc yields the current instruction
 * address + 8 — confirm if this is ever built for Thumb.
 */
function j_rev_dct_ARM, export=1
        stmdb   sp!, { r4 - r12, lr }   @ all callee saved regs (lr restored into pc on exit)

        sub     sp, sp, #4              @ reserve some space on the stack
        str     r0, [ sp ]              @ save the DCT pointer for pass 2 (lr is reused per row)

        mov     lr, r0                  @ lr = pointer to the current row
        mov     r12, #8                 @ r12 = row-counter
        add     r11, pc, #(const_array-.-8) @ r11 = base pointer to the constants array
row_loop:
        ldrsh   r0, [lr, # 0]           @ r0 = 'd0'
        ldrsh   r2, [lr, # 2]           @ r2 = 'd2'

        @ Optimization for rows that have all items except the first set to 0
        @ (this works as the DCTELEMs are always 4-byte aligned):
        @ OR together all 8 halfwords of the row as four word loads.
        ldr     r5, [lr, # 0]
        ldr     r6, [lr, # 4]
        ldr     r3, [lr, # 8]
        ldr     r4, [lr, #12]
        orr     r3, r3, r4
        orr     r3, r3, r6
        orrs    r5, r3, r5
        beq     end_of_row_loop         @ nothing to be done as ALL of them are '0'
        orrs    r3, r3, r2              @ r3 = OR of every coefficient except the first
        beq     empty_row               @ only the DC term is non-zero

        ldrsh   r1, [lr, # 8]           @ r1 = 'd1'
        ldrsh   r4, [lr, # 4]           @ r4 = 'd4'
        ldrsh   r6, [lr, # 6]           @ r6 = 'd6'

        @ Even part of the row: constant loads are interleaved with the
        @ multiplies to hide the ldr latency (hand scheduling — keep order).
        ldr     r3, [r11, #FIX_0_541196100_ID]
        add     r7, r2, r6
        ldr     r5, [r11, #FIX_M_1_847759065_ID]
        mul     r7, r3, r7              @ r7 = z1
        ldr     r3, [r11, #FIX_0_765366865_ID]
        mla     r6, r5, r6, r7          @ r6 = tmp2
        add     r5, r0, r4              @ r5 = tmp0
        mla     r2, r3, r2, r7          @ r2 = tmp3
        sub     r3, r0, r4              @ r3 = tmp1

        @ lsl #13 rescales the unmultiplied terms to CONST_BITS fixed point
        add     r0, r2, r5, lsl #13     @ r0 = tmp10
        rsb     r2, r2, r5, lsl #13     @ r2 = tmp13
        add     r4, r6, r3, lsl #13     @ r4 = tmp11
        rsb     r3, r6, r3, lsl #13     @ r3 = tmp12

        stmdb   sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11

        ldrsh   r3, [lr, #10]           @ r3 = 'd3'
        ldrsh   r5, [lr, #12]           @ r5 = 'd5'
        ldrsh   r7, [lr, #14]           @ r7 = 'd7'

        @ Odd part of the row.
        add     r0, r3, r5              @ r0 = 'z2'
        add     r2, r1, r7              @ r2 = 'z1'
        add     r4, r3, r7              @ r4 = 'z3'
        add     r6, r1, r5              @ r6 = 'z4'
        ldr     r9, [r11, #FIX_1_175875602_ID]
        add     r8, r4, r6              @ r8 = z3 + z4
        ldr     r10, [r11, #FIX_M_0_899976223_ID]
        mul     r8, r9, r8              @ r8 = 'z5'
        ldr     r9, [r11, #FIX_M_2_562915447_ID]
        mul     r2, r10, r2             @ r2 = 'z1'
        ldr     r10, [r11, #FIX_M_1_961570560_ID]
        mul     r0, r9, r0              @ r0 = 'z2'
        ldr     r9, [r11, #FIX_M_0_390180644_ID]
        mla     r4, r10, r4, r8         @ r4 = 'z3'
        ldr     r10, [r11, #FIX_0_298631336_ID]
        mla     r6, r9, r6, r8          @ r6 = 'z4'
        ldr     r9, [r11, #FIX_2_053119869_ID]
        mla     r7, r10, r7, r2         @ r7 = tmp0 + z1
        ldr     r10, [r11, #FIX_3_072711026_ID]
        mla     r5, r9, r5, r0          @ r5 = tmp1 + z2
        ldr     r9, [r11, #FIX_1_501321110_ID]
        mla     r3, r10, r3, r0         @ r3 = tmp2 + z2
        add     r7, r7, r4              @ r7 = tmp0
        mla     r1, r9, r1, r2          @ r1 = tmp3 + z1
        add     r5, r5, r6              @ r5 = tmp1
        add     r3, r3, r4              @ r3 = tmp2
        add     r1, r1, r6              @ r1 = tmp3

        ldmia   sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
                                        @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0

        @ Butterfly + descale: add #(1<<10) rounds to nearest before the
        @ arithmetic shift right by 11 (= CONST_BITS - PASS1_BITS).

        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
        add     r8, r0, r1
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, # 0]

        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS)
        sub     r8, r0, r1
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, #14]

        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS)
        add     r8, r6, r3
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, # 2]

        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS)
        sub     r8, r6, r3
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, #12]

        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS)
        add     r8, r4, r5
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, # 4]

        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS)
        sub     r8, r4, r5
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, #10]

        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS)
        add     r8, r2, r7
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, # 6]

        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS)
        sub     r8, r2, r7
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, # 8]

        @ End of row loop
        add     lr, lr, #16             @ advance to the next row (8 * 2 bytes)
        subs    r12, r12, #1
        bne     row_loop
        beq     start_column_loop       @ Z is set here, so this always branches:
                                        @ it skips the empty_row/end_of_row_loop code below

empty_row:
        @ Only the DC term is set: every output of the row is
        @ (d0 << PASS1_BITS).  Replicate it into both halfwords of a word
        @ and store it across the whole row with four word stores.
        ldr     r1, [r11, #FIX_0xFFFF_ID]
        mov     r0, r0, lsl #2          @ d0 << PASS1_BITS
        and     r0, r0, r1              @ keep only the low halfword
        add     r0, r0, r0, lsl #16     @ duplicate it into the high halfword
        str     r0, [lr, # 0]
        str     r0, [lr, # 4]
        str     r0, [lr, # 8]
        str     r0, [lr, #12]

end_of_row_loop:
        @ End of loop
        add     lr, lr, #16
        subs    r12, r12, #1
        bne     row_loop

start_column_loop:
        @ Start of column loop: columns are 2 bytes apart, elements within
        @ a column are 16 bytes apart, hence the #(n*8) offsets below.
        ldr     lr, [ sp ]              @ reload the saved block pointer
        mov     r12, #8                 @ r12 = column counter
column_loop:
        ldrsh   r0, [lr, #( 0*8)]       @ r0 = 'd0'
        ldrsh   r2, [lr, #( 4*8)]       @ r2 = 'd2'
        ldrsh   r4, [lr, #( 8*8)]       @ r4 = 'd4'
        ldrsh   r6, [lr, #(12*8)]       @ r6 = 'd6'

        @ Even part of the column (same structure as the row even part).
        ldr     r3, [r11, #FIX_0_541196100_ID]
        add     r1, r2, r6
        ldr     r5, [r11, #FIX_M_1_847759065_ID]
        mul     r1, r3, r1              @ r1 = z1
        ldr     r3, [r11, #FIX_0_765366865_ID]
        mla     r6, r5, r6, r1          @ r6 = tmp2
        add     r5, r0, r4              @ r5 = tmp0
        mla     r2, r3, r2, r1          @ r2 = tmp3
        sub     r3, r0, r4              @ r3 = tmp1

        add     r0, r2, r5, lsl #13     @ r0 = tmp10
        rsb     r2, r2, r5, lsl #13     @ r2 = tmp13
        add     r4, r6, r3, lsl #13     @ r4 = tmp11
        rsb     r6, r6, r3, lsl #13     @ r6 = tmp12

        ldrsh   r1, [lr, #( 2*8)]       @ r1 = 'd1'
        ldrsh   r3, [lr, #( 6*8)]       @ r3 = 'd3'
        ldrsh   r5, [lr, #(10*8)]       @ r5 = 'd5'
        ldrsh   r7, [lr, #(14*8)]       @ r7 = 'd7'

        @ Check for empty odd column (happens about 20 to 25 % of the time according to my stats)
        orr     r9, r1, r3
        orr     r10, r5, r7
        orrs    r10, r9, r10
        beq     empty_odd_column

        stmdb   sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp12, tmp11

        @ Odd part of the column (identical scheduling to the row odd part).
        add     r0, r3, r5              @ r0 = 'z2'
        add     r2, r1, r7              @ r2 = 'z1'
        add     r4, r3, r7              @ r4 = 'z3'
        add     r6, r1, r5              @ r6 = 'z4'
        ldr     r9, [r11, #FIX_1_175875602_ID]
        add     r8, r4, r6              @ r8 = z3 + z4
        ldr     r10, [r11, #FIX_M_0_899976223_ID]
        mul     r8, r9, r8              @ r8 = 'z5'
        ldr     r9, [r11, #FIX_M_2_562915447_ID]
        mul     r2, r10, r2             @ r2 = 'z1'
        ldr     r10, [r11, #FIX_M_1_961570560_ID]
        mul     r0, r9, r0              @ r0 = 'z2'
        ldr     r9, [r11, #FIX_M_0_390180644_ID]
        mla     r4, r10, r4, r8         @ r4 = 'z3'
        ldr     r10, [r11, #FIX_0_298631336_ID]
        mla     r6, r9, r6, r8          @ r6 = 'z4'
        ldr     r9, [r11, #FIX_2_053119869_ID]
        mla     r7, r10, r7, r2         @ r7 = tmp0 + z1
        ldr     r10, [r11, #FIX_3_072711026_ID]
        mla     r5, r9, r5, r0          @ r5 = tmp1 + z2
        ldr     r9, [r11, #FIX_1_501321110_ID]
        mla     r3, r10, r3, r0         @ r3 = tmp2 + z2
        add     r7, r7, r4              @ r7 = tmp0
        mla     r1, r9, r1, r2          @ r1 = tmp3 + z1
        add     r5, r5, r6              @ r5 = tmp1
        add     r3, r3, r4              @ r3 = tmp2
        add     r1, r1, r6              @ r1 = tmp3

        ldmia   sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
                                        @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0

        @ Butterfly + final descale: round with #(1<<17), then arithmetic
        @ shift right by 18 (= CONST_BITS + PASS1_BITS + 3).

        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
        add     r8, r0, r1
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #( 0*8)]

        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
        sub     r8, r0, r1
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #(14*8)]

        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
        add     r8, r4, r3
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #( 2*8)]

        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
        sub     r8, r4, r3
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #(12*8)]

        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
        add     r8, r6, r5
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #( 4*8)]

        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
        sub     r8, r6, r5
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #(10*8)]

        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
        add     r8, r2, r7
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #( 6*8)]

        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
        sub     r8, r2, r7
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #( 8*8)]

        @ End of column loop
        add     lr, lr, #2              @ advance to the next column (2 bytes)
        subs    r12, r12, #1
        bne     column_loop
        beq     the_end                 @ Z set here: effectively an unconditional
                                        @ branch over the empty_odd_column code

empty_odd_column:
        @ All odd coefficients are zero, so tmp0..tmp3 are zero and each
        @ pair of outputs collapses to the same even-part value.
        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
        add     r0, r0, #(1<<17)
        mov     r0, r0, asr #18
        strh    r0, [lr, #( 0*8)]
        strh    r0, [lr, #(14*8)]

        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
        add     r4, r4, #(1<<17)
        mov     r4, r4, asr #18
        strh    r4, [lr, #( 2*8)]
        strh    r4, [lr, #(12*8)]

        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
        add     r6, r6, #(1<<17)
        mov     r6, r6, asr #18
        strh    r6, [lr, #( 4*8)]
        strh    r6, [lr, #(10*8)]

        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
        add     r2, r2, #(1<<17)
        mov     r2, r2, asr #18
        strh    r2, [lr, #( 6*8)]
        strh    r2, [lr, #( 8*8)]

        @ End of column loop
        add     lr, lr, #2
        subs    r12, r12, #1
        bne     column_loop

the_end:
        @ The end....
        add     sp, sp, #4              @ drop the saved block pointer
        ldmia   sp!, { r4 - r12, pc }   @ restore callee saved regs and return
373 | |
@ Fixed-point multiplier table, addressed pc-relative through r11.
@ The .word order here MUST match the FIX_*_ID byte offsets defined at
@ the top of the file — reordering either side breaks every ldr.
@ NOTE(review): the .align directive follows the label; if the assembler
@ ever had to insert padding here, const_array would point at the padding
@ rather than the first word.  The preceding code leaves the location
@ counter word-aligned, so this is harmless in practice, but placing
@ .align before the label would be safer — left unchanged (doc-only edit).
const_array:
        .align
        .word FIX_0_298631336
        .word FIX_0_541196100
        .word FIX_0_765366865
        .word FIX_1_175875602
        .word FIX_1_501321110
        .word FIX_2_053119869
        .word FIX_3_072711026
        .word FIX_M_0_390180644
        .word FIX_M_0_899976223
        .word FIX_M_1_847759065
        .word FIX_M_1_961570560
        .word FIX_M_2_562915447
        .word FIX_0xFFFF