Mercurial > libavcodec.hg
comparison armv4l/jrevdct_arm.S @ 2979:bfabfdf9ce55 libavcodec
COSMETICS: tabs --> spaces, some prettyprinting
author: diego
date:   Thu, 22 Dec 2005 01:10:11 +0000
parents | ef2149182f1c |
children | 316762ae96a7 |
comparison of parent revision 2978:403183bbb505 with 2979:bfabfdf9ce55
(diff legend: equal / deleted / inserted / replaced)
1 /* | 1 /* |
2 C-like prototype : | 2 C-like prototype : |
3 void j_rev_dct_ARM(DCTBLOCK data) | 3 void j_rev_dct_ARM(DCTBLOCK data) |
4 | 4 |
5 With DCTBLOCK being a pointer to an array of 64 'signed shorts' | 5 With DCTBLOCK being a pointer to an array of 64 'signed shorts' |
6 | 6 |
7 Copyright (c) 2001 Lionel Ulmer (lionel.ulmer@free.fr / bbrox@bbrox.org) | 7 Copyright (c) 2001 Lionel Ulmer (lionel.ulmer@free.fr / bbrox@bbrox.org) |
8 | 8 |
49 #define FIX_M_0_899976223_ID 32 | 49 #define FIX_M_0_899976223_ID 32 |
50 #define FIX_M_1_847759065_ID 36 | 50 #define FIX_M_1_847759065_ID 36 |
51 #define FIX_M_1_961570560_ID 40 | 51 #define FIX_M_1_961570560_ID 40 |
52 #define FIX_M_2_562915447_ID 44 | 52 #define FIX_M_2_562915447_ID 44 |
53 #define FIX_0xFFFF_ID 48 | 53 #define FIX_0xFFFF_ID 48 |
54 .text | 54 .text |
55 .align | 55 .align |
56 | 56 |
57 .global j_rev_dct_ARM | 57 .global j_rev_dct_ARM |
58 j_rev_dct_ARM: | 58 j_rev_dct_ARM: |
59 stmdb sp!, { r4 - r12, lr } @ all callee saved regs | 59 stmdb sp!, { r4 - r12, lr } @ all callee saved regs |
60 | 60 |
61 sub sp, sp, #4 @ reserve some space on the stack | 61 sub sp, sp, #4 @ reserve some space on the stack |
62 str r0, [ sp ] @ save the DCT pointer to the stack | 62 str r0, [ sp ] @ save the DCT pointer to the stack |
63 | 63 |
64 mov lr, r0 @ lr = pointer to the current row | 64 mov lr, r0 @ lr = pointer to the current row |
65 mov r12, #8 @ r12 = row-counter | 65 mov r12, #8 @ r12 = row-counter |
66 add r11, pc, #(const_array-.-8) @ r11 = base pointer to the constants array | 66 add r11, pc, #(const_array-.-8) @ r11 = base pointer to the constants array |
67 row_loop: | 67 row_loop: |
68 ldrsh r0, [lr, # 0] @ r0 = 'd0' | 68 ldrsh r0, [lr, # 0] @ r0 = 'd0' |
69 ldrsh r1, [lr, # 8] @ r1 = 'd1' | 69 ldrsh r1, [lr, # 8] @ r1 = 'd1' |
70 | 70 |
71 @ Optimization for row that have all items except the first set to 0 | 71 @ Optimization for row that have all items except the first set to 0 |
72 @ (this works as the DCTELEMS are always 4-byte aligned) | 72 @ (this works as the DCTELEMS are always 4-byte aligned) |
73 ldr r5, [lr, # 0] | 73 ldr r5, [lr, # 0] |
74 ldr r2, [lr, # 4] | 74 ldr r2, [lr, # 4] |
75 ldr r3, [lr, # 8] | 75 ldr r3, [lr, # 8] |
76 ldr r4, [lr, #12] | 76 ldr r4, [lr, #12] |
77 orr r3, r3, r4 | 77 orr r3, r3, r4 |
78 orr r3, r3, r2 | 78 orr r3, r3, r2 |
79 orrs r5, r3, r5 | 79 orrs r5, r3, r5 |
80 beq end_of_row_loop @ nothing to be done as ALL of them are '0' | 80 beq end_of_row_loop @ nothing to be done as ALL of them are '0' |
81 orrs r2, r3, r1 | 81 orrs r2, r3, r1 |
82 beq empty_row | 82 beq empty_row |
83 | 83 |
84 ldrsh r2, [lr, # 2] @ r2 = 'd2' | 84 ldrsh r2, [lr, # 2] @ r2 = 'd2' |
85 ldrsh r4, [lr, # 4] @ r4 = 'd4' | 85 ldrsh r4, [lr, # 4] @ r4 = 'd4' |
86 ldrsh r6, [lr, # 6] @ r6 = 'd6' | 86 ldrsh r6, [lr, # 6] @ r6 = 'd6' |
87 | 87 |
88 ldr r3, [r11, #FIX_0_541196100_ID] | 88 ldr r3, [r11, #FIX_0_541196100_ID] |
89 add r7, r2, r6 | 89 add r7, r2, r6 |
90 ldr r5, [r11, #FIX_M_1_847759065_ID] | 90 ldr r5, [r11, #FIX_M_1_847759065_ID] |
91 mul r7, r3, r7 @ r7 = z1 | 91 mul r7, r3, r7 @ r7 = z1 |
92 ldr r3, [r11, #FIX_0_765366865_ID] | 92 ldr r3, [r11, #FIX_0_765366865_ID] |
93 mla r6, r5, r6, r7 @ r6 = tmp2 | 93 mla r6, r5, r6, r7 @ r6 = tmp2 |
94 add r5, r0, r4 @ r5 = tmp0 | 94 add r5, r0, r4 @ r5 = tmp0 |
95 mla r2, r3, r2, r7 @ r2 = tmp3 | 95 mla r2, r3, r2, r7 @ r2 = tmp3 |
96 sub r3, r0, r4 @ r3 = tmp1 | 96 sub r3, r0, r4 @ r3 = tmp1 |
97 | 97 |
98 add r0, r2, r5, lsl #13 @ r0 = tmp10 | 98 add r0, r2, r5, lsl #13 @ r0 = tmp10 |
99 rsb r2, r2, r5, lsl #13 @ r2 = tmp13 | 99 rsb r2, r2, r5, lsl #13 @ r2 = tmp13 |
100 add r4, r6, r3, lsl #13 @ r4 = tmp11 | 100 add r4, r6, r3, lsl #13 @ r4 = tmp11 |
101 rsb r3, r6, r3, lsl #13 @ r3 = tmp12 | 101 rsb r3, r6, r3, lsl #13 @ r3 = tmp12 |
102 | 102 |
103 stmdb sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11 | 103 stmdb sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11 |
104 | 104 |
105 ldrsh r3, [lr, #10] @ r3 = 'd3' | 105 ldrsh r3, [lr, #10] @ r3 = 'd3' |
106 ldrsh r5, [lr, #12] @ r5 = 'd5' | 106 ldrsh r5, [lr, #12] @ r5 = 'd5' |
107 ldrsh r7, [lr, #14] @ r7 = 'd7' | 107 ldrsh r7, [lr, #14] @ r7 = 'd7' |
108 | 108 |
109 add r0, r3, r5 @ r0 = 'z2' | 109 add r0, r3, r5 @ r0 = 'z2' |
110 add r2, r1, r7 @ r2 = 'z1' | 110 add r2, r1, r7 @ r2 = 'z1' |
111 add r4, r3, r7 @ r4 = 'z3' | 111 add r4, r3, r7 @ r4 = 'z3' |
112 add r6, r1, r5 @ r6 = 'z4' | 112 add r6, r1, r5 @ r6 = 'z4' |
113 ldr r9, [r11, #FIX_1_175875602_ID] | 113 ldr r9, [r11, #FIX_1_175875602_ID] |
114 add r8, r4, r6 @ r8 = z3 + z4 | 114 add r8, r4, r6 @ r8 = z3 + z4 |
115 ldr r10, [r11, #FIX_M_0_899976223_ID] | 115 ldr r10, [r11, #FIX_M_0_899976223_ID] |
116 mul r8, r9, r8 @ r8 = 'z5' | 116 mul r8, r9, r8 @ r8 = 'z5' |
117 ldr r9, [r11, #FIX_M_2_562915447_ID] | 117 ldr r9, [r11, #FIX_M_2_562915447_ID] |
118 mul r2, r10, r2 @ r2 = 'z1' | 118 mul r2, r10, r2 @ r2 = 'z1' |
119 ldr r10, [r11, #FIX_M_1_961570560_ID] | 119 ldr r10, [r11, #FIX_M_1_961570560_ID] |
120 mul r0, r9, r0 @ r0 = 'z2' | 120 mul r0, r9, r0 @ r0 = 'z2' |
121 ldr r9, [r11, #FIX_M_0_390180644_ID] | 121 ldr r9, [r11, #FIX_M_0_390180644_ID] |
122 mla r4, r10, r4, r8 @ r4 = 'z3' | 122 mla r4, r10, r4, r8 @ r4 = 'z3' |
123 ldr r10, [r11, #FIX_0_298631336_ID] | 123 ldr r10, [r11, #FIX_0_298631336_ID] |
124 mla r6, r9, r6, r8 @ r6 = 'z4' | 124 mla r6, r9, r6, r8 @ r6 = 'z4' |
125 ldr r9, [r11, #FIX_2_053119869_ID] | 125 ldr r9, [r11, #FIX_2_053119869_ID] |
126 mla r7, r10, r7, r2 @ r7 = tmp0 + z1 | 126 mla r7, r10, r7, r2 @ r7 = tmp0 + z1 |
127 ldr r10, [r11, #FIX_3_072711026_ID] | 127 ldr r10, [r11, #FIX_3_072711026_ID] |
128 mla r5, r9, r5, r0 @ r5 = tmp1 + z2 | 128 mla r5, r9, r5, r0 @ r5 = tmp1 + z2 |
129 ldr r9, [r11, #FIX_1_501321110_ID] | 129 ldr r9, [r11, #FIX_1_501321110_ID] |
130 mla r3, r10, r3, r0 @ r3 = tmp2 + z2 | 130 mla r3, r10, r3, r0 @ r3 = tmp2 + z2 |
131 add r7, r7, r4 @ r7 = tmp0 | 131 add r7, r7, r4 @ r7 = tmp0 |
132 mla r1, r9, r1, r2 @ r1 = tmp3 + z1 | 132 mla r1, r9, r1, r2 @ r1 = tmp3 + z1 |
133 add r5, r5, r6 @ r5 = tmp1 | 133 add r5, r5, r6 @ r5 = tmp1 |
134 add r3, r3, r4 @ r3 = tmp2 | 134 add r3, r3, r4 @ r3 = tmp2 |
135 add r1, r1, r6 @ r1 = tmp3 | 135 add r1, r1, r6 @ r1 = tmp3 |
136 | 136 |
137 ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11 | 137 ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11 |
138 @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0 | 138 @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0 |
139 | 139 |
140 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) | 140 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) |
141 add r8, r0, r1 | 141 add r8, r0, r1 |
142 add r8, r8, #(1<<10) | 142 add r8, r8, #(1<<10) |
143 mov r8, r8, asr #11 | 143 mov r8, r8, asr #11 |
144 strh r8, [lr, # 0] | 144 strh r8, [lr, # 0] |
145 | 145 |
146 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) | 146 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) |
147 sub r8, r0, r1 | 147 sub r8, r0, r1 |
148 add r8, r8, #(1<<10) | 148 add r8, r8, #(1<<10) |
149 mov r8, r8, asr #11 | 149 mov r8, r8, asr #11 |
150 strh r8, [lr, #14] | 150 strh r8, [lr, #14] |
151 | 151 |
152 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) | 152 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) |
153 add r8, r6, r3 | 153 add r8, r6, r3 |
154 add r8, r8, #(1<<10) | 154 add r8, r8, #(1<<10) |
155 mov r8, r8, asr #11 | 155 mov r8, r8, asr #11 |
156 strh r8, [lr, # 2] | 156 strh r8, [lr, # 2] |
157 | 157 |
158 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) | 158 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) |
159 sub r8, r6, r3 | 159 sub r8, r6, r3 |
160 add r8, r8, #(1<<10) | 160 add r8, r8, #(1<<10) |
161 mov r8, r8, asr #11 | 161 mov r8, r8, asr #11 |
162 strh r8, [lr, #12] | 162 strh r8, [lr, #12] |
163 | 163 |
164 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) | 164 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) |
165 add r8, r4, r5 | 165 add r8, r4, r5 |
166 add r8, r8, #(1<<10) | 166 add r8, r8, #(1<<10) |
167 mov r8, r8, asr #11 | 167 mov r8, r8, asr #11 |
168 strh r8, [lr, # 4] | 168 strh r8, [lr, # 4] |
169 | 169 |
170 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) | 170 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) |
171 sub r8, r4, r5 | 171 sub r8, r4, r5 |
172 add r8, r8, #(1<<10) | 172 add r8, r8, #(1<<10) |
173 mov r8, r8, asr #11 | 173 mov r8, r8, asr #11 |
174 strh r8, [lr, #10] | 174 strh r8, [lr, #10] |
175 | 175 |
176 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) | 176 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) |
177 add r8, r2, r7 | 177 add r8, r2, r7 |
178 add r8, r8, #(1<<10) | 178 add r8, r8, #(1<<10) |
179 mov r8, r8, asr #11 | 179 mov r8, r8, asr #11 |
180 strh r8, [lr, # 6] | 180 strh r8, [lr, # 6] |
181 | 181 |
182 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) | 182 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) |
183 sub r8, r2, r7 | 183 sub r8, r2, r7 |
184 add r8, r8, #(1<<10) | 184 add r8, r8, #(1<<10) |
185 mov r8, r8, asr #11 | 185 mov r8, r8, asr #11 |
186 strh r8, [lr, # 8] | 186 strh r8, [lr, # 8] |
187 | 187 |
188 @ End of row loop | 188 @ End of row loop |
189 add lr, lr, #16 | 189 add lr, lr, #16 |
190 subs r12, r12, #1 | 190 subs r12, r12, #1 |
191 bne row_loop | 191 bne row_loop |
192 beq start_column_loop | 192 beq start_column_loop |
193 | 193 |
194 empty_row: | 194 empty_row: |
195 ldr r1, [r11, #FIX_0xFFFF_ID] | 195 ldr r1, [r11, #FIX_0xFFFF_ID] |
196 mov r0, r0, lsl #2 | 196 mov r0, r0, lsl #2 |
197 and r0, r0, r1 | 197 and r0, r0, r1 |
198 add r0, r0, r0, lsl #16 | 198 add r0, r0, r0, lsl #16 |
199 str r0, [lr, # 0] | 199 str r0, [lr, # 0] |
200 str r0, [lr, # 4] | 200 str r0, [lr, # 4] |
201 str r0, [lr, # 8] | 201 str r0, [lr, # 8] |
202 str r0, [lr, #12] | 202 str r0, [lr, #12] |
203 | 203 |
204 end_of_row_loop: | 204 end_of_row_loop: |
205 @ End of loop | 205 @ End of loop |
206 add lr, lr, #16 | 206 add lr, lr, #16 |
207 subs r12, r12, #1 | 207 subs r12, r12, #1 |
208 bne row_loop | 208 bne row_loop |
209 | 209 |
210 start_column_loop: | 210 start_column_loop: |
211 @ Start of column loop | 211 @ Start of column loop |
212 ldr lr, [ sp ] | 212 ldr lr, [ sp ] |
213 mov r12, #8 | 213 mov r12, #8 |
214 column_loop: | 214 column_loop: |
215 ldrsh r0, [lr, #( 0*8)] @ r0 = 'd0' | 215 ldrsh r0, [lr, #( 0*8)] @ r0 = 'd0' |
216 ldrsh r2, [lr, #( 4*8)] @ r2 = 'd2' | 216 ldrsh r2, [lr, #( 4*8)] @ r2 = 'd2' |
217 ldrsh r4, [lr, #( 8*8)] @ r4 = 'd4' | 217 ldrsh r4, [lr, #( 8*8)] @ r4 = 'd4' |
218 ldrsh r6, [lr, #(12*8)] @ r6 = 'd6' | 218 ldrsh r6, [lr, #(12*8)] @ r6 = 'd6' |
219 | 219 |
220 ldr r3, [r11, #FIX_0_541196100_ID] | 220 ldr r3, [r11, #FIX_0_541196100_ID] |
221 add r1, r2, r6 | 221 add r1, r2, r6 |
222 ldr r5, [r11, #FIX_M_1_847759065_ID] | 222 ldr r5, [r11, #FIX_M_1_847759065_ID] |
223 mul r1, r3, r1 @ r1 = z1 | 223 mul r1, r3, r1 @ r1 = z1 |
224 ldr r3, [r11, #FIX_0_765366865_ID] | 224 ldr r3, [r11, #FIX_0_765366865_ID] |
225 mla r6, r5, r6, r1 @ r6 = tmp2 | 225 mla r6, r5, r6, r1 @ r6 = tmp2 |
226 add r5, r0, r4 @ r5 = tmp0 | 226 add r5, r0, r4 @ r5 = tmp0 |
227 mla r2, r3, r2, r1 @ r2 = tmp3 | 227 mla r2, r3, r2, r1 @ r2 = tmp3 |
228 sub r3, r0, r4 @ r3 = tmp1 | 228 sub r3, r0, r4 @ r3 = tmp1 |
229 | 229 |
230 add r0, r2, r5, lsl #13 @ r0 = tmp10 | 230 add r0, r2, r5, lsl #13 @ r0 = tmp10 |
231 rsb r2, r2, r5, lsl #13 @ r2 = tmp13 | 231 rsb r2, r2, r5, lsl #13 @ r2 = tmp13 |
232 add r4, r6, r3, lsl #13 @ r4 = tmp11 | 232 add r4, r6, r3, lsl #13 @ r4 = tmp11 |
233 rsb r6, r6, r3, lsl #13 @ r6 = tmp12 | 233 rsb r6, r6, r3, lsl #13 @ r6 = tmp12 |
234 | 234 |
235 ldrsh r1, [lr, #( 2*8)] @ r1 = 'd1' | 235 ldrsh r1, [lr, #( 2*8)] @ r1 = 'd1' |
236 ldrsh r3, [lr, #( 6*8)] @ r3 = 'd3' | 236 ldrsh r3, [lr, #( 6*8)] @ r3 = 'd3' |
237 ldrsh r5, [lr, #(10*8)] @ r5 = 'd5' | 237 ldrsh r5, [lr, #(10*8)] @ r5 = 'd5' |
238 ldrsh r7, [lr, #(14*8)] @ r7 = 'd7' | 238 ldrsh r7, [lr, #(14*8)] @ r7 = 'd7' |
239 | 239 |
240 @ Check for empty odd column (happens about 20 to 25 % of the time according to my stats) | 240 @ Check for empty odd column (happens about 20 to 25 % of the time according to my stats) |
241 orr r9, r1, r3 | 241 orr r9, r1, r3 |
242 orr r10, r5, r7 | 242 orr r10, r5, r7 |
243 orrs r10, r9, r10 | 243 orrs r10, r9, r10 |
244 beq empty_odd_column | 244 beq empty_odd_column |
245 | 245 |
246 stmdb sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp12, tmp11 | 246 stmdb sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp12, tmp11 |
247 | 247 |
248 add r0, r3, r5 @ r0 = 'z2' | 248 add r0, r3, r5 @ r0 = 'z2' |
249 add r2, r1, r7 @ r2 = 'z1' | 249 add r2, r1, r7 @ r2 = 'z1' |
250 add r4, r3, r7 @ r4 = 'z3' | 250 add r4, r3, r7 @ r4 = 'z3' |
251 add r6, r1, r5 @ r6 = 'z4' | 251 add r6, r1, r5 @ r6 = 'z4' |
252 ldr r9, [r11, #FIX_1_175875602_ID] | 252 ldr r9, [r11, #FIX_1_175875602_ID] |
253 add r8, r4, r6 | 253 add r8, r4, r6 |
254 ldr r10, [r11, #FIX_M_0_899976223_ID] | 254 ldr r10, [r11, #FIX_M_0_899976223_ID] |
255 mul r8, r9, r8 @ r8 = 'z5' | 255 mul r8, r9, r8 @ r8 = 'z5' |
256 ldr r9, [r11, #FIX_M_2_562915447_ID] | 256 ldr r9, [r11, #FIX_M_2_562915447_ID] |
257 mul r2, r10, r2 @ r2 = 'z1' | 257 mul r2, r10, r2 @ r2 = 'z1' |
258 ldr r10, [r11, #FIX_M_1_961570560_ID] | 258 ldr r10, [r11, #FIX_M_1_961570560_ID] |
259 mul r0, r9, r0 @ r0 = 'z2' | 259 mul r0, r9, r0 @ r0 = 'z2' |
260 ldr r9, [r11, #FIX_M_0_390180644_ID] | 260 ldr r9, [r11, #FIX_M_0_390180644_ID] |
261 mla r4, r10, r4, r8 @ r4 = 'z3' | 261 mla r4, r10, r4, r8 @ r4 = 'z3' |
262 ldr r10, [r11, #FIX_0_298631336_ID] | 262 ldr r10, [r11, #FIX_0_298631336_ID] |
263 mla r6, r9, r6, r8 @ r6 = 'z4' | 263 mla r6, r9, r6, r8 @ r6 = 'z4' |
264 ldr r9, [r11, #FIX_2_053119869_ID] | 264 ldr r9, [r11, #FIX_2_053119869_ID] |
265 mla r7, r10, r7, r2 @ r7 = tmp0 + z1 | 265 mla r7, r10, r7, r2 @ r7 = tmp0 + z1 |
266 ldr r10, [r11, #FIX_3_072711026_ID] | 266 ldr r10, [r11, #FIX_3_072711026_ID] |
267 mla r5, r9, r5, r0 @ r5 = tmp1 + z2 | 267 mla r5, r9, r5, r0 @ r5 = tmp1 + z2 |
268 ldr r9, [r11, #FIX_1_501321110_ID] | 268 ldr r9, [r11, #FIX_1_501321110_ID] |
269 mla r3, r10, r3, r0 @ r3 = tmp2 + z2 | 269 mla r3, r10, r3, r0 @ r3 = tmp2 + z2 |
270 add r7, r7, r4 @ r7 = tmp0 | 270 add r7, r7, r4 @ r7 = tmp0 |
271 mla r1, r9, r1, r2 @ r1 = tmp3 + z1 | 271 mla r1, r9, r1, r2 @ r1 = tmp3 + z1 |
272 add r5, r5, r6 @ r5 = tmp1 | 272 add r5, r5, r6 @ r5 = tmp1 |
273 add r3, r3, r4 @ r3 = tmp2 | 273 add r3, r3, r4 @ r3 = tmp2 |
274 add r1, r1, r6 @ r1 = tmp3 | 274 add r1, r1, r6 @ r1 = tmp3 |
275 | 275 |
276 ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12 | 276 ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12 |
277 @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0 | 277 @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0 |
278 | 278 |
279 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3) | 279 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3) |
280 add r8, r0, r1 | 280 add r8, r0, r1 |
281 add r8, r8, #(1<<17) | 281 add r8, r8, #(1<<17) |
282 mov r8, r8, asr #18 | 282 mov r8, r8, asr #18 |
283 strh r8, [lr, #( 0*8)] | 283 strh r8, [lr, #( 0*8)] |
284 | 284 |
285 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3) | 285 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3) |
286 sub r8, r0, r1 | 286 sub r8, r0, r1 |
287 add r8, r8, #(1<<17) | 287 add r8, r8, #(1<<17) |
288 mov r8, r8, asr #18 | 288 mov r8, r8, asr #18 |
289 strh r8, [lr, #(14*8)] | 289 strh r8, [lr, #(14*8)] |
290 | 290 |
291 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3) | 291 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3) |
292 add r8, r4, r3 | 292 add r8, r4, r3 |
293 add r8, r8, #(1<<17) | 293 add r8, r8, #(1<<17) |
294 mov r8, r8, asr #18 | 294 mov r8, r8, asr #18 |
295 strh r8, [lr, #( 2*8)] | 295 strh r8, [lr, #( 2*8)] |
296 | 296 |
297 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3) | 297 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3) |
298 sub r8, r4, r3 | 298 sub r8, r4, r3 |
299 add r8, r8, #(1<<17) | 299 add r8, r8, #(1<<17) |
300 mov r8, r8, asr #18 | 300 mov r8, r8, asr #18 |
301 strh r8, [lr, #(12*8)] | 301 strh r8, [lr, #(12*8)] |
302 | 302 |
303 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3) | 303 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3) |
304 add r8, r6, r5 | 304 add r8, r6, r5 |
305 add r8, r8, #(1<<17) | 305 add r8, r8, #(1<<17) |
306 mov r8, r8, asr #18 | 306 mov r8, r8, asr #18 |
307 strh r8, [lr, #( 4*8)] | 307 strh r8, [lr, #( 4*8)] |
308 | 308 |
309 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3) | 309 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3) |
310 sub r8, r6, r5 | 310 sub r8, r6, r5 |
311 add r8, r8, #(1<<17) | 311 add r8, r8, #(1<<17) |
312 mov r8, r8, asr #18 | 312 mov r8, r8, asr #18 |
313 strh r8, [lr, #(10*8)] | 313 strh r8, [lr, #(10*8)] |
314 | 314 |
315 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3) | 315 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3) |
316 add r8, r2, r7 | 316 add r8, r2, r7 |
317 add r8, r8, #(1<<17) | 317 add r8, r8, #(1<<17) |
318 mov r8, r8, asr #18 | 318 mov r8, r8, asr #18 |
319 strh r8, [lr, #( 6*8)] | 319 strh r8, [lr, #( 6*8)] |
320 | 320 |
321 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3) | 321 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3) |
322 sub r8, r2, r7 | 322 sub r8, r2, r7 |
323 add r8, r8, #(1<<17) | 323 add r8, r8, #(1<<17) |
324 mov r8, r8, asr #18 | 324 mov r8, r8, asr #18 |
325 strh r8, [lr, #( 8*8)] | 325 strh r8, [lr, #( 8*8)] |
326 | 326 |
327 @ End of row loop | 327 @ End of row loop |
328 add lr, lr, #2 | 328 add lr, lr, #2 |
329 subs r12, r12, #1 | 329 subs r12, r12, #1 |
330 bne column_loop | 330 bne column_loop |
331 beq the_end | 331 beq the_end |
332 | 332 |
333 empty_odd_column: | 333 empty_odd_column: |
334 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3) | 334 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3) |
335 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3) | 335 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3) |
336 add r0, r0, #(1<<17) | 336 add r0, r0, #(1<<17) |
337 mov r0, r0, asr #18 | 337 mov r0, r0, asr #18 |
338 strh r0, [lr, #( 0*8)] | 338 strh r0, [lr, #( 0*8)] |
339 strh r0, [lr, #(14*8)] | 339 strh r0, [lr, #(14*8)] |
340 | 340 |
341 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3) | 341 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3) |
342 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3) | 342 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3) |
343 add r4, r4, #(1<<17) | 343 add r4, r4, #(1<<17) |
344 mov r4, r4, asr #18 | 344 mov r4, r4, asr #18 |
345 strh r4, [lr, #( 2*8)] | 345 strh r4, [lr, #( 2*8)] |
346 strh r4, [lr, #(12*8)] | 346 strh r4, [lr, #(12*8)] |
347 | 347 |
348 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3) | 348 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3) |
349 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3) | 349 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3) |
350 add r6, r6, #(1<<17) | 350 add r6, r6, #(1<<17) |
351 mov r6, r6, asr #18 | 351 mov r6, r6, asr #18 |
352 strh r6, [lr, #( 4*8)] | 352 strh r6, [lr, #( 4*8)] |
353 strh r6, [lr, #(10*8)] | 353 strh r6, [lr, #(10*8)] |
354 | 354 |
355 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3) | 355 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3) |
356 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3) | 356 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3) |
357 add r2, r2, #(1<<17) | 357 add r2, r2, #(1<<17) |
358 mov r2, r2, asr #18 | 358 mov r2, r2, asr #18 |
359 strh r2, [lr, #( 6*8)] | 359 strh r2, [lr, #( 6*8)] |
360 strh r2, [lr, #( 8*8)] | 360 strh r2, [lr, #( 8*8)] |
361 | 361 |
362 @ End of row loop | 362 @ End of row loop |
363 add lr, lr, #2 | 363 add lr, lr, #2 |
364 subs r12, r12, #1 | 364 subs r12, r12, #1 |
365 bne column_loop | 365 bne column_loop |
366 | 366 |
367 the_end: | 367 the_end: |
368 @ The end.... | 368 @ The end.... |
369 add sp, sp, #4 | 369 add sp, sp, #4 |
370 ldmia sp!, { r4 - r12, pc } @ restore callee saved regs and return | 370 ldmia sp!, { r4 - r12, pc } @ restore callee saved regs and return |
371 | 371 |
372 const_array: | 372 const_array: |
373 .align | 373 .align |
374 .word FIX_0_298631336 | 374 .word FIX_0_298631336 |
375 .word FIX_0_541196100 | 375 .word FIX_0_541196100 |
376 .word FIX_0_765366865 | 376 .word FIX_0_765366865 |
377 .word FIX_1_175875602 | 377 .word FIX_1_175875602 |
378 .word FIX_1_501321110 | 378 .word FIX_1_501321110 |
379 .word FIX_2_053119869 | 379 .word FIX_2_053119869 |
380 .word FIX_3_072711026 | 380 .word FIX_3_072711026 |
381 .word FIX_M_0_390180644 | 381 .word FIX_M_0_390180644 |
382 .word FIX_M_0_899976223 | 382 .word FIX_M_0_899976223 |
383 .word FIX_M_1_847759065 | 383 .word FIX_M_1_847759065 |
384 .word FIX_M_1_961570560 | 384 .word FIX_M_1_961570560 |
385 .word FIX_M_2_562915447 | 385 .word FIX_M_2_562915447 |
386 .word FIX_0xFFFF | 386 .word FIX_0xFFFF |