comparison arm/dsputil_arm_s.S @ 10355:47245bb9e85f libavcodec

ARM: whitespace cosmetics
author mru
date Sat, 03 Oct 2009 18:22:49 +0000
parents 5698999894ec
children f8d0701ff445
comparison
equal deleted inserted replaced
10354:24a069f83049 10355:47245bb9e85f
29 .endm 29 .endm
30 #endif 30 #endif
31 31
32 #if HAVE_ARMV5TE 32 #if HAVE_ARMV5TE
33 function ff_prefetch_arm, export=1 33 function ff_prefetch_arm, export=1
34 subs r2, r2, #1 34 subs r2, r2, #1
35 pld [r0] 35 pld [r0]
36 add r0, r0, r1 36 add r0, r0, r1
37 bne ff_prefetch_arm 37 bne ff_prefetch_arm
38 bx lr 38 bx lr
39 .endfunc 39 .endfunc
40 #endif 40 #endif
41 41
42 .macro ADJ_ALIGN_QUADWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4 42 .macro ADJ_ALIGN_QUADWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
43 mov \Rd0, \Rn0, lsr #(\shift * 8) 43 mov \Rd0, \Rn0, lsr #(\shift * 8)
44 mov \Rd1, \Rn1, lsr #(\shift * 8) 44 mov \Rd1, \Rn1, lsr #(\shift * 8)
45 mov \Rd2, \Rn2, lsr #(\shift * 8) 45 mov \Rd2, \Rn2, lsr #(\shift * 8)
46 mov \Rd3, \Rn3, lsr #(\shift * 8) 46 mov \Rd3, \Rn3, lsr #(\shift * 8)
47 orr \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8) 47 orr \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
48 orr \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8) 48 orr \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
49 orr \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8) 49 orr \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
50 orr \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8) 50 orr \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
51 .endm 51 .endm
52 .macro ADJ_ALIGN_DOUBLEWORD shift, R0, R1, R2 52 .macro ADJ_ALIGN_DOUBLEWORD shift, R0, R1, R2
53 mov \R0, \R0, lsr #(\shift * 8) 53 mov \R0, \R0, lsr #(\shift * 8)
54 orr \R0, \R0, \R1, lsl #(32 - \shift * 8) 54 orr \R0, \R0, \R1, lsl #(32 - \shift * 8)
55 mov \R1, \R1, lsr #(\shift * 8) 55 mov \R1, \R1, lsr #(\shift * 8)
56 orr \R1, \R1, \R2, lsl #(32 - \shift * 8) 56 orr \R1, \R1, \R2, lsl #(32 - \shift * 8)
57 .endm 57 .endm
58 .macro ADJ_ALIGN_DOUBLEWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2 58 .macro ADJ_ALIGN_DOUBLEWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
59 mov \Rdst0, \Rsrc0, lsr #(\shift * 8) 59 mov \Rdst0, \Rsrc0, lsr #(\shift * 8)
60 mov \Rdst1, \Rsrc1, lsr #(\shift * 8) 60 mov \Rdst1, \Rsrc1, lsr #(\shift * 8)
61 orr \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8)) 61 orr \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
62 orr \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8)) 62 orr \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
63 .endm 63 .endm
64 64
65 .macro RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask 65 .macro RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
66 @ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1) 66 @ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
67 @ Rmask = 0xFEFEFEFE 67 @ Rmask = 0xFEFEFEFE
68 @ Rn = destroy 68 @ Rn = destroy
69 eor \Rd0, \Rn0, \Rm0 69 eor \Rd0, \Rn0, \Rm0
70 eor \Rd1, \Rn1, \Rm1 70 eor \Rd1, \Rn1, \Rm1
71 orr \Rn0, \Rn0, \Rm0 71 orr \Rn0, \Rn0, \Rm0
72 orr \Rn1, \Rn1, \Rm1 72 orr \Rn1, \Rn1, \Rm1
73 and \Rd0, \Rd0, \Rmask 73 and \Rd0, \Rd0, \Rmask
74 and \Rd1, \Rd1, \Rmask 74 and \Rd1, \Rd1, \Rmask
75 sub \Rd0, \Rn0, \Rd0, lsr #1 75 sub \Rd0, \Rn0, \Rd0, lsr #1
76 sub \Rd1, \Rn1, \Rd1, lsr #1 76 sub \Rd1, \Rn1, \Rd1, lsr #1
77 .endm 77 .endm
78 78
79 .macro NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask 79 .macro NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
80 @ Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1) 80 @ Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1)
81 @ Rmask = 0xFEFEFEFE 81 @ Rmask = 0xFEFEFEFE
82 @ Rn = destroy 82 @ Rn = destroy
83 eor \Rd0, \Rn0, \Rm0 83 eor \Rd0, \Rn0, \Rm0
84 eor \Rd1, \Rn1, \Rm1 84 eor \Rd1, \Rn1, \Rm1
85 and \Rn0, \Rn0, \Rm0 85 and \Rn0, \Rn0, \Rm0
86 and \Rn1, \Rn1, \Rm1 86 and \Rn1, \Rn1, \Rm1
87 and \Rd0, \Rd0, \Rmask 87 and \Rd0, \Rd0, \Rmask
88 and \Rd1, \Rd1, \Rmask 88 and \Rd1, \Rd1, \Rmask
89 add \Rd0, \Rn0, \Rd0, lsr #1 89 add \Rd0, \Rn0, \Rd0, lsr #1
90 add \Rd1, \Rn1, \Rd1, lsr #1 90 add \Rd1, \Rn1, \Rd1, lsr #1
91 .endm 91 .endm
92 92
93 .macro JMP_ALIGN tmp, reg 93 .macro JMP_ALIGN tmp, reg
94 ands \tmp, \reg, #3 94 ands \tmp, \reg, #3
95 bic \reg, \reg, #3 95 bic \reg, \reg, #3
96 beq 1f 96 beq 1f
97 subs \tmp, \tmp, #1 97 subs \tmp, \tmp, #1
98 beq 2f 98 beq 2f
99 subs \tmp, \tmp, #1 99 subs \tmp, \tmp, #1
100 beq 3f 100 beq 3f
101 b 4f 101 b 4f
102 .endm 102 .endm
103 103
104 @ ---------------------------------------------------------------- 104 @ ----------------------------------------------------------------
105 .align 5 105 .align 5
106 function put_pixels16_arm, export=1 106 function put_pixels16_arm, export=1
107 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) 107 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
108 @ block = word aligned, pixels = unaligned 108 @ block = word aligned, pixels = unaligned
109 pld [r1] 109 pld [r1]
110 stmfd sp!, {r4-r11, lr} @ R14 is also called LR 110 stmfd sp!, {r4-r11, lr} @ R14 is also called LR
111 JMP_ALIGN r5, r1 111 JMP_ALIGN r5, r1
112 1: 112 1:
113 ldmia r1, {r4-r7} 113 ldmia r1, {r4-r7}
114 add r1, r1, r2 114 add r1, r1, r2
115 stmia r0, {r4-r7} 115 stmia r0, {r4-r7}
116 pld [r1] 116 pld [r1]
117 subs r3, r3, #1 117 subs r3, r3, #1
118 add r0, r0, r2 118 add r0, r0, r2
119 bne 1b 119 bne 1b
120 ldmfd sp!, {r4-r11, pc} 120 ldmfd sp!, {r4-r11, pc}
121 .align 5 121 .align 5
122 2: 122 2:
123 ldmia r1, {r4-r8} 123 ldmia r1, {r4-r8}
124 add r1, r1, r2 124 add r1, r1, r2
125 ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8 125 ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8
126 pld [r1] 126 pld [r1]
127 subs r3, r3, #1 127 subs r3, r3, #1
128 stmia r0, {r9-r12} 128 stmia r0, {r9-r12}
129 add r0, r0, r2 129 add r0, r0, r2
130 bne 2b 130 bne 2b
131 ldmfd sp!, {r4-r11, pc} 131 ldmfd sp!, {r4-r11, pc}
132 .align 5 132 .align 5
133 3: 133 3:
134 ldmia r1, {r4-r8} 134 ldmia r1, {r4-r8}
135 add r1, r1, r2 135 add r1, r1, r2
136 ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8 136 ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8
137 pld [r1] 137 pld [r1]
138 subs r3, r3, #1 138 subs r3, r3, #1
139 stmia r0, {r9-r12} 139 stmia r0, {r9-r12}
140 add r0, r0, r2 140 add r0, r0, r2
141 bne 3b 141 bne 3b
142 ldmfd sp!, {r4-r11, pc} 142 ldmfd sp!, {r4-r11, pc}
143 .align 5 143 .align 5
144 4: 144 4:
145 ldmia r1, {r4-r8} 145 ldmia r1, {r4-r8}
146 add r1, r1, r2 146 add r1, r1, r2
147 ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8 147 ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8
148 pld [r1] 148 pld [r1]
149 subs r3, r3, #1 149 subs r3, r3, #1
150 stmia r0, {r9-r12} 150 stmia r0, {r9-r12}
151 add r0, r0, r2 151 add r0, r0, r2
152 bne 4b 152 bne 4b
153 ldmfd sp!, {r4-r11,pc} 153 ldmfd sp!, {r4-r11,pc}
154 .endfunc 154 .endfunc
155 155
156 @ ---------------------------------------------------------------- 156 @ ----------------------------------------------------------------
157 .align 5 157 .align 5
158 function put_pixels8_arm, export=1 158 function put_pixels8_arm, export=1
159 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) 159 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
160 @ block = word aligned, pixels = unaligned 160 @ block = word aligned, pixels = unaligned
161 pld [r1] 161 pld [r1]
162 stmfd sp!, {r4-r5,lr} @ R14 is also called LR 162 stmfd sp!, {r4-r5,lr} @ R14 is also called LR
163 JMP_ALIGN r5, r1 163 JMP_ALIGN r5, r1
164 1: 164 1:
165 ldmia r1, {r4-r5} 165 ldmia r1, {r4-r5}
166 add r1, r1, r2 166 add r1, r1, r2
167 subs r3, r3, #1 167 subs r3, r3, #1
168 pld [r1] 168 pld [r1]
169 stmia r0, {r4-r5} 169 stmia r0, {r4-r5}
170 add r0, r0, r2 170 add r0, r0, r2
171 bne 1b 171 bne 1b
172 ldmfd sp!, {r4-r5,pc} 172 ldmfd sp!, {r4-r5,pc}
173 .align 5 173 .align 5
174 2: 174 2:
175 ldmia r1, {r4-r5, r12} 175 ldmia r1, {r4-r5, r12}
176 add r1, r1, r2 176 add r1, r1, r2
177 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12 177 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12
178 pld [r1] 178 pld [r1]
179 subs r3, r3, #1 179 subs r3, r3, #1
180 stmia r0, {r4-r5} 180 stmia r0, {r4-r5}
181 add r0, r0, r2 181 add r0, r0, r2
182 bne 2b 182 bne 2b
183 ldmfd sp!, {r4-r5,pc} 183 ldmfd sp!, {r4-r5,pc}
184 .align 5 184 .align 5
185 3: 185 3:
186 ldmia r1, {r4-r5, r12} 186 ldmia r1, {r4-r5, r12}
187 add r1, r1, r2 187 add r1, r1, r2
188 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12 188 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12
189 pld [r1] 189 pld [r1]
190 subs r3, r3, #1 190 subs r3, r3, #1
191 stmia r0, {r4-r5} 191 stmia r0, {r4-r5}
192 add r0, r0, r2 192 add r0, r0, r2
193 bne 3b 193 bne 3b
194 ldmfd sp!, {r4-r5,pc} 194 ldmfd sp!, {r4-r5,pc}
195 .align 5 195 .align 5
196 4: 196 4:
197 ldmia r1, {r4-r5, r12} 197 ldmia r1, {r4-r5, r12}
198 add r1, r1, r2 198 add r1, r1, r2
199 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12 199 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12
200 pld [r1] 200 pld [r1]
201 subs r3, r3, #1 201 subs r3, r3, #1
202 stmia r0, {r4-r5} 202 stmia r0, {r4-r5}
203 add r0, r0, r2 203 add r0, r0, r2
204 bne 4b 204 bne 4b
205 ldmfd sp!, {r4-r5,pc} 205 ldmfd sp!, {r4-r5,pc}
206 .endfunc 206 .endfunc
207 207
208 @ ---------------------------------------------------------------- 208 @ ----------------------------------------------------------------
209 .align 5 209 .align 5
210 function put_pixels8_x2_arm, export=1 210 function put_pixels8_x2_arm, export=1
211 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) 211 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
212 @ block = word aligned, pixels = unaligned 212 @ block = word aligned, pixels = unaligned
213 pld [r1] 213 pld [r1]
214 stmfd sp!, {r4-r10,lr} @ R14 is also called LR 214 stmfd sp!, {r4-r10,lr} @ R14 is also called LR
215 ldr r12, =0xfefefefe 215 ldr r12, =0xfefefefe
216 JMP_ALIGN r5, r1 216 JMP_ALIGN r5, r1
217 1: 217 1:
218 ldmia r1, {r4-r5, r10} 218 ldmia r1, {r4-r5, r10}
219 add r1, r1, r2 219 add r1, r1, r2
220 ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 220 ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
221 pld [r1] 221 pld [r1]
222 RND_AVG32 r8, r9, r4, r5, r6, r7, r12 222 RND_AVG32 r8, r9, r4, r5, r6, r7, r12
223 subs r3, r3, #1 223 subs r3, r3, #1
224 stmia r0, {r8-r9} 224 stmia r0, {r8-r9}
225 add r0, r0, r2 225 add r0, r0, r2
226 bne 1b 226 bne 1b
227 ldmfd sp!, {r4-r10,pc} 227 ldmfd sp!, {r4-r10,pc}
228 .align 5 228 .align 5
229 2: 229 2:
230 ldmia r1, {r4-r5, r10} 230 ldmia r1, {r4-r5, r10}
231 add r1, r1, r2 231 add r1, r1, r2
232 ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 232 ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
233 ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10 233 ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
234 pld [r1] 234 pld [r1]
235 RND_AVG32 r4, r5, r6, r7, r8, r9, r12 235 RND_AVG32 r4, r5, r6, r7, r8, r9, r12
236 subs r3, r3, #1 236 subs r3, r3, #1
237 stmia r0, {r4-r5} 237 stmia r0, {r4-r5}
238 add r0, r0, r2 238 add r0, r0, r2
239 bne 2b 239 bne 2b
240 ldmfd sp!, {r4-r10,pc} 240 ldmfd sp!, {r4-r10,pc}
241 .align 5 241 .align 5
242 3: 242 3:
243 ldmia r1, {r4-r5, r10} 243 ldmia r1, {r4-r5, r10}
244 add r1, r1, r2 244 add r1, r1, r2
245 ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10 245 ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
246 ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10 246 ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
247 pld [r1] 247 pld [r1]
248 RND_AVG32 r4, r5, r6, r7, r8, r9, r12 248 RND_AVG32 r4, r5, r6, r7, r8, r9, r12
249 subs r3, r3, #1 249 subs r3, r3, #1
250 stmia r0, {r4-r5} 250 stmia r0, {r4-r5}
251 add r0, r0, r2 251 add r0, r0, r2
252 bne 3b 252 bne 3b
253 ldmfd sp!, {r4-r10,pc} 253 ldmfd sp!, {r4-r10,pc}
254 .align 5 254 .align 5
255 4: 255 4:
256 ldmia r1, {r4-r5, r10} 256 ldmia r1, {r4-r5, r10}
257 add r1, r1, r2 257 add r1, r1, r2
258 ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10 258 ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
259 pld [r1] 259 pld [r1]
260 RND_AVG32 r8, r9, r6, r7, r5, r10, r12 260 RND_AVG32 r8, r9, r6, r7, r5, r10, r12
261 subs r3, r3, #1 261 subs r3, r3, #1
262 stmia r0, {r8-r9} 262 stmia r0, {r8-r9}
263 add r0, r0, r2 263 add r0, r0, r2
264 bne 4b 264 bne 4b
265 ldmfd sp!, {r4-r10,pc} @@ update PC with LR content. 265 ldmfd sp!, {r4-r10,pc} @@ update PC with LR content.
266 .endfunc 266 .endfunc
267 267
268 .align 5 268 .align 5
269 function put_no_rnd_pixels8_x2_arm, export=1 269 function put_no_rnd_pixels8_x2_arm, export=1
270 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) 270 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
271 @ block = word aligned, pixels = unaligned 271 @ block = word aligned, pixels = unaligned
272 pld [r1] 272 pld [r1]
273 stmfd sp!, {r4-r10,lr} @ R14 is also called LR 273 stmfd sp!, {r4-r10,lr} @ R14 is also called LR
274 ldr r12, =0xfefefefe 274 ldr r12, =0xfefefefe
275 JMP_ALIGN r5, r1 275 JMP_ALIGN r5, r1
276 1: 276 1:
277 ldmia r1, {r4-r5, r10} 277 ldmia r1, {r4-r5, r10}
278 add r1, r1, r2 278 add r1, r1, r2
279 ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 279 ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
280 pld [r1] 280 pld [r1]
281 NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12 281 NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
282 subs r3, r3, #1 282 subs r3, r3, #1
283 stmia r0, {r8-r9} 283 stmia r0, {r8-r9}
284 add r0, r0, r2 284 add r0, r0, r2
285 bne 1b 285 bne 1b
286 ldmfd sp!, {r4-r10,pc} 286 ldmfd sp!, {r4-r10,pc}
287 .align 5 287 .align 5
288 2: 288 2:
289 ldmia r1, {r4-r5, r10} 289 ldmia r1, {r4-r5, r10}
290 add r1, r1, r2 290 add r1, r1, r2
291 ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 291 ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
292 ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10 292 ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
293 pld [r1] 293 pld [r1]
294 NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12 294 NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
295 subs r3, r3, #1 295 subs r3, r3, #1
296 stmia r0, {r4-r5} 296 stmia r0, {r4-r5}
297 add r0, r0, r2 297 add r0, r0, r2
298 bne 2b 298 bne 2b
299 ldmfd sp!, {r4-r10,pc} 299 ldmfd sp!, {r4-r10,pc}
300 .align 5 300 .align 5
301 3: 301 3:
302 ldmia r1, {r4-r5, r10} 302 ldmia r1, {r4-r5, r10}
303 add r1, r1, r2 303 add r1, r1, r2
304 ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10 304 ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
305 ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10 305 ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
306 pld [r1] 306 pld [r1]
307 NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12 307 NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
308 subs r3, r3, #1 308 subs r3, r3, #1
309 stmia r0, {r4-r5} 309 stmia r0, {r4-r5}
310 add r0, r0, r2 310 add r0, r0, r2
311 bne 3b 311 bne 3b
312 ldmfd sp!, {r4-r10,pc} 312 ldmfd sp!, {r4-r10,pc}
313 .align 5 313 .align 5
314 4: 314 4:
315 ldmia r1, {r4-r5, r10} 315 ldmia r1, {r4-r5, r10}
316 add r1, r1, r2 316 add r1, r1, r2
317 ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10 317 ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
318 pld [r1] 318 pld [r1]
319 NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12 319 NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12
320 subs r3, r3, #1 320 subs r3, r3, #1
321 stmia r0, {r8-r9} 321 stmia r0, {r8-r9}
322 add r0, r0, r2 322 add r0, r0, r2
323 bne 4b 323 bne 4b
324 ldmfd sp!, {r4-r10,pc} @@ update PC with LR content. 324 ldmfd sp!, {r4-r10,pc} @@ update PC with LR content.
325 .endfunc 325 .endfunc
326 326
327 327
328 @ ---------------------------------------------------------------- 328 @ ----------------------------------------------------------------
329 .align 5 329 .align 5
330 function put_pixels8_y2_arm, export=1 330 function put_pixels8_y2_arm, export=1
331 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) 331 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
332 @ block = word aligned, pixels = unaligned 332 @ block = word aligned, pixels = unaligned
333 pld [r1] 333 pld [r1]
334 stmfd sp!, {r4-r11,lr} @ R14 is also called LR 334 stmfd sp!, {r4-r11,lr} @ R14 is also called LR
335 mov r3, r3, lsr #1 335 mov r3, r3, lsr #1
336 ldr r12, =0xfefefefe 336 ldr r12, =0xfefefefe
337 JMP_ALIGN r5, r1 337 JMP_ALIGN r5, r1
338 1: 338 1:
339 ldmia r1, {r4-r5} 339 ldmia r1, {r4-r5}
340 add r1, r1, r2 340 add r1, r1, r2
341 6: ldmia r1, {r6-r7} 341 6: ldmia r1, {r6-r7}
342 add r1, r1, r2 342 add r1, r1, r2
343 pld [r1] 343 pld [r1]
344 RND_AVG32 r8, r9, r4, r5, r6, r7, r12 344 RND_AVG32 r8, r9, r4, r5, r6, r7, r12
345 ldmia r1, {r4-r5} 345 ldmia r1, {r4-r5}
346 add r1, r1, r2 346 add r1, r1, r2
347 stmia r0, {r8-r9} 347 stmia r0, {r8-r9}
348 add r0, r0, r2 348 add r0, r0, r2
349 pld [r1] 349 pld [r1]
350 RND_AVG32 r8, r9, r6, r7, r4, r5, r12 350 RND_AVG32 r8, r9, r6, r7, r4, r5, r12
351 subs r3, r3, #1 351 subs r3, r3, #1
352 stmia r0, {r8-r9} 352 stmia r0, {r8-r9}
353 add r0, r0, r2 353 add r0, r0, r2
354 bne 6b 354 bne 6b
355 ldmfd sp!, {r4-r11,pc} 355 ldmfd sp!, {r4-r11,pc}
356 .align 5 356 .align 5
357 2: 357 2:
358 ldmia r1, {r4-r6} 358 ldmia r1, {r4-r6}
359 add r1, r1, r2 359 add r1, r1, r2
360 pld [r1] 360 pld [r1]
361 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 361 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
362 6: ldmia r1, {r7-r9} 362 6: ldmia r1, {r7-r9}
363 add r1, r1, r2 363 add r1, r1, r2
364 pld [r1] 364 pld [r1]
365 ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9 365 ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
366 RND_AVG32 r10, r11, r4, r5, r7, r8, r12 366 RND_AVG32 r10, r11, r4, r5, r7, r8, r12
367 stmia r0, {r10-r11} 367 stmia r0, {r10-r11}
368 add r0, r0, r2 368 add r0, r0, r2
369 ldmia r1, {r4-r6} 369 ldmia r1, {r4-r6}
370 add r1, r1, r2 370 add r1, r1, r2
371 pld [r1] 371 pld [r1]
372 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 372 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
373 subs r3, r3, #1 373 subs r3, r3, #1
374 RND_AVG32 r10, r11, r7, r8, r4, r5, r12 374 RND_AVG32 r10, r11, r7, r8, r4, r5, r12
375 stmia r0, {r10-r11} 375 stmia r0, {r10-r11}
376 add r0, r0, r2 376 add r0, r0, r2
377 bne 6b 377 bne 6b
378 ldmfd sp!, {r4-r11,pc} 378 ldmfd sp!, {r4-r11,pc}
379 .align 5 379 .align 5
380 3: 380 3:
381 ldmia r1, {r4-r6} 381 ldmia r1, {r4-r6}
382 add r1, r1, r2 382 add r1, r1, r2
383 pld [r1] 383 pld [r1]
384 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 384 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
385 6: ldmia r1, {r7-r9} 385 6: ldmia r1, {r7-r9}
386 add r1, r1, r2 386 add r1, r1, r2
387 pld [r1] 387 pld [r1]
388 ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9 388 ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
389 RND_AVG32 r10, r11, r4, r5, r7, r8, r12 389 RND_AVG32 r10, r11, r4, r5, r7, r8, r12
390 stmia r0, {r10-r11} 390 stmia r0, {r10-r11}
391 add r0, r0, r2 391 add r0, r0, r2
392 ldmia r1, {r4-r6} 392 ldmia r1, {r4-r6}
393 add r1, r1, r2 393 add r1, r1, r2
394 pld [r1] 394 pld [r1]
395 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 395 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
396 subs r3, r3, #1 396 subs r3, r3, #1
397 RND_AVG32 r10, r11, r7, r8, r4, r5, r12 397 RND_AVG32 r10, r11, r7, r8, r4, r5, r12
398 stmia r0, {r10-r11} 398 stmia r0, {r10-r11}
399 add r0, r0, r2 399 add r0, r0, r2
400 bne 6b 400 bne 6b
401 ldmfd sp!, {r4-r11,pc} 401 ldmfd sp!, {r4-r11,pc}
402 .align 5 402 .align 5
403 4: 403 4:
404 ldmia r1, {r4-r6} 404 ldmia r1, {r4-r6}
405 add r1, r1, r2 405 add r1, r1, r2
406 pld [r1] 406 pld [r1]
407 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 407 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
408 6: ldmia r1, {r7-r9} 408 6: ldmia r1, {r7-r9}
409 add r1, r1, r2 409 add r1, r1, r2
410 pld [r1] 410 pld [r1]
411 ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9 411 ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
412 RND_AVG32 r10, r11, r4, r5, r7, r8, r12 412 RND_AVG32 r10, r11, r4, r5, r7, r8, r12
413 stmia r0, {r10-r11} 413 stmia r0, {r10-r11}
414 add r0, r0, r2 414 add r0, r0, r2
415 ldmia r1, {r4-r6} 415 ldmia r1, {r4-r6}
416 add r1, r1, r2 416 add r1, r1, r2
417 pld [r1] 417 pld [r1]
418 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 418 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
419 subs r3, r3, #1 419 subs r3, r3, #1
420 RND_AVG32 r10, r11, r7, r8, r4, r5, r12 420 RND_AVG32 r10, r11, r7, r8, r4, r5, r12
421 stmia r0, {r10-r11} 421 stmia r0, {r10-r11}
422 add r0, r0, r2 422 add r0, r0, r2
423 bne 6b 423 bne 6b
424 ldmfd sp!, {r4-r11,pc} 424 ldmfd sp!, {r4-r11,pc}
425 .endfunc 425 .endfunc
426 426
427 .align 5 427 .align 5
428 function put_no_rnd_pixels8_y2_arm, export=1 428 function put_no_rnd_pixels8_y2_arm, export=1
429 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) 429 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
430 @ block = word aligned, pixels = unaligned 430 @ block = word aligned, pixels = unaligned
431 pld [r1] 431 pld [r1]
432 stmfd sp!, {r4-r11,lr} @ R14 is also called LR 432 stmfd sp!, {r4-r11,lr} @ R14 is also called LR
433 mov r3, r3, lsr #1 433 mov r3, r3, lsr #1
434 ldr r12, =0xfefefefe 434 ldr r12, =0xfefefefe
435 JMP_ALIGN r5, r1 435 JMP_ALIGN r5, r1
436 1: 436 1:
437 ldmia r1, {r4-r5} 437 ldmia r1, {r4-r5}
438 add r1, r1, r2 438 add r1, r1, r2
439 6: ldmia r1, {r6-r7} 439 6: ldmia r1, {r6-r7}
440 add r1, r1, r2 440 add r1, r1, r2
441 pld [r1] 441 pld [r1]
442 NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12 442 NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
443 ldmia r1, {r4-r5} 443 ldmia r1, {r4-r5}
444 add r1, r1, r2 444 add r1, r1, r2
445 stmia r0, {r8-r9} 445 stmia r0, {r8-r9}
446 add r0, r0, r2 446 add r0, r0, r2
447 pld [r1] 447 pld [r1]
448 NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12 448 NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12
449 subs r3, r3, #1 449 subs r3, r3, #1
450 stmia r0, {r8-r9} 450 stmia r0, {r8-r9}
451 add r0, r0, r2 451 add r0, r0, r2
452 bne 6b 452 bne 6b
453 ldmfd sp!, {r4-r11,pc} 453 ldmfd sp!, {r4-r11,pc}
454 .align 5 454 .align 5
455 2: 455 2:
456 ldmia r1, {r4-r6} 456 ldmia r1, {r4-r6}
457 add r1, r1, r2 457 add r1, r1, r2
458 pld [r1] 458 pld [r1]
459 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 459 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
460 6: ldmia r1, {r7-r9} 460 6: ldmia r1, {r7-r9}
461 add r1, r1, r2 461 add r1, r1, r2
462 pld [r1] 462 pld [r1]
463 ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9 463 ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
464 NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 464 NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
465 stmia r0, {r10-r11} 465 stmia r0, {r10-r11}
466 add r0, r0, r2 466 add r0, r0, r2
467 ldmia r1, {r4-r6} 467 ldmia r1, {r4-r6}
468 add r1, r1, r2 468 add r1, r1, r2
469 pld [r1] 469 pld [r1]
470 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 470 ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
471 subs r3, r3, #1 471 subs r3, r3, #1
472 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 472 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
473 stmia r0, {r10-r11} 473 stmia r0, {r10-r11}
474 add r0, r0, r2 474 add r0, r0, r2
475 bne 6b 475 bne 6b
476 ldmfd sp!, {r4-r11,pc} 476 ldmfd sp!, {r4-r11,pc}
477 .align 5 477 .align 5
478 3: 478 3:
479 ldmia r1, {r4-r6} 479 ldmia r1, {r4-r6}
480 add r1, r1, r2 480 add r1, r1, r2
481 pld [r1] 481 pld [r1]
482 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 482 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
483 6: ldmia r1, {r7-r9} 483 6: ldmia r1, {r7-r9}
484 add r1, r1, r2 484 add r1, r1, r2
485 pld [r1] 485 pld [r1]
486 ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9 486 ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
487 NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 487 NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
488 stmia r0, {r10-r11} 488 stmia r0, {r10-r11}
489 add r0, r0, r2 489 add r0, r0, r2
490 ldmia r1, {r4-r6} 490 ldmia r1, {r4-r6}
491 add r1, r1, r2 491 add r1, r1, r2
492 pld [r1] 492 pld [r1]
493 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 493 ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
494 subs r3, r3, #1 494 subs r3, r3, #1
495 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 495 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
496 stmia r0, {r10-r11} 496 stmia r0, {r10-r11}
497 add r0, r0, r2 497 add r0, r0, r2
498 bne 6b 498 bne 6b
499 ldmfd sp!, {r4-r11,pc} 499 ldmfd sp!, {r4-r11,pc}
500 .align 5 500 .align 5
501 4: 501 4:
502 ldmia r1, {r4-r6} 502 ldmia r1, {r4-r6}
503 add r1, r1, r2 503 add r1, r1, r2
504 pld [r1] 504 pld [r1]
505 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 505 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
506 6: ldmia r1, {r7-r9} 506 6: ldmia r1, {r7-r9}
507 add r1, r1, r2 507 add r1, r1, r2
508 pld [r1] 508 pld [r1]
509 ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9 509 ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
510 NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 510 NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
511 stmia r0, {r10-r11} 511 stmia r0, {r10-r11}
512 add r0, r0, r2 512 add r0, r0, r2
513 ldmia r1, {r4-r6} 513 ldmia r1, {r4-r6}
514 add r1, r1, r2 514 add r1, r1, r2
515 pld [r1] 515 pld [r1]
516 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 516 ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
517 subs r3, r3, #1 517 subs r3, r3, #1
518 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 518 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
519 stmia r0, {r10-r11} 519 stmia r0, {r10-r11}
520 add r0, r0, r2 520 add r0, r0, r2
521 bne 6b 521 bne 6b
522 ldmfd sp!, {r4-r11,pc} 522 ldmfd sp!, {r4-r11,pc}
523 .endfunc 523 .endfunc
524 524
525 .ltorg 525 .ltorg
526 526
527 @ ---------------------------------------------------------------- 527 @ ----------------------------------------------------------------
528 .macro RND_XY2_IT align, rnd 528 .macro RND_XY2_IT align, rnd
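529 @ l1 = (a & 0x03030303) + (b & 0x03030303) (+ 0x02020202 if \rnd is lsl, 0x01010101 if lsr; added only when r3 is even) 529 @ l1 = (a & 0x03030303) + (b & 0x03030303) (+ 0x02020202 if \rnd is lsl, 0x01010101 if lsr; added only when r3 is even)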
530 @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2) 530 @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
531 .if \align == 0 531 .if \align == 0
532 ldmia r1, {r6-r8} 532 ldmia r1, {r6-r8}
533 .elseif \align == 3 533 .elseif \align == 3
534 ldmia r1, {r5-r7} 534 ldmia r1, {r5-r7}
535 .else 535 .else
536 ldmia r1, {r8-r10} 536 ldmia r1, {r8-r10}
537 .endif 537 .endif
538 add r1, r1, r2 538 add r1, r1, r2
539 pld [r1] 539 pld [r1]
540 .if \align == 0 540 .if \align == 0
541 ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8 541 ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8
542 .elseif \align == 1 542 .elseif \align == 1
543 ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r8, r9, r10 543 ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r8, r9, r10
544 ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r8, r9, r10 544 ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r8, r9, r10
545 .elseif \align == 2 545 .elseif \align == 2
546 ADJ_ALIGN_DOUBLEWORD_D 2, r4, r5, r8, r9, r10 546 ADJ_ALIGN_DOUBLEWORD_D 2, r4, r5, r8, r9, r10
547 ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r8, r9, r10 547 ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r8, r9, r10
548 .elseif \align == 3 548 .elseif \align == 3
549 ADJ_ALIGN_DOUBLEWORD_D 3, r4, r5, r5, r6, r7 549 ADJ_ALIGN_DOUBLEWORD_D 3, r4, r5, r5, r6, r7
550 .endif 550 .endif
551 ldr r14, =0x03030303 551 ldr r14, =0x03030303
552 tst r3, #1 552 tst r3, #1
553 and r8, r4, r14 553 and r8, r4, r14
554 and r9, r5, r14 554 and r9, r5, r14
555 and r10, r6, r14 555 and r10, r6, r14
556 and r11, r7, r14 556 and r11, r7, r14
557 andeq r14, r14, r14, \rnd #1 557 andeq r14, r14, r14, \rnd #1
558 add r8, r8, r10 558 add r8, r8, r10
559 add r9, r9, r11 559 add r9, r9, r11
560 ldr r12, =0xfcfcfcfc >> 2 560 ldr r12, =0xfcfcfcfc >> 2
561 addeq r8, r8, r14 561 addeq r8, r8, r14
562 addeq r9, r9, r14 562 addeq r9, r9, r14
563 and r4, r12, r4, lsr #2 563 and r4, r12, r4, lsr #2
564 and r5, r12, r5, lsr #2 564 and r5, r12, r5, lsr #2
565 and r6, r12, r6, lsr #2 565 and r6, r12, r6, lsr #2
566 and r7, r12, r7, lsr #2 566 and r7, r12, r7, lsr #2
567 add r10, r4, r6 567 add r10, r4, r6
568 add r11, r5, r7 568 add r11, r5, r7
569 subs r3, r3, #1 569 subs r3, r3, #1
570 .endm 570 .endm
571 571
572 .macro RND_XY2_EXPAND align, rnd 572 .macro RND_XY2_EXPAND align, rnd
573 RND_XY2_IT \align, \rnd 573 RND_XY2_IT \align, \rnd
574 6: stmfd sp!, {r8-r11} 574 6: stmfd sp!, {r8-r11}
575 RND_XY2_IT \align, \rnd 575 RND_XY2_IT \align, \rnd
576 ldmfd sp!, {r4-r7} 576 ldmfd sp!, {r4-r7}
577 add r4, r4, r8 577 add r4, r4, r8
578 add r5, r5, r9 578 add r5, r5, r9
579 ldr r14, =0x0f0f0f0f 579 ldr r14, =0x0f0f0f0f
580 add r6, r6, r10 580 add r6, r6, r10
581 add r7, r7, r11 581 add r7, r7, r11
582 and r4, r14, r4, lsr #2 582 and r4, r14, r4, lsr #2
583 and r5, r14, r5, lsr #2 583 and r5, r14, r5, lsr #2
584 add r4, r4, r6 584 add r4, r4, r6
585 add r5, r5, r7 585 add r5, r5, r7
586 stmia r0, {r4-r5} 586 stmia r0, {r4-r5}
587 add r0, r0, r2 587 add r0, r0, r2
588 bge 6b 588 bge 6b
589 ldmfd sp!, {r4-r11,pc} 589 ldmfd sp!, {r4-r11,pc}
590 .endm 590 .endm
591 591
592 .align 5 592 .align 5
593 function put_pixels8_xy2_arm, export=1 593 function put_pixels8_xy2_arm, export=1
594 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) 594 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
595 @ block = word aligned, pixels = unaligned 595 @ block = word aligned, pixels = unaligned
596 pld [r1] 596 pld [r1]
597 stmfd sp!, {r4-r11,lr} @ R14 is also called LR 597 stmfd sp!, {r4-r11,lr} @ R14 is also called LR
598 JMP_ALIGN r5, r1 598 JMP_ALIGN r5, r1
599 1: 599 1:
600 RND_XY2_EXPAND 0, lsl 600 RND_XY2_EXPAND 0, lsl
601 601
602 .align 5 602 .align 5
603 2: 603 2:
604 RND_XY2_EXPAND 1, lsl 604 RND_XY2_EXPAND 1, lsl
605 605
606 .align 5 606 .align 5
607 3: 607 3:
608 RND_XY2_EXPAND 2, lsl 608 RND_XY2_EXPAND 2, lsl
609 609
610 .align 5 610 .align 5
611 4: 611 4:
612 RND_XY2_EXPAND 3, lsl 612 RND_XY2_EXPAND 3, lsl
613 .endfunc 613 .endfunc
614 614
615 .align 5 615 .align 5
616 function put_no_rnd_pixels8_xy2_arm, export=1 616 function put_no_rnd_pixels8_xy2_arm, export=1
617 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) 617 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
618 @ block = word aligned, pixels = unaligned 618 @ block = word aligned, pixels = unaligned
619 pld [r1] 619 pld [r1]
620 stmfd sp!, {r4-r11,lr} @ R14 is also called LR 620 stmfd sp!, {r4-r11,lr} @ R14 is also called LR
621 JMP_ALIGN r5, r1 621 JMP_ALIGN r5, r1
622 1: 622 1:
623 RND_XY2_EXPAND 0, lsr 623 RND_XY2_EXPAND 0, lsr
624 624
625 .align 5 625 .align 5
626 2: 626 2:
627 RND_XY2_EXPAND 1, lsr 627 RND_XY2_EXPAND 1, lsr
628 628
629 .align 5 629 .align 5
630 3: 630 3:
631 RND_XY2_EXPAND 2, lsr 631 RND_XY2_EXPAND 2, lsr
632 632
633 .align 5 633 .align 5
634 4: 634 4:
635 RND_XY2_EXPAND 3, lsr 635 RND_XY2_EXPAND 3, lsr
636 .endfunc 636 .endfunc
637 637
638 .align 5 638 .align 5
639 @ void ff_add_pixels_clamped_ARM(int16_t *block, uint8_t *dest, int stride) 639 @ void ff_add_pixels_clamped_ARM(int16_t *block, uint8_t *dest, int stride)
640 function ff_add_pixels_clamped_ARM, export=1 640 function ff_add_pixels_clamped_ARM, export=1
655 movne r6, r5, lsr #24 655 movne r6, r5, lsr #24
656 tst r8, #0x100 656 tst r8, #0x100
657 movne r8, r7, lsr #24 657 movne r8, r7, lsr #24
658 mov r9, r6 658 mov r9, r6
659 ldrsh r5, [r0, #4] /* moved from [A] */ 659 ldrsh r5, [r0, #4] /* moved from [A] */
660 orr r9, r9, r8, lsl #8 660 orr r9, r9, r8, lsl #8
661 /* block[2] and block[3] */ 661 /* block[2] and block[3] */
662 /* [A] */ 662 /* [A] */
663 ldrsh r7, [r0, #6] 663 ldrsh r7, [r0, #6]
664 and r6, r4, #0xFF0000 664 and r6, r4, #0xFF0000
665 and r8, r4, #0xFF000000 665 and r8, r4, #0xFF000000
666 add r6, r5, r6, lsr #16 666 add r6, r5, r6, lsr #16
667 add r8, r7, r8, lsr #24 667 add r8, r7, r8, lsr #24
668 mvn r5, r5 668 mvn r5, r5
669 mvn r7, r7 669 mvn r7, r7
670 tst r6, #0x100 670 tst r6, #0x100
671 movne r6, r5, lsr #24 671 movne r6, r5, lsr #24
672 tst r8, #0x100 672 tst r8, #0x100
673 movne r8, r7, lsr #24 673 movne r8, r7, lsr #24
674 orr r9, r9, r6, lsl #16 674 orr r9, r9, r6, lsl #16
675 ldr r4, [r1, #4] /* moved from [B] */ 675 ldr r4, [r1, #4] /* moved from [B] */
676 orr r9, r9, r8, lsl #24 676 orr r9, r9, r8, lsl #24
677 /* store dest */ 677 /* store dest */
678 ldrsh r5, [r0, #8] /* moved from [C] */ 678 ldrsh r5, [r0, #8] /* moved from [C] */
679 str r9, [r1] 679 str r9, [r1]
680 680
681 /* load dest */ 681 /* load dest */
684 /* [C] */ 684 /* [C] */
685 ldrsh r7, [r0, #10] 685 ldrsh r7, [r0, #10]
686 and r6, r4, #0xFF 686 and r6, r4, #0xFF
687 and r8, r4, #0xFF00 687 and r8, r4, #0xFF00
688 add r6, r5, r6 688 add r6, r5, r6
689 add r8, r7, r8, lsr #8 689 add r8, r7, r8, lsr #8
690 mvn r5, r5 690 mvn r5, r5
691 mvn r7, r7 691 mvn r7, r7
692 tst r6, #0x100 692 tst r6, #0x100
693 movne r6, r5, lsr #24 693 movne r6, r5, lsr #24
694 tst r8, #0x100 694 tst r8, #0x100
695 movne r8, r7, lsr #24 695 movne r8, r7, lsr #24
696 mov r9, r6 696 mov r9, r6
697 ldrsh r5, [r0, #12] /* moved from [D] */ 697 ldrsh r5, [r0, #12] /* moved from [D] */
698 orr r9, r9, r8, lsl #8 698 orr r9, r9, r8, lsl #8
699 /* block[6] and block[7] */ 699 /* block[6] and block[7] */
700 /* [D] */ 700 /* [D] */
701 ldrsh r7, [r0, #14] 701 ldrsh r7, [r0, #14]
702 and r6, r4, #0xFF0000 702 and r6, r4, #0xFF0000
703 and r8, r4, #0xFF000000 703 and r8, r4, #0xFF000000
704 add r6, r5, r6, lsr #16 704 add r6, r5, r6, lsr #16
705 add r8, r7, r8, lsr #24 705 add r8, r7, r8, lsr #24
706 mvn r5, r5 706 mvn r5, r5
707 mvn r7, r7 707 mvn r7, r7
708 tst r6, #0x100 708 tst r6, #0x100
709 movne r6, r5, lsr #24 709 movne r6, r5, lsr #24
710 tst r8, #0x100 710 tst r8, #0x100
711 movne r8, r7, lsr #24 711 movne r8, r7, lsr #24
712 orr r9, r9, r6, lsl #16 712 orr r9, r9, r6, lsl #16
713 add r0, r0, #16 /* moved from [E] */ 713 add r0, r0, #16 /* moved from [E] */
714 orr r9, r9, r8, lsl #24 714 orr r9, r9, r8, lsl #24
715 subs r10, r10, #1 /* moved from [F] */ 715 subs r10, r10, #1 /* moved from [F] */
716 /* store dest */ 716 /* store dest */
717 str r9, [r1, #4] 717 str r9, [r1, #4]
718 718
719 /* [E] */ 719 /* [E] */