libavcodec: comparison of arm/h264dsp_neon.S @ 8626:8d425ee85ddb

ARM: simplify ff_put/avg_h264_chroma_mc4/8_neon definitions, no code change
author mru
date Sun, 18 Jan 2009 20:43:11 +0000
parents 9281a8a9387a
children 23f7711e777e
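
The change moves each exported entry point inside the macro: the macro now takes a type string (put or avg), splices it into the symbol name, and keys the extra averaging code on a string comparison instead of a numeric flag. Two GNU assembler features make this work: \() terminates a macro argument so it can sit inside a longer symbol name, and .ifc assembles its body only when its two string operands match. Below is a minimal stand-alone sketch of the mechanism; the chroma_stub macro and its symbol names are hypothetical, and a plain .global plus a label stand in for the function/export helper macro the real file uses.

        @ Sketch only (hypothetical example, not part of this changeset).
        .arch   armv7-a
        .fpu    neon
        .text

        .macro  chroma_stub type
        .global ff_\type\()_chroma_stub_neon    @ \() ends the "type" argument
ff_\type\()_chroma_stub_neon:
        vrshrn.u16      d16, q8, #6             @ code shared by put and avg
        @ The next block is assembled only when the argument is exactly "avg".
.ifc \type,avg
        vrhadd.u8       d16, d16, d20
.endif
        bx              lr
        .endm

        chroma_stub     put                     @ defines ff_put_chroma_stub_neon
        chroma_stub     avg                     @ defines ff_avg_chroma_stub_neon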
--- arm/h264dsp_neon.S  8625:6f1b210e58d1
+++ arm/h264dsp_neon.S  8626:8d425ee85ddb
@@ -54,14 +54,15 @@
         vtrn.16         \r4, \r5
         vtrn.16         \r6, \r7
 .endm
 
 /* chroma_mc8(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
-.macro h264_chroma_mc8 avg=0
+.macro h264_chroma_mc8 type
+function ff_\type\()_h264_chroma_mc8_neon, export=1
         push            {r4-r7, lr}
         ldrd            r4, [sp, #20]
-.if \avg
+.ifc \type,avg
         mov             lr, r0
 .endif
         pld             [r1]
         pld             [r1, r2]
 
@@ -101,11 +102,11 @@
         vmlal.u8        q9, d5, d3
         vrshrn.u16      d16, q8, #6
         vld1.64         {d6, d7}, [r5], r4
         pld             [r1]
         vrshrn.u16      d17, q9, #6
-.if \avg
+.ifc \type,avg
         vld1.64         {d20}, [lr,:64], r2
         vld1.64         {d21}, [lr,:64], r2
         vrhadd.u8       q8, q8, q10
 .endif
         vext.8          d7, d6, d7, #1
@@ -134,11 +135,11 @@
         vmull.u8        q9, d6, d0
         vmlal.u8        q9, d4, d1
         vld1.64         {d6}, [r5], r4
         vrshrn.u16      d16, q8, #6
         vrshrn.u16      d17, q9, #6
-.if \avg
+.ifc \type,avg
         vld1.64         {d20}, [lr,:64], r2
         vld1.64         {d21}, [lr,:64], r2
         vrhadd.u8       q8, q8, q10
 .endif
         subs            r3, r3, #2
@@ -163,11 +164,11 @@
         vmlal.u8        q9, d7, d1
         pld             [r1]
         vext.8          d5, d4, d5, #1
         vrshrn.u16      d16, q8, #6
         vrshrn.u16      d17, q9, #6
-.if \avg
+.ifc \type,avg
         vld1.64         {d20}, [lr,:64], r2
         vld1.64         {d21}, [lr,:64], r2
         vrhadd.u8       q8, q8, q10
 .endif
         vld1.64         {d6, d7}, [r1], r2
@@ -175,17 +176,19 @@
         vst1.64         {d16}, [r0,:64], r2
         vst1.64         {d17}, [r0,:64], r2
         bgt             5b
 
         pop             {r4-r7, pc}
+.endfunc
 .endm
 
 /* chroma_mc4(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
-.macro h264_chroma_mc4 avg=0
+.macro h264_chroma_mc4 type
+function ff_\type\()_h264_chroma_mc4_neon, export=1
         push            {r4-r7, lr}
         ldrd            r4, [sp, #20]
-.if \avg
+.ifc \type,avg
         mov             lr, r0
 .endif
         pld             [r1]
         pld             [r1, r2]
 
@@ -228,11 +231,11 @@
         vadd.i16        d16, d16, d17
         vadd.i16        d17, d18, d19
         vrshrn.u16      d16, q8, #6
         subs            r3, r3, #2
         pld             [r1]
-.if \avg
+.ifc \type,avg
         vld1.32         {d20[0]}, [lr,:32], r2
         vld1.32         {d20[1]}, [lr,:32], r2
         vrhadd.u8       d16, d16, d20
 .endif
         vext.8          d7, d6, d7, #1
@@ -263,11 +266,11 @@
         vmull.u8        q9, d4, d1
         vld1.32         {d4[1]}, [r5], r4
         vadd.i16        d16, d16, d17
         vadd.i16        d17, d18, d19
         vrshrn.u16      d16, q8, #6
-.if \avg
+.ifc \type,avg
         vld1.32         {d20[0]}, [lr,:32], r2
         vld1.32         {d20[1]}, [lr,:32], r2
         vrhadd.u8       d16, d16, d20
 .endif
         subs            r3, r3, #2
@@ -293,11 +296,11 @@
         vtrn.32         d4, d5
         vadd.i16        d16, d16, d17
         vadd.i16        d17, d18, d19
         pld             [r1]
         vrshrn.u16      d16, q8, #6
-.if \avg
+.ifc \type,avg
         vld1.32         {d20[0]}, [lr,:32], r2
         vld1.32         {d20[1]}, [lr,:32], r2
         vrhadd.u8       d16, d16, d20
 .endif
         vld1.64         {d6}, [r1], r2
@@ -307,30 +310,20 @@
         vst1.32         {d16[0]}, [r0,:32], r2
         vst1.32         {d16[1]}, [r0,:32], r2
         bgt             5b
 
         pop             {r4-r7, pc}
+.endfunc
 .endm
 
         .text
         .align
 
-function ff_put_h264_chroma_mc8_neon, export=1
-        h264_chroma_mc8
-.endfunc
-
-function ff_avg_h264_chroma_mc8_neon, export=1
-        h264_chroma_mc8 avg=1
-.endfunc
-
-function ff_put_h264_chroma_mc4_neon, export=1
-        h264_chroma_mc4
-.endfunc
-
-function ff_avg_h264_chroma_mc4_neon, export=1
-        h264_chroma_mc4 avg=1
-.endfunc
+        h264_chroma_mc8 put
+        h264_chroma_mc8 avg
+        h264_chroma_mc4 put
+        h264_chroma_mc4 avg
 
 /* H.264 loop filter */
 
 .macro h264_loop_filter_start
         ldr             ip, [sp]