Mercurial > libavcodec.hg
comparison arm/h264dsp_neon.S @ 8626:8d425ee85ddb libavcodec
ARM: simplify ff_put/avg_h264_chroma_mc4/8_neon definitions, no code change
author | mru |
---|---|
date | Sun, 18 Jan 2009 20:43:11 +0000 |
parents | 9281a8a9387a |
children | 23f7711e777e |
comparison
equal
deleted
inserted
replaced
8625:6f1b210e58d1 | 8626:8d425ee85ddb |
---|---|
54 vtrn.16 \r4, \r5 | 54 vtrn.16 \r4, \r5 |
55 vtrn.16 \r6, \r7 | 55 vtrn.16 \r6, \r7 |
56 .endm | 56 .endm |
57 | 57 |
58 /* chroma_mc8(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */ | 58 /* chroma_mc8(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */ |
59 .macro h264_chroma_mc8 avg=0 | 59 .macro h264_chroma_mc8 type |
| 60 function ff_\type\()_h264_chroma_mc8_neon, export=1 |
60 push {r4-r7, lr} | 61 push {r4-r7, lr} |
61 ldrd r4, [sp, #20] | 62 ldrd r4, [sp, #20] |
62 .if \avg | 63 .ifc \type,avg |
63 mov lr, r0 | 64 mov lr, r0 |
64 .endif | 65 .endif |
65 pld [r1] | 66 pld [r1] |
66 pld [r1, r2] | 67 pld [r1, r2] |
67 | 68 |
101 vmlal.u8 q9, d5, d3 | 102 vmlal.u8 q9, d5, d3 |
102 vrshrn.u16 d16, q8, #6 | 103 vrshrn.u16 d16, q8, #6 |
103 vld1.64 {d6, d7}, [r5], r4 | 104 vld1.64 {d6, d7}, [r5], r4 |
104 pld [r1] | 105 pld [r1] |
105 vrshrn.u16 d17, q9, #6 | 106 vrshrn.u16 d17, q9, #6 |
106 .if \avg | 107 .ifc \type,avg |
107 vld1.64 {d20}, [lr,:64], r2 | 108 vld1.64 {d20}, [lr,:64], r2 |
108 vld1.64 {d21}, [lr,:64], r2 | 109 vld1.64 {d21}, [lr,:64], r2 |
109 vrhadd.u8 q8, q8, q10 | 110 vrhadd.u8 q8, q8, q10 |
110 .endif | 111 .endif |
111 vext.8 d7, d6, d7, #1 | 112 vext.8 d7, d6, d7, #1 |
134 vmull.u8 q9, d6, d0 | 135 vmull.u8 q9, d6, d0 |
135 vmlal.u8 q9, d4, d1 | 136 vmlal.u8 q9, d4, d1 |
136 vld1.64 {d6}, [r5], r4 | 137 vld1.64 {d6}, [r5], r4 |
137 vrshrn.u16 d16, q8, #6 | 138 vrshrn.u16 d16, q8, #6 |
138 vrshrn.u16 d17, q9, #6 | 139 vrshrn.u16 d17, q9, #6 |
139 .if \avg | 140 .ifc \type,avg |
140 vld1.64 {d20}, [lr,:64], r2 | 141 vld1.64 {d20}, [lr,:64], r2 |
141 vld1.64 {d21}, [lr,:64], r2 | 142 vld1.64 {d21}, [lr,:64], r2 |
142 vrhadd.u8 q8, q8, q10 | 143 vrhadd.u8 q8, q8, q10 |
143 .endif | 144 .endif |
144 subs r3, r3, #2 | 145 subs r3, r3, #2 |
163 vmlal.u8 q9, d7, d1 | 164 vmlal.u8 q9, d7, d1 |
164 pld [r1] | 165 pld [r1] |
165 vext.8 d5, d4, d5, #1 | 166 vext.8 d5, d4, d5, #1 |
166 vrshrn.u16 d16, q8, #6 | 167 vrshrn.u16 d16, q8, #6 |
167 vrshrn.u16 d17, q9, #6 | 168 vrshrn.u16 d17, q9, #6 |
168 .if \avg | 169 .ifc \type,avg |
169 vld1.64 {d20}, [lr,:64], r2 | 170 vld1.64 {d20}, [lr,:64], r2 |
170 vld1.64 {d21}, [lr,:64], r2 | 171 vld1.64 {d21}, [lr,:64], r2 |
171 vrhadd.u8 q8, q8, q10 | 172 vrhadd.u8 q8, q8, q10 |
172 .endif | 173 .endif |
173 vld1.64 {d6, d7}, [r1], r2 | 174 vld1.64 {d6, d7}, [r1], r2 |
175 vst1.64 {d16}, [r0,:64], r2 | 176 vst1.64 {d16}, [r0,:64], r2 |
176 vst1.64 {d17}, [r0,:64], r2 | 177 vst1.64 {d17}, [r0,:64], r2 |
177 bgt 5b | 178 bgt 5b |
178 | 179 |
179 pop {r4-r7, pc} | 180 pop {r4-r7, pc} |
| 181 .endfunc |
180 .endm | 182 .endm |
181 | 183 |
182 /* chroma_mc4(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */ | 184 /* chroma_mc4(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */ |
183 .macro h264_chroma_mc4 avg=0 | 185 .macro h264_chroma_mc4 type |
| 186 function ff_\type\()_h264_chroma_mc4_neon, export=1 |
184 push {r4-r7, lr} | 187 push {r4-r7, lr} |
185 ldrd r4, [sp, #20] | 188 ldrd r4, [sp, #20] |
186 .if \avg | 189 .ifc \type,avg |
187 mov lr, r0 | 190 mov lr, r0 |
188 .endif | 191 .endif |
189 pld [r1] | 192 pld [r1] |
190 pld [r1, r2] | 193 pld [r1, r2] |
191 | 194 |
228 vadd.i16 d16, d16, d17 | 231 vadd.i16 d16, d16, d17 |
229 vadd.i16 d17, d18, d19 | 232 vadd.i16 d17, d18, d19 |
230 vrshrn.u16 d16, q8, #6 | 233 vrshrn.u16 d16, q8, #6 |
231 subs r3, r3, #2 | 234 subs r3, r3, #2 |
232 pld [r1] | 235 pld [r1] |
233 .if \avg | 236 .ifc \type,avg |
234 vld1.32 {d20[0]}, [lr,:32], r2 | 237 vld1.32 {d20[0]}, [lr,:32], r2 |
235 vld1.32 {d20[1]}, [lr,:32], r2 | 238 vld1.32 {d20[1]}, [lr,:32], r2 |
236 vrhadd.u8 d16, d16, d20 | 239 vrhadd.u8 d16, d16, d20 |
237 .endif | 240 .endif |
238 vext.8 d7, d6, d7, #1 | 241 vext.8 d7, d6, d7, #1 |
263 vmull.u8 q9, d4, d1 | 266 vmull.u8 q9, d4, d1 |
264 vld1.32 {d4[1]}, [r5], r4 | 267 vld1.32 {d4[1]}, [r5], r4 |
265 vadd.i16 d16, d16, d17 | 268 vadd.i16 d16, d16, d17 |
266 vadd.i16 d17, d18, d19 | 269 vadd.i16 d17, d18, d19 |
267 vrshrn.u16 d16, q8, #6 | 270 vrshrn.u16 d16, q8, #6 |
268 .if \avg | 271 .ifc \type,avg |
269 vld1.32 {d20[0]}, [lr,:32], r2 | 272 vld1.32 {d20[0]}, [lr,:32], r2 |
270 vld1.32 {d20[1]}, [lr,:32], r2 | 273 vld1.32 {d20[1]}, [lr,:32], r2 |
271 vrhadd.u8 d16, d16, d20 | 274 vrhadd.u8 d16, d16, d20 |
272 .endif | 275 .endif |
273 subs r3, r3, #2 | 276 subs r3, r3, #2 |
293 vtrn.32 d4, d5 | 296 vtrn.32 d4, d5 |
294 vadd.i16 d16, d16, d17 | 297 vadd.i16 d16, d16, d17 |
295 vadd.i16 d17, d18, d19 | 298 vadd.i16 d17, d18, d19 |
296 pld [r1] | 299 pld [r1] |
297 vrshrn.u16 d16, q8, #6 | 300 vrshrn.u16 d16, q8, #6 |
298 .if \avg | 301 .ifc \type,avg |
299 vld1.32 {d20[0]}, [lr,:32], r2 | 302 vld1.32 {d20[0]}, [lr,:32], r2 |
300 vld1.32 {d20[1]}, [lr,:32], r2 | 303 vld1.32 {d20[1]}, [lr,:32], r2 |
301 vrhadd.u8 d16, d16, d20 | 304 vrhadd.u8 d16, d16, d20 |
302 .endif | 305 .endif |
303 vld1.64 {d6}, [r1], r2 | 306 vld1.64 {d6}, [r1], r2 |
307 vst1.32 {d16[0]}, [r0,:32], r2 | 310 vst1.32 {d16[0]}, [r0,:32], r2 |
308 vst1.32 {d16[1]}, [r0,:32], r2 | 311 vst1.32 {d16[1]}, [r0,:32], r2 |
309 bgt 5b | 312 bgt 5b |
310 | 313 |
311 pop {r4-r7, pc} | 314 pop {r4-r7, pc} |
| 315 .endfunc |
312 .endm | 316 .endm |
313 | 317 |
314 .text | 318 .text |
315 .align | 319 .align |
316 | 320 |
317 function ff_put_h264_chroma_mc8_neon, export=1 | 321 h264_chroma_mc8 put |
318 h264_chroma_mc8 | 322 h264_chroma_mc8 avg |
319 .endfunc | 323 h264_chroma_mc4 put |
320 | 324 h264_chroma_mc4 avg |
321 function ff_avg_h264_chroma_mc8_neon, export=1 | |
322 h264_chroma_mc8 avg=1 | |
323 .endfunc | |
324 | |
325 function ff_put_h264_chroma_mc4_neon, export=1 | |
326 h264_chroma_mc4 | |
327 .endfunc | |
328 | |
329 function ff_avg_h264_chroma_mc4_neon, export=1 | |
330 h264_chroma_mc4 avg=1 | |
331 .endfunc | |
332 | 325 |
333 /* H.264 loop filter */ | 326 /* H.264 loop filter */ |
334 | 327 |
335 .macro h264_loop_filter_start | 328 .macro h264_loop_filter_start |
336 ldr ip, [sp] | 329 ldr ip, [sp] |