Mercurial > libavcodec.hg
comparison arm/vp3dsp_neon.S @ 11443:361a5fcb4393 libavcodec
ARM: set size of asm functions in object files
author | mru |
---|---|
date | Tue, 09 Mar 2010 16:17:56 +0000 |
parents | 2c1c28f26a27 |
children | f7281af560fe |
comparison
equal
deleted
inserted
replaced
11442:fe32d9ba1c86 | 11443:361a5fcb4393 |
---|---|
72 vp3_loop_filter | 72 vp3_loop_filter |
73 | 73 |
74 vst1.64 {d0}, [ip,:64], r1 | 74 vst1.64 {d0}, [ip,:64], r1 |
75 vst1.64 {d1}, [ip,:64], r1 | 75 vst1.64 {d1}, [ip,:64], r1 |
76 bx lr | 76 bx lr |
77 .endfunc | 77 endfunc |
78 | 78 |
79 function ff_vp3_h_loop_filter_neon, export=1 | 79 function ff_vp3_h_loop_filter_neon, export=1 |
80 sub ip, r0, #1 | 80 sub ip, r0, #1 |
81 sub r0, r0, #2 | 81 sub r0, r0, #2 |
82 vld1.32 {d16[]}, [r0], r1 | 82 vld1.32 {d16[]}, [r0], r1 |
105 vst1.16 {d0[2]}, [ip], r1 | 105 vst1.16 {d0[2]}, [ip], r1 |
106 vst1.16 {d1[2]}, [ip], r1 | 106 vst1.16 {d1[2]}, [ip], r1 |
107 vst1.16 {d0[3]}, [ip], r1 | 107 vst1.16 {d0[3]}, [ip], r1 |
108 vst1.16 {d1[3]}, [ip], r1 | 108 vst1.16 {d1[3]}, [ip], r1 |
109 bx lr | 109 bx lr |
110 .endfunc | 110 endfunc |
111 | 111 |
112 | 112 |
113 function vp3_idct_start_neon | 113 function vp3_idct_start_neon |
114 vpush {d8-d15} | 114 vpush {d8-d15} |
115 movrel r3, vp3_idct_constants | 115 movrel r3, vp3_idct_constants |
118 vld1.64 {d20-d23}, [r2,:128]! | 118 vld1.64 {d20-d23}, [r2,:128]! |
119 vld1.64 {d24-d27}, [r2,:128]! | 119 vld1.64 {d24-d27}, [r2,:128]! |
120 vadd.s16 q1, q8, q12 | 120 vadd.s16 q1, q8, q12 |
121 vsub.s16 q8, q8, q12 | 121 vsub.s16 q8, q8, q12 |
122 vld1.64 {d28-d31}, [r2,:128]! | 122 vld1.64 {d28-d31}, [r2,:128]! |
123 .endfunc | 123 endfunc |
124 | 124 |
125 function vp3_idct_core_neon | 125 function vp3_idct_core_neon |
126 vmull.s16 q2, d18, xC1S7 // (ip[1] * C1) << 16 | 126 vmull.s16 q2, d18, xC1S7 // (ip[1] * C1) << 16 |
127 vmull.s16 q3, d19, xC1S7 | 127 vmull.s16 q3, d19, xC1S7 |
128 vmull.s16 q4, d2, xC4S4 // ((ip[0] + ip[4]) * C4) << 16 | 128 vmull.s16 q4, d2, xC4S4 // ((ip[0] + ip[4]) * C4) << 16 |
209 vshrn.s32 d9, q7, #16 | 209 vshrn.s32 d9, q7, #16 |
210 vadd.s16 q3, q3, q14 // ip[6] * C2 | 210 vadd.s16 q3, q3, q14 // ip[6] * C2 |
211 vadd.s16 q10, q1, q2 // Ad = (A - C) * C4 | 211 vadd.s16 q10, q1, q2 // Ad = (A - C) * C4 |
212 vsub.s16 q14, q4, q3 // H = ip[2] * C6 - ip[6] * C2 | 212 vsub.s16 q14, q4, q3 // H = ip[2] * C6 - ip[6] * C2 |
213 bx lr | 213 bx lr |
214 .endfunc | 214 endfunc |
215 | 215 |
216 .macro VP3_IDCT_END type | 216 .macro VP3_IDCT_END type |
217 function vp3_idct_end_\type\()_neon | 217 function vp3_idct_end_\type\()_neon |
218 .ifc \type, col | 218 .ifc \type, col |
219 vdup.16 q0, r3 | 219 vdup.16 q0, r3 |
257 vswp d21, d28 | 257 vswp d21, d28 |
258 vsub.s16 q8, q8, q12 | 258 vsub.s16 q8, q8, q12 |
259 vswp d23, d30 | 259 vswp d23, d30 |
260 .endif | 260 .endif |
261 bx lr | 261 bx lr |
262 .endfunc | 262 endfunc |
263 .endm | 263 .endm |
264 | 264 |
265 VP3_IDCT_END row | 265 VP3_IDCT_END row |
266 VP3_IDCT_END col | 266 VP3_IDCT_END col |
267 | 267 |
287 vst1.64 {d20-d23}, [r0,:128]! | 287 vst1.64 {d20-d23}, [r0,:128]! |
288 vshr.s16 q15, q15, #4 | 288 vshr.s16 q15, q15, #4 |
289 vst1.64 {d24-d27}, [r0,:128]! | 289 vst1.64 {d24-d27}, [r0,:128]! |
290 vst1.64 {d28-d31}, [r0,:128]! | 290 vst1.64 {d28-d31}, [r0,:128]! |
291 bx lr | 291 bx lr |
292 .endfunc | 292 endfunc |
293 | 293 |
294 function ff_vp3_idct_put_neon, export=1 | 294 function ff_vp3_idct_put_neon, export=1 |
295 mov ip, lr | 295 mov ip, lr |
296 bl vp3_idct_start_neon | 296 bl vp3_idct_start_neon |
297 bl vp3_idct_end_row_neon | 297 bl vp3_idct_end_row_neon |
317 vst1.64 {d4}, [r0,:64], r1 | 317 vst1.64 {d4}, [r0,:64], r1 |
318 vst1.64 {d5}, [r0,:64], r1 | 318 vst1.64 {d5}, [r0,:64], r1 |
319 vst1.64 {d6}, [r0,:64], r1 | 319 vst1.64 {d6}, [r0,:64], r1 |
320 vst1.64 {d7}, [r0,:64], r1 | 320 vst1.64 {d7}, [r0,:64], r1 |
321 bx lr | 321 bx lr |
322 .endfunc | 322 endfunc |
323 | 323 |
324 function ff_vp3_idct_add_neon, export=1 | 324 function ff_vp3_idct_add_neon, export=1 |
325 mov ip, lr | 325 mov ip, lr |
326 bl vp3_idct_start_neon | 326 bl vp3_idct_start_neon |
327 bl vp3_idct_end_row_neon | 327 bl vp3_idct_end_row_neon |
371 vst1.64 {d5}, [r2,:64], r1 | 371 vst1.64 {d5}, [r2,:64], r1 |
372 vqmovun.s16 d7, q15 | 372 vqmovun.s16 d7, q15 |
373 vst1.64 {d6}, [r2,:64], r1 | 373 vst1.64 {d6}, [r2,:64], r1 |
374 vst1.64 {d7}, [r2,:64], r1 | 374 vst1.64 {d7}, [r2,:64], r1 |
375 bx lr | 375 bx lr |
376 .endfunc | 376 endfunc |