comparison arm/vp3dsp_neon.S @ 11443:361a5fcb4393 libavcodec

ARM: set size of asm functions in object files
author mru
date Tue, 09 Mar 2010 16:17:56 +0000
parents 2c1c28f26a27
children f7281af560fe
comparison
equal deleted inserted replaced
11442:fe32d9ba1c86 11443:361a5fcb4393
72 vp3_loop_filter 72 vp3_loop_filter
73 73
74 vst1.64 {d0}, [ip,:64], r1 74 vst1.64 {d0}, [ip,:64], r1
75 vst1.64 {d1}, [ip,:64], r1 75 vst1.64 {d1}, [ip,:64], r1
76 bx lr 76 bx lr
77 .endfunc 77 endfunc
78 78
79 function ff_vp3_h_loop_filter_neon, export=1 79 function ff_vp3_h_loop_filter_neon, export=1
80 sub ip, r0, #1 80 sub ip, r0, #1
81 sub r0, r0, #2 81 sub r0, r0, #2
82 vld1.32 {d16[]}, [r0], r1 82 vld1.32 {d16[]}, [r0], r1
105 vst1.16 {d0[2]}, [ip], r1 105 vst1.16 {d0[2]}, [ip], r1
106 vst1.16 {d1[2]}, [ip], r1 106 vst1.16 {d1[2]}, [ip], r1
107 vst1.16 {d0[3]}, [ip], r1 107 vst1.16 {d0[3]}, [ip], r1
108 vst1.16 {d1[3]}, [ip], r1 108 vst1.16 {d1[3]}, [ip], r1
109 bx lr 109 bx lr
110 .endfunc 110 endfunc
111 111
112 112
113 function vp3_idct_start_neon 113 function vp3_idct_start_neon
114 vpush {d8-d15} 114 vpush {d8-d15}
115 movrel r3, vp3_idct_constants 115 movrel r3, vp3_idct_constants
118 vld1.64 {d20-d23}, [r2,:128]! 118 vld1.64 {d20-d23}, [r2,:128]!
119 vld1.64 {d24-d27}, [r2,:128]! 119 vld1.64 {d24-d27}, [r2,:128]!
120 vadd.s16 q1, q8, q12 120 vadd.s16 q1, q8, q12
121 vsub.s16 q8, q8, q12 121 vsub.s16 q8, q8, q12
122 vld1.64 {d28-d31}, [r2,:128]! 122 vld1.64 {d28-d31}, [r2,:128]!
123 .endfunc 123 endfunc
124 124
125 function vp3_idct_core_neon 125 function vp3_idct_core_neon
126 vmull.s16 q2, d18, xC1S7 // (ip[1] * C1) << 16 126 vmull.s16 q2, d18, xC1S7 // (ip[1] * C1) << 16
127 vmull.s16 q3, d19, xC1S7 127 vmull.s16 q3, d19, xC1S7
128 vmull.s16 q4, d2, xC4S4 // ((ip[0] + ip[4]) * C4) << 16 128 vmull.s16 q4, d2, xC4S4 // ((ip[0] + ip[4]) * C4) << 16
209 vshrn.s32 d9, q7, #16 209 vshrn.s32 d9, q7, #16
210 vadd.s16 q3, q3, q14 // ip[6] * C2 210 vadd.s16 q3, q3, q14 // ip[6] * C2
211 vadd.s16 q10, q1, q2 // Ad = (A - C) * C4 211 vadd.s16 q10, q1, q2 // Ad = (A - C) * C4
212 vsub.s16 q14, q4, q3 // H = ip[2] * C6 - ip[6] * C2 212 vsub.s16 q14, q4, q3 // H = ip[2] * C6 - ip[6] * C2
213 bx lr 213 bx lr
214 .endfunc 214 endfunc
215 215
216 .macro VP3_IDCT_END type 216 .macro VP3_IDCT_END type
217 function vp3_idct_end_\type\()_neon 217 function vp3_idct_end_\type\()_neon
218 .ifc \type, col 218 .ifc \type, col
219 vdup.16 q0, r3 219 vdup.16 q0, r3
257 vswp d21, d28 257 vswp d21, d28
258 vsub.s16 q8, q8, q12 258 vsub.s16 q8, q8, q12
259 vswp d23, d30 259 vswp d23, d30
260 .endif 260 .endif
261 bx lr 261 bx lr
262 .endfunc 262 endfunc
263 .endm 263 .endm
264 264
265 VP3_IDCT_END row 265 VP3_IDCT_END row
266 VP3_IDCT_END col 266 VP3_IDCT_END col
267 267
287 vst1.64 {d20-d23}, [r0,:128]! 287 vst1.64 {d20-d23}, [r0,:128]!
288 vshr.s16 q15, q15, #4 288 vshr.s16 q15, q15, #4
289 vst1.64 {d24-d27}, [r0,:128]! 289 vst1.64 {d24-d27}, [r0,:128]!
290 vst1.64 {d28-d31}, [r0,:128]! 290 vst1.64 {d28-d31}, [r0,:128]!
291 bx lr 291 bx lr
292 .endfunc 292 endfunc
293 293
294 function ff_vp3_idct_put_neon, export=1 294 function ff_vp3_idct_put_neon, export=1
295 mov ip, lr 295 mov ip, lr
296 bl vp3_idct_start_neon 296 bl vp3_idct_start_neon
297 bl vp3_idct_end_row_neon 297 bl vp3_idct_end_row_neon
317 vst1.64 {d4}, [r0,:64], r1 317 vst1.64 {d4}, [r0,:64], r1
318 vst1.64 {d5}, [r0,:64], r1 318 vst1.64 {d5}, [r0,:64], r1
319 vst1.64 {d6}, [r0,:64], r1 319 vst1.64 {d6}, [r0,:64], r1
320 vst1.64 {d7}, [r0,:64], r1 320 vst1.64 {d7}, [r0,:64], r1
321 bx lr 321 bx lr
322 .endfunc 322 endfunc
323 323
324 function ff_vp3_idct_add_neon, export=1 324 function ff_vp3_idct_add_neon, export=1
325 mov ip, lr 325 mov ip, lr
326 bl vp3_idct_start_neon 326 bl vp3_idct_start_neon
327 bl vp3_idct_end_row_neon 327 bl vp3_idct_end_row_neon
371 vst1.64 {d5}, [r2,:64], r1 371 vst1.64 {d5}, [r2,:64], r1
372 vqmovun.s16 d7, q15 372 vqmovun.s16 d7, q15
373 vst1.64 {d6}, [r2,:64], r1 373 vst1.64 {d6}, [r2,:64], r1
374 vst1.64 {d7}, [r2,:64], r1 374 vst1.64 {d7}, [r2,:64], r1
375 bx lr 375 bx lr
376 .endfunc 376 endfunc