Mercurial > libavcodec.hg
comparison arm/fft_neon.S @ 11443:361a5fcb4393 libavcodec
ARM: set size of asm functions in object files
author | mru |
---|---|
date | Tue, 09 Mar 2010 16:17:56 +0000 |
parents | f12b7ea2df2a |
children | c80c7a717156 |
comparison (legend: equal | deleted | inserted | replaced)
11442:fe32d9ba1c86 | 11443:361a5fcb4393 |
---|---|
41 vsub.f32 d2, d4, d5 | 41 vsub.f32 d2, d4, d5 |
42 | 42 |
43 vst1.32 {d0-d3}, [r0,:128] | 43 vst1.32 {d0-d3}, [r0,:128] |
44 | 44 |
45 bx lr | 45 bx lr |
46 .endfunc | 46 endfunc |
47 | 47 |
48 function fft8_neon | 48 function fft8_neon |
49 mov r1, r0 | 49 mov r1, r0 |
50 vld1.32 {d0-d3}, [r1,:128]! | 50 vld1.32 {d0-d3}, [r1,:128]! |
51 vld1.32 {d16-d19}, [r1,:128] | 51 vld1.32 {d16-d19}, [r1,:128] |
94 | 94 |
95 vst1.32 {d16-d19}, [r1,:128] | 95 vst1.32 {d16-d19}, [r1,:128] |
96 vst1.32 {d0-d3}, [r0,:128] | 96 vst1.32 {d0-d3}, [r0,:128] |
97 | 97 |
98 bx lr | 98 bx lr |
99 .endfunc | 99 endfunc |
100 | 100 |
101 function fft16_neon | 101 function fft16_neon |
102 movrel r1, mppm | 102 movrel r1, mppm |
103 vld1.32 {d16-d19}, [r0,:128]! @ q8{r0,i0,r1,i1} q9{r2,i2,r3,i3} | 103 vld1.32 {d16-d19}, [r0,:128]! @ q8{r0,i0,r1,i1} q9{r2,i2,r3,i3} |
104 pld [r0, #32] | 104 pld [r0, #32] |
196 vst2.32 {d18-d19},[r0,:128], r1 | 196 vst2.32 {d18-d19},[r0,:128], r1 |
197 vst2.32 {d22-d23},[r0,:128], r1 | 197 vst2.32 {d22-d23},[r0,:128], r1 |
198 vst2.32 {d26-d27},[r0,:128], r1 | 198 vst2.32 {d26-d27},[r0,:128], r1 |
199 vst2.32 {d30-d31},[r0,:128] | 199 vst2.32 {d30-d31},[r0,:128] |
200 bx lr | 200 bx lr |
201 .endfunc | 201 endfunc |
202 | 202 |
203 function fft_pass_neon | 203 function fft_pass_neon |
204 push {r4-r6,lr} | 204 push {r4-r6,lr} |
205 mov r6, r2 @ n | 205 mov r6, r2 @ n |
206 lsl r5, r2, #3 @ 2 * n * sizeof FFTSample | 206 lsl r5, r2, #3 @ 2 * n * sizeof FFTSample |
272 vst2.32 {d22-d23}, [r3,:128]! @ {z[o3],z[o3+1]} | 272 vst2.32 {d22-d23}, [r3,:128]! @ {z[o3],z[o3+1]} |
273 vst2.32 {d18-d19}, [r4,:128]! @ {z[o1],z[o1+1]} | 273 vst2.32 {d18-d19}, [r4,:128]! @ {z[o1],z[o1+1]} |
274 bne 1b | 274 bne 1b |
275 | 275 |
276 pop {r4-r6,pc} | 276 pop {r4-r6,pc} |
277 .endfunc | 277 endfunc |
278 | 278 |
279 .macro def_fft n, n2, n4 | 279 .macro def_fft n, n2, n4 |
280 .align 6 | 280 .align 6 |
281 function fft\n\()_neon | 281 function fft\n\()_neon |
282 push {r4, lr} | 282 push {r4, lr} |
289 mov r0, r4 | 289 mov r0, r4 |
290 pop {r4, lr} | 290 pop {r4, lr} |
291 movrel r1, X(ff_cos_\n) | 291 movrel r1, X(ff_cos_\n) |
292 mov r2, #\n4/2 | 292 mov r2, #\n4/2 |
293 b fft_pass_neon | 293 b fft_pass_neon |
294 .endfunc | 294 endfunc |
295 .endm | 295 .endm |
296 | 296 |
297 def_fft 32, 16, 8 | 297 def_fft 32, 16, 8 |
298 def_fft 64, 32, 16 | 298 def_fft 64, 32, 16 |
299 def_fft 128, 64, 32 | 299 def_fft 128, 64, 32 |
312 sub r2, r2, #2 | 312 sub r2, r2, #2 |
313 movrel r3, fft_tab_neon | 313 movrel r3, fft_tab_neon |
314 ldr r3, [r3, r2, lsl #2] | 314 ldr r3, [r3, r2, lsl #2] |
315 mov r0, r1 | 315 mov r0, r1 |
316 bx r3 | 316 bx r3 |
317 .endfunc | 317 endfunc |
318 | 318 |
319 function ff_fft_permute_neon, export=1 | 319 function ff_fft_permute_neon, export=1 |
320 push {r4,lr} | 320 push {r4,lr} |
321 mov r12, #1 | 321 mov r12, #1 |
322 ldr r2, [r0] @ nbits | 322 ldr r2, [r0] @ nbits |
342 vst1.32 {d0-d3}, [r1,:128]! | 342 vst1.32 {d0-d3}, [r1,:128]! |
343 subs r2, r2, #4 | 343 subs r2, r2, #4 |
344 bgt 1b | 344 bgt 1b |
345 | 345 |
346 pop {r4,pc} | 346 pop {r4,pc} |
347 .endfunc | 347 endfunc |
348 | 348 |
349 .section .rodata | 349 .section .rodata |
350 .align 4 | 350 .align 4 |
351 fft_tab_neon: | 351 fft_tab_neon: |
352 .word fft4_neon | 352 .word fft4_neon |