comparison arm/fft_neon.S @ 11443:361a5fcb4393 libavcodec

ARM: set size of asm functions in object files
author mru
date Tue, 09 Mar 2010 16:17:56 +0000
parents f12b7ea2df2a
children c80c7a717156
comparison
equal deleted inserted replaced
11442:fe32d9ba1c86 11443:361a5fcb4393
41 vsub.f32 d2, d4, d5 41 vsub.f32 d2, d4, d5
42 42
43 vst1.32 {d0-d3}, [r0,:128] 43 vst1.32 {d0-d3}, [r0,:128]
44 44
45 bx lr 45 bx lr
46 .endfunc 46 endfunc
47 47
48 function fft8_neon 48 function fft8_neon
49 mov r1, r0 49 mov r1, r0
50 vld1.32 {d0-d3}, [r1,:128]! 50 vld1.32 {d0-d3}, [r1,:128]!
51 vld1.32 {d16-d19}, [r1,:128] 51 vld1.32 {d16-d19}, [r1,:128]
94 94
95 vst1.32 {d16-d19}, [r1,:128] 95 vst1.32 {d16-d19}, [r1,:128]
96 vst1.32 {d0-d3}, [r0,:128] 96 vst1.32 {d0-d3}, [r0,:128]
97 97
98 bx lr 98 bx lr
99 .endfunc 99 endfunc
100 100
101 function fft16_neon 101 function fft16_neon
102 movrel r1, mppm 102 movrel r1, mppm
103 vld1.32 {d16-d19}, [r0,:128]! @ q8{r0,i0,r1,i1} q9{r2,i2,r3,i3} 103 vld1.32 {d16-d19}, [r0,:128]! @ q8{r0,i0,r1,i1} q9{r2,i2,r3,i3}
104 pld [r0, #32] 104 pld [r0, #32]
196 vst2.32 {d18-d19},[r0,:128], r1 196 vst2.32 {d18-d19},[r0,:128], r1
197 vst2.32 {d22-d23},[r0,:128], r1 197 vst2.32 {d22-d23},[r0,:128], r1
198 vst2.32 {d26-d27},[r0,:128], r1 198 vst2.32 {d26-d27},[r0,:128], r1
199 vst2.32 {d30-d31},[r0,:128] 199 vst2.32 {d30-d31},[r0,:128]
200 bx lr 200 bx lr
201 .endfunc 201 endfunc
202 202
203 function fft_pass_neon 203 function fft_pass_neon
204 push {r4-r6,lr} 204 push {r4-r6,lr}
205 mov r6, r2 @ n 205 mov r6, r2 @ n
206 lsl r5, r2, #3 @ 2 * n * sizeof FFTSample 206 lsl r5, r2, #3 @ 2 * n * sizeof FFTSample
272 vst2.32 {d22-d23}, [r3,:128]! @ {z[o3],z[o3+1]} 272 vst2.32 {d22-d23}, [r3,:128]! @ {z[o3],z[o3+1]}
273 vst2.32 {d18-d19}, [r4,:128]! @ {z[o1],z[o1+1]} 273 vst2.32 {d18-d19}, [r4,:128]! @ {z[o1],z[o1+1]}
274 bne 1b 274 bne 1b
275 275
276 pop {r4-r6,pc} 276 pop {r4-r6,pc}
277 .endfunc 277 endfunc
278 278
279 .macro def_fft n, n2, n4 279 .macro def_fft n, n2, n4
280 .align 6 280 .align 6
281 function fft\n\()_neon 281 function fft\n\()_neon
282 push {r4, lr} 282 push {r4, lr}
289 mov r0, r4 289 mov r0, r4
290 pop {r4, lr} 290 pop {r4, lr}
291 movrel r1, X(ff_cos_\n) 291 movrel r1, X(ff_cos_\n)
292 mov r2, #\n4/2 292 mov r2, #\n4/2
293 b fft_pass_neon 293 b fft_pass_neon
294 .endfunc 294 endfunc
295 .endm 295 .endm
296 296
297 def_fft 32, 16, 8 297 def_fft 32, 16, 8
298 def_fft 64, 32, 16 298 def_fft 64, 32, 16
299 def_fft 128, 64, 32 299 def_fft 128, 64, 32
312 sub r2, r2, #2 312 sub r2, r2, #2
313 movrel r3, fft_tab_neon 313 movrel r3, fft_tab_neon
314 ldr r3, [r3, r2, lsl #2] 314 ldr r3, [r3, r2, lsl #2]
315 mov r0, r1 315 mov r0, r1
316 bx r3 316 bx r3
317 .endfunc 317 endfunc
318 318
319 function ff_fft_permute_neon, export=1 319 function ff_fft_permute_neon, export=1
320 push {r4,lr} 320 push {r4,lr}
321 mov r12, #1 321 mov r12, #1
322 ldr r2, [r0] @ nbits 322 ldr r2, [r0] @ nbits
342 vst1.32 {d0-d3}, [r1,:128]! 342 vst1.32 {d0-d3}, [r1,:128]!
343 subs r2, r2, #4 343 subs r2, r2, #4
344 bgt 1b 344 bgt 1b
345 345
346 pop {r4,pc} 346 pop {r4,pc}
347 .endfunc 347 endfunc
348 348
349 .section .rodata 349 .section .rodata
350 .align 4 350 .align 4
351 fft_tab_neon: 351 fft_tab_neon:
352 .word fft4_neon 352 .word fft4_neon