comparison arm/dsputil_neon.S @ 11443:361a5fcb4393 libavcodec

ARM: set size of asm functions in object files
author mru
date Tue, 09 Mar 2010 16:17:56 +0000
parents 5c5b864d66e1
children 659f16d04776
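comparison (side by side; each row below reads: old line number, old text, new line number, new text)
legend: equal | deleted | inserted | replaced
left column: 11442:fe32d9ba1c86 | right column: 11443:361a5fcb4393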
238 .endm 238 .endm
239 239
240 .macro pixfunc pfx name suf rnd_op args:vararg 240 .macro pixfunc pfx name suf rnd_op args:vararg
241 function ff_\pfx\name\suf\()_neon, export=1 241 function ff_\pfx\name\suf\()_neon, export=1
242 \name \rnd_op \args 242 \name \rnd_op \args
243 .endfunc 243 endfunc
244 .endm 244 .endm
245 245
246 .macro pixfunc2 pfx name args:vararg 246 .macro pixfunc2 pfx name args:vararg
247 pixfunc \pfx \name 247 pixfunc \pfx \name
248 pixfunc \pfx \name \args 248 pixfunc \pfx \name \args
249 .endm 249 .endm
250 250
251 function ff_put_h264_qpel16_mc00_neon, export=1 251 function ff_put_h264_qpel16_mc00_neon, export=1
252 mov r3, #16 252 mov r3, #16
253 .endfunc 253 endfunc
254 254
255 pixfunc put_ pixels16 255 pixfunc put_ pixels16
256 pixfunc2 put_ pixels16_x2, _no_rnd, vhadd.u8 256 pixfunc2 put_ pixels16_x2, _no_rnd, vhadd.u8
257 pixfunc2 put_ pixels16_y2, _no_rnd, vhadd.u8 257 pixfunc2 put_ pixels16_y2, _no_rnd, vhadd.u8
258 pixfunc2 put_ pixels16_xy2, _no_rnd, vshrn.u16, 1 258 pixfunc2 put_ pixels16_xy2, _no_rnd, vshrn.u16, 1
259 259
260 function ff_avg_h264_qpel16_mc00_neon, export=1 260 function ff_avg_h264_qpel16_mc00_neon, export=1
261 mov r3, #16 261 mov r3, #16
262 .endfunc 262 endfunc
263 263
264 pixfunc avg_ pixels16,, 1 264 pixfunc avg_ pixels16,, 1
265 265
266 function ff_put_h264_qpel8_mc00_neon, export=1 266 function ff_put_h264_qpel8_mc00_neon, export=1
267 mov r3, #8 267 mov r3, #8
268 .endfunc 268 endfunc
269 269
270 pixfunc put_ pixels8 270 pixfunc put_ pixels8
271 pixfunc2 put_ pixels8_x2, _no_rnd, vhadd.u8 271 pixfunc2 put_ pixels8_x2, _no_rnd, vhadd.u8
272 pixfunc2 put_ pixels8_y2, _no_rnd, vhadd.u8 272 pixfunc2 put_ pixels8_y2, _no_rnd, vhadd.u8
273 pixfunc2 put_ pixels8_xy2, _no_rnd, vshrn.u16, 1 273 pixfunc2 put_ pixels8_xy2, _no_rnd, vshrn.u16, 1
274 274
275 function ff_avg_h264_qpel8_mc00_neon, export=1 275 function ff_avg_h264_qpel8_mc00_neon, export=1
276 mov r3, #8 276 mov r3, #8
277 .endfunc 277 endfunc
278 278
279 pixfunc avg_ pixels8,, 1 279 pixfunc avg_ pixels8,, 1
280 280
281 function ff_put_pixels_clamped_neon, export=1 281 function ff_put_pixels_clamped_neon, export=1
282 vld1.64 {d16-d19}, [r0,:128]! 282 vld1.64 {d16-d19}, [r0,:128]!
298 vst1.64 {d4}, [r1,:64], r2 298 vst1.64 {d4}, [r1,:64], r2
299 vst1.64 {d5}, [r1,:64], r2 299 vst1.64 {d5}, [r1,:64], r2
300 vst1.64 {d6}, [r1,:64], r2 300 vst1.64 {d6}, [r1,:64], r2
301 vst1.64 {d7}, [r1,:64], r2 301 vst1.64 {d7}, [r1,:64], r2
302 bx lr 302 bx lr
303 .endfunc 303 endfunc
304 304
305 function ff_put_signed_pixels_clamped_neon, export=1 305 function ff_put_signed_pixels_clamped_neon, export=1
306 vmov.u8 d31, #128 306 vmov.u8 d31, #128
307 vld1.64 {d16-d17}, [r0,:128]! 307 vld1.64 {d16-d17}, [r0,:128]!
308 vqmovn.s16 d0, q8 308 vqmovn.s16 d0, q8
335 vadd.u8 d6, d6, d31 335 vadd.u8 d6, d6, d31
336 vadd.u8 d7, d7, d31 336 vadd.u8 d7, d7, d31
337 vst1.64 {d6}, [r1,:64], r2 337 vst1.64 {d6}, [r1,:64], r2
338 vst1.64 {d7}, [r1,:64], r2 338 vst1.64 {d7}, [r1,:64], r2
339 bx lr 339 bx lr
340 .endfunc 340 endfunc
341 341
342 function ff_add_pixels_clamped_neon, export=1 342 function ff_add_pixels_clamped_neon, export=1
343 mov r3, r1 343 mov r3, r1
344 vld1.64 {d16}, [r1,:64], r2 344 vld1.64 {d16}, [r1,:64], r2
345 vld1.64 {d0-d1}, [r0,:128]! 345 vld1.64 {d0-d1}, [r0,:128]!
380 vst1.64 {d2}, [r3,:64], r2 380 vst1.64 {d2}, [r3,:64], r2
381 vqmovun.s16 d6, q3 381 vqmovun.s16 d6, q3
382 vst1.64 {d4}, [r3,:64], r2 382 vst1.64 {d4}, [r3,:64], r2
383 vst1.64 {d6}, [r3,:64], r2 383 vst1.64 {d6}, [r3,:64], r2
384 bx lr 384 bx lr
385 .endfunc 385 endfunc
386 386
387 function ff_float_to_int16_neon, export=1 387 function ff_float_to_int16_neon, export=1
388 subs r2, r2, #8 388 subs r2, r2, #8
389 vld1.64 {d0-d1}, [r1,:128]! 389 vld1.64 {d0-d1}, [r1,:128]!
390 vcvt.s32.f32 q8, q0, #16 390 vcvt.s32.f32 q8, q0, #16
424 bx lr 424 bx lr
425 3: vshrn.s32 d4, q8, #16 425 3: vshrn.s32 d4, q8, #16
426 vshrn.s32 d5, q9, #16 426 vshrn.s32 d5, q9, #16
427 vst1.64 {d4-d5}, [r0,:128]! 427 vst1.64 {d4-d5}, [r0,:128]!
428 bx lr 428 bx lr
429 .endfunc 429 endfunc
430 430
431 function ff_float_to_int16_interleave_neon, export=1 431 function ff_float_to_int16_interleave_neon, export=1
432 cmp r3, #2 432 cmp r3, #2
433 ldrlt r1, [r1] 433 ldrlt r1, [r1]
434 blt ff_float_to_int16_neon 434 blt ff_float_to_int16_neon
717 vld1.64 {d0-d1}, [r4,:128]! 717 vld1.64 {d0-d1}, [r4,:128]!
718 vcvt.s32.f32 q0, q0, #16 718 vcvt.s32.f32 q0, q0, #16
719 vld1.64 {d2-d3}, [r4,:128]! 719 vld1.64 {d2-d3}, [r4,:128]!
720 vcvt.s32.f32 q1, q1, #16 720 vcvt.s32.f32 q1, q1, #16
721 b 6b 721 b 6b
722 .endfunc 722 endfunc
723 723
724 function ff_vector_fmul_neon, export=1 724 function ff_vector_fmul_neon, export=1
725 mov r3, r0 725 mov r3, r0
726 subs r2, r2, #8 726 subs r2, r2, #8
727 vld1.64 {d0-d3}, [r0,:128]! 727 vld1.64 {d0-d3}, [r0,:128]!
757 vld1.64 {d6-d7}, [r1,:128]! 757 vld1.64 {d6-d7}, [r1,:128]!
758 vst1.64 {d18-d19},[r3,:128]! 758 vst1.64 {d18-d19},[r3,:128]!
759 vmul.f32 q9, q1, q3 759 vmul.f32 q9, q1, q3
760 3: vst1.64 {d16-d19},[r3,:128]! 760 3: vst1.64 {d16-d19},[r3,:128]!
761 bx lr 761 bx lr
762 .endfunc 762 endfunc
763 763
764 function ff_vector_fmul_window_neon, export=1 764 function ff_vector_fmul_window_neon, export=1
765 VFP vdup.32 q8, d0[0] 765 VFP vdup.32 q8, d0[0]
766 NOVFP vld1.32 {d16[],d17[]}, [sp,:32] 766 NOVFP vld1.32 {d16[],d17[]}, [sp,:32]
767 push {r4,r5,lr} 767 push {r4,r5,lr}
809 vrev64.32 q11, q11 809 vrev64.32 q11, q11
810 vswp d22, d23 810 vswp d22, d23
811 vst1.64 {d20,d21},[r0,:128]! 811 vst1.64 {d20,d21},[r0,:128]!
812 vst1.64 {d22,d23},[ip,:128], r5 812 vst1.64 {d22,d23},[ip,:128], r5
813 pop {r4,r5,pc} 813 pop {r4,r5,pc}
814 .endfunc 814 endfunc
815 815
816 #if CONFIG_VORBIS_DECODER 816 #if CONFIG_VORBIS_DECODER
817 function ff_vorbis_inverse_coupling_neon, export=1 817 function ff_vorbis_inverse_coupling_neon, export=1
818 vmov.i32 q10, #1<<31 818 vmov.i32 q10, #1<<31
819 subs r2, r2, #4 819 subs r2, r2, #4
870 vadd.f32 q1, q0, q2 870 vadd.f32 q1, q0, q2
871 vsub.f32 q0, q0, q3 871 vsub.f32 q0, q0, q3
872 vst1.32 {d2-d3}, [r0,:128]! 872 vst1.32 {d2-d3}, [r0,:128]!
873 vst1.32 {d0-d1}, [r1,:128]! 873 vst1.32 {d0-d1}, [r1,:128]!
874 bx lr 874 bx lr
875 .endfunc 875 endfunc
876 #endif 876 #endif
877 877
878 function ff_vector_fmul_scalar_neon, export=1 878 function ff_vector_fmul_scalar_neon, export=1
879 VFP len .req r2 879 VFP len .req r2
880 NOVFP len .req r3 880 NOVFP len .req r3
908 vst1.32 {q0},[r0,:128]! 908 vst1.32 {q0},[r0,:128]!
909 subs len, len, #4 909 subs len, len, #4
910 bgt 3b 910 bgt 3b
911 bx lr 911 bx lr
912 .unreq len 912 .unreq len
913 .endfunc 913 endfunc
914 914
915 function ff_vector_fmul_sv_scalar_2_neon, export=1 915 function ff_vector_fmul_sv_scalar_2_neon, export=1
916 VFP vdup.32 d16, d0[0] 916 VFP vdup.32 d16, d0[0]
917 NOVFP vdup.32 d16, r3 917 NOVFP vdup.32 d16, r3
918 NOVFP ldr r3, [sp] 918 NOVFP ldr r3, [sp]
934 vst1.32 {d5},[r0,:64]! 934 vst1.32 {d5},[r0,:64]!
935 b 1b 935 b 1b
936 2: vst1.32 {d4},[r0,:64]! 936 2: vst1.32 {d4},[r0,:64]!
937 vst1.32 {d5},[r0,:64]! 937 vst1.32 {d5},[r0,:64]!
938 bx lr 938 bx lr
939 .endfunc 939 endfunc
940 940
941 function ff_vector_fmul_sv_scalar_4_neon, export=1 941 function ff_vector_fmul_sv_scalar_4_neon, export=1
942 VFP vdup.32 q10, d0[0] 942 VFP vdup.32 q10, d0[0]
943 NOVFP vdup.32 q10, r3 943 NOVFP vdup.32 q10, r3
944 NOVFP ldr r3, [sp] 944 NOVFP ldr r3, [sp]
973 vmul.f32 q0, q0, q1 973 vmul.f32 q0, q0, q1
974 vst1.32 {q0},[r0,:128]! 974 vst1.32 {q0},[r0,:128]!
975 subs r3, r3, #4 975 subs r3, r3, #4
976 bgt 3b 976 bgt 3b
977 pop {pc} 977 pop {pc}
978 .endfunc 978 endfunc
979 979
980 function ff_sv_fmul_scalar_2_neon, export=1 980 function ff_sv_fmul_scalar_2_neon, export=1
981 VFP len .req r2 981 VFP len .req r2
982 NOVFP len .req r3 982 NOVFP len .req r3
983 VFP vdup.32 q8, d0[0] 983 VFP vdup.32 q8, d0[0]
996 vst1.32 {q1},[r0,:128]! 996 vst1.32 {q1},[r0,:128]!
997 b 1b 997 b 1b
998 2: vst1.32 {q1},[r0,:128]! 998 2: vst1.32 {q1},[r0,:128]!
999 bx lr 999 bx lr
1000 .unreq len 1000 .unreq len
1001 .endfunc 1001 endfunc
1002 1002
1003 function ff_sv_fmul_scalar_4_neon, export=1 1003 function ff_sv_fmul_scalar_4_neon, export=1
1004 VFP len .req r2 1004 VFP len .req r2
1005 NOVFP len .req r3 1005 NOVFP len .req r3
1006 VFP vdup.32 q8, d0[0] 1006 VFP vdup.32 q8, d0[0]
1011 vst1.32 {q0},[r0,:128]! 1011 vst1.32 {q0},[r0,:128]!
1012 subs len, len, #4 1012 subs len, len, #4
1013 bgt 1b 1013 bgt 1b
1014 bx lr 1014 bx lr
1015 .unreq len 1015 .unreq len
1016 .endfunc 1016 endfunc
1017 1017
1018 function ff_butterflies_float_neon, export=1 1018 function ff_butterflies_float_neon, export=1
1019 1: vld1.32 {q0},[r0,:128] 1019 1: vld1.32 {q0},[r0,:128]
1020 vld1.32 {q1},[r1,:128] 1020 vld1.32 {q1},[r1,:128]
1021 vsub.f32 q2, q0, q1 1021 vsub.f32 q2, q0, q1
1023 vst1.32 {q2},[r1,:128]! 1023 vst1.32 {q2},[r1,:128]!
1024 vst1.32 {q1},[r0,:128]! 1024 vst1.32 {q1},[r0,:128]!
1025 subs r2, r2, #4 1025 subs r2, r2, #4
1026 bgt 1b 1026 bgt 1b
1027 bx lr 1027 bx lr
1028 .endfunc 1028 endfunc
1029 1029
1030 function ff_scalarproduct_float_neon, export=1 1030 function ff_scalarproduct_float_neon, export=1
1031 vmov.f32 q2, #0.0 1031 vmov.f32 q2, #0.0
1032 1: vld1.32 {q0},[r0,:128]! 1032 1: vld1.32 {q0},[r0,:128]!
1033 vld1.32 {q1},[r1,:128]! 1033 vld1.32 {q1},[r1,:128]!
1036 bgt 1b 1036 bgt 1b
1037 vadd.f32 d0, d4, d5 1037 vadd.f32 d0, d4, d5
1038 vpadd.f32 d0, d0, d0 1038 vpadd.f32 d0, d0, d0
1039 NOVFP vmov.32 r0, d0[0] 1039 NOVFP vmov.32 r0, d0[0]
1040 bx lr 1040 bx lr
1041 .endfunc 1041 endfunc
1042 1042
1043 function ff_int32_to_float_fmul_scalar_neon, export=1 1043 function ff_int32_to_float_fmul_scalar_neon, export=1
1044 VFP vdup.32 q0, d0[0] 1044 VFP vdup.32 q0, d0[0]
1045 VFP len .req r2 1045 VFP len .req r2
1046 NOVFP vdup.32 q0, r2 1046 NOVFP vdup.32 q0, r2
1064 b 1b 1064 b 1b
1065 2: vst1.32 {q9}, [r0,:128]! 1065 2: vst1.32 {q9}, [r0,:128]!
1066 vst1.32 {q10},[r0,:128]! 1066 vst1.32 {q10},[r0,:128]!
1067 bx lr 1067 bx lr
1068 .unreq len 1068 .unreq len
1069 .endfunc 1069 endfunc
1070 1070
1071 function ff_vector_fmul_reverse_neon, export=1 1071 function ff_vector_fmul_reverse_neon, export=1
1072 add r2, r2, r3, lsl #2 1072 add r2, r2, r3, lsl #2
1073 sub r2, r2, #32 1073 sub r2, r2, #32
1074 mov r12, #-32 1074 mov r12, #-32
1088 vld1.32 {q2-q3}, [r2,:128], r12 1088 vld1.32 {q2-q3}, [r2,:128], r12
1089 vst1.32 {q8-q9}, [r0,:128]! 1089 vst1.32 {q8-q9}, [r0,:128]!
1090 b 1b 1090 b 1b
1091 2: vst1.32 {q8-q9}, [r0,:128]! 1091 2: vst1.32 {q8-q9}, [r0,:128]!
1092 bx lr 1092 bx lr
1093 .endfunc 1093 endfunc
1094 1094
1095 function ff_vector_fmul_add_neon, export=1 1095 function ff_vector_fmul_add_neon, export=1
1096 ldr r12, [sp] 1096 ldr r12, [sp]
1097 vld1.32 {q0-q1}, [r1,:128]! 1097 vld1.32 {q0-q1}, [r1,:128]!
1098 vld1.32 {q8-q9}, [r2,:128]! 1098 vld1.32 {q8-q9}, [r2,:128]!
1115 vld1.32 {q2-q3}, [r3,:128]! 1115 vld1.32 {q2-q3}, [r3,:128]!
1116 vst1.32 {q12-q13},[r0,:128]! 1116 vst1.32 {q12-q13},[r0,:128]!
1117 b 1b 1117 b 1b
1118 2: vst1.32 {q12-q13},[r0,:128]! 1118 2: vst1.32 {q12-q13},[r0,:128]!
1119 bx lr 1119 bx lr
1120 .endfunc 1120 endfunc
1121 1121
1122 function ff_vector_clipf_neon, export=1 1122 function ff_vector_clipf_neon, export=1
1123 VFP vdup.32 q1, d0[1] 1123 VFP vdup.32 q1, d0[1]
1124 VFP vdup.32 q0, d0[0] 1124 VFP vdup.32 q0, d0[0]
1125 NOVFP vdup.32 q0, r2 1125 NOVFP vdup.32 q0, r2
1141 vst1.f32 {q9},[r0,:128]! 1141 vst1.f32 {q9},[r0,:128]!
1142 b 1b 1142 b 1b
1143 2: vst1.f32 {q8},[r0,:128]! 1143 2: vst1.f32 {q8},[r0,:128]!
1144 vst1.f32 {q9},[r0,:128]! 1144 vst1.f32 {q9},[r0,:128]!
1145 bx lr 1145 bx lr
1146 .endfunc 1146 endfunc