comparison arm/h264dsp_neon.S @ 11443:361a5fcb4393 libavcodec

ARM: set size of asm functions in object files
author mru
date Tue, 09 Mar 2010 16:17:56 +0000
parents 5506cbb012b4
children a2de48cf798e
comparison
equal deleted inserted replaced
11442:fe32d9ba1c86 11443:361a5fcb4393
181 vst1.64 {d16}, [r0,:64], r2 181 vst1.64 {d16}, [r0,:64], r2
182 vst1.64 {d17}, [r0,:64], r2 182 vst1.64 {d17}, [r0,:64], r2
183 bgt 5b 183 bgt 5b
184 184
185 pop {r4-r7, pc} 185 pop {r4-r7, pc}
186 .endfunc 186 endfunc
187 .endm 187 .endm
188 188
189 /* chroma_mc4(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */ 189 /* chroma_mc4(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
190 .macro h264_chroma_mc4 type 190 .macro h264_chroma_mc4 type
191 function ff_\type\()_h264_chroma_mc4_neon, export=1 191 function ff_\type\()_h264_chroma_mc4_neon, export=1
315 vst1.32 {d16[0]}, [r0,:32], r2 315 vst1.32 {d16[0]}, [r0,:32], r2
316 vst1.32 {d16[1]}, [r0,:32], r2 316 vst1.32 {d16[1]}, [r0,:32], r2
317 bgt 5b 317 bgt 5b
318 318
319 pop {r4-r7, pc} 319 pop {r4-r7, pc}
320 .endfunc 320 endfunc
321 .endm 321 .endm
322 322
323 .macro h264_chroma_mc2 type 323 .macro h264_chroma_mc2 type
324 function ff_\type\()_h264_chroma_mc2_neon, export=1 324 function ff_\type\()_h264_chroma_mc2_neon, export=1
325 push {r4-r6, lr} 325 push {r4-r6, lr}
383 vst1.16 {d16[1]}, [r0,:16], r2 383 vst1.16 {d16[1]}, [r0,:16], r2
384 .endif 384 .endif
385 subs r3, r3, #2 385 subs r3, r3, #2
386 bgt 2b 386 bgt 2b
387 pop {r4-r6, pc} 387 pop {r4-r6, pc}
388 .endfunc 388 endfunc
389 .endm 389 .endm
390 390
391 .text 391 .text
392 .align 392 .align
393 393
516 vst1.64 {d0, d1}, [r0,:128], r1 516 vst1.64 {d0, d1}, [r0,:128], r1
517 vst1.64 {d10,d11}, [r0,:128] 517 vst1.64 {d10,d11}, [r0,:128]
518 518
519 align_pop_regs 519 align_pop_regs
520 bx lr 520 bx lr
521 .endfunc 521 endfunc
522 522
523 function ff_h264_h_loop_filter_luma_neon, export=1 523 function ff_h264_h_loop_filter_luma_neon, export=1
524 h264_loop_filter_start 524 h264_loop_filter_start
525 525
526 sub r0, r0, #4 526 sub r0, r0, #4
568 vst1.32 {d1[1]}, [r0], r1 568 vst1.32 {d1[1]}, [r0], r1
569 vst1.32 {d11[1]}, [r0], r1 569 vst1.32 {d11[1]}, [r0], r1
570 570
571 align_pop_regs 571 align_pop_regs
572 bx lr 572 bx lr
573 .endfunc 573 endfunc
574 574
575 .macro h264_loop_filter_chroma 575 .macro h264_loop_filter_chroma
576 vdup.8 d22, r2 @ alpha 576 vdup.8 d22, r2 @ alpha
577 vmovl.u8 q12, d24 577 vmovl.u8 q12, d24
578 vabd.u8 d26, d16, d0 @ abs(p0 - q0) 578 vabd.u8 d26, d16, d0 @ abs(p0 - q0)
619 sub r0, r0, r1, lsl #1 619 sub r0, r0, r1, lsl #1
620 vst1.64 {d16}, [r0,:64], r1 620 vst1.64 {d16}, [r0,:64], r1
621 vst1.64 {d0}, [r0,:64], r1 621 vst1.64 {d0}, [r0,:64], r1
622 622
623 bx lr 623 bx lr
624 .endfunc 624 endfunc
625 625
626 function ff_h264_h_loop_filter_chroma_neon, export=1 626 function ff_h264_h_loop_filter_chroma_neon, export=1
627 h264_loop_filter_start 627 h264_loop_filter_start
628 628
629 sub r0, r0, #2 629 sub r0, r0, #2
657 vst1.32 {d16[1]}, [r0], r1 657 vst1.32 {d16[1]}, [r0], r1
658 vst1.32 {d0[1]}, [r0], r1 658 vst1.32 {d0[1]}, [r0], r1
659 vst1.32 {d2[1]}, [r0], r1 659 vst1.32 {d2[1]}, [r0], r1
660 660
661 bx lr 661 bx lr
662 .endfunc 662 endfunc
663 663
664 /* H.264 qpel MC */ 664 /* H.264 qpel MC */
665 665
666 .macro lowpass_const r 666 .macro lowpass_const r
667 movw \r, #5 667 movw \r, #5
772 sub r1, r1, r2, lsl #4 772 sub r1, r1, r2, lsl #4
773 add r1, r1, #8 773 add r1, r1, #8
774 mov ip, #16 774 mov ip, #16
775 mov lr, r4 775 mov lr, r4
776 b put_h264_qpel8_h_lowpass_neon 776 b put_h264_qpel8_h_lowpass_neon
777 .endfunc 777 endfunc
778 778
779 .macro h264_qpel_h_lowpass type 779 .macro h264_qpel_h_lowpass type
780 function \type\()_h264_qpel16_h_lowpass_neon 780 function \type\()_h264_qpel16_h_lowpass_neon
781 push {lr} 781 push {lr}
782 mov ip, #16 782 mov ip, #16
785 sub r1, r1, r2, lsl #4 785 sub r1, r1, r2, lsl #4
786 add r0, r0, #8 786 add r0, r0, #8
787 add r1, r1, #8 787 add r1, r1, #8
788 mov ip, #16 788 mov ip, #16
789 pop {lr} 789 pop {lr}
790 .endfunc 790 endfunc
791 791
792 function \type\()_h264_qpel8_h_lowpass_neon 792 function \type\()_h264_qpel8_h_lowpass_neon
793 1: vld1.64 {d0, d1}, [r1], r2 793 1: vld1.64 {d0, d1}, [r1], r2
794 vld1.64 {d16,d17}, [r1], r2 794 vld1.64 {d16,d17}, [r1], r2
795 subs ip, ip, #2 795 subs ip, ip, #2
803 .endif 803 .endif
804 vst1.64 {d0}, [r0,:64], r3 804 vst1.64 {d0}, [r0,:64], r3
805 vst1.64 {d16}, [r0,:64], r3 805 vst1.64 {d16}, [r0,:64], r3
806 bne 1b 806 bne 1b
807 bx lr 807 bx lr
808 .endfunc 808 endfunc
809 .endm 809 .endm
810 810
811 h264_qpel_h_lowpass put 811 h264_qpel_h_lowpass put
812 h264_qpel_h_lowpass avg 812 h264_qpel_h_lowpass avg
813 813
822 add r0, r0, #8 822 add r0, r0, #8
823 add r1, r1, #8 823 add r1, r1, #8
824 add r3, r3, #8 824 add r3, r3, #8
825 mov ip, #16 825 mov ip, #16
826 pop {lr} 826 pop {lr}
827 .endfunc 827 endfunc
828 828
829 function \type\()_h264_qpel8_h_lowpass_l2_neon 829 function \type\()_h264_qpel8_h_lowpass_l2_neon
830 1: vld1.64 {d0, d1}, [r1], r2 830 1: vld1.64 {d0, d1}, [r1], r2
831 vld1.64 {d16,d17}, [r1], r2 831 vld1.64 {d16,d17}, [r1], r2
832 vld1.64 {d28}, [r3], r2 832 vld1.64 {d28}, [r3], r2
843 .endif 843 .endif
844 vst1.64 {d0}, [r0,:64], r2 844 vst1.64 {d0}, [r0,:64], r2
845 vst1.64 {d1}, [r0,:64], r2 845 vst1.64 {d1}, [r0,:64], r2
846 bne 1b 846 bne 1b
847 bx lr 847 bx lr
848 .endfunc 848 endfunc
849 .endm 849 .endm
850 850
851 h264_qpel_h_lowpass_l2 put 851 h264_qpel_h_lowpass_l2 put
852 h264_qpel_h_lowpass_l2 avg 852 h264_qpel_h_lowpass_l2 avg
853 853
862 add r1, r1, #8 862 add r1, r1, #8
863 bl put_h264_qpel8_v_lowpass_neon 863 bl put_h264_qpel8_v_lowpass_neon
864 sub r1, r1, r3, lsl #2 864 sub r1, r1, r3, lsl #2
865 mov lr, r4 865 mov lr, r4
866 b put_h264_qpel8_v_lowpass_neon 866 b put_h264_qpel8_v_lowpass_neon
867 .endfunc 867 endfunc
868 868
869 .macro h264_qpel_v_lowpass type 869 .macro h264_qpel_v_lowpass type
870 function \type\()_h264_qpel16_v_lowpass_neon 870 function \type\()_h264_qpel16_v_lowpass_neon
871 mov r4, lr 871 mov r4, lr
872 bl \type\()_h264_qpel8_v_lowpass_neon 872 bl \type\()_h264_qpel8_v_lowpass_neon
878 sub r1, r1, r3, lsl #2 878 sub r1, r1, r3, lsl #2
879 add r1, r1, #8 879 add r1, r1, #8
880 bl \type\()_h264_qpel8_v_lowpass_neon 880 bl \type\()_h264_qpel8_v_lowpass_neon
881 sub r1, r1, r3, lsl #2 881 sub r1, r1, r3, lsl #2
882 mov lr, r4 882 mov lr, r4
883 .endfunc 883 endfunc
884 884
885 function \type\()_h264_qpel8_v_lowpass_neon 885 function \type\()_h264_qpel8_v_lowpass_neon
886 vld1.64 {d8}, [r1], r3 886 vld1.64 {d8}, [r1], r3
887 vld1.64 {d10}, [r1], r3 887 vld1.64 {d10}, [r1], r3
888 vld1.64 {d12}, [r1], r3 888 vld1.64 {d12}, [r1], r3
932 vst1.64 {d24}, [r0,:64], r2 932 vst1.64 {d24}, [r0,:64], r2
933 vst1.64 {d26}, [r0,:64], r2 933 vst1.64 {d26}, [r0,:64], r2
934 vst1.64 {d28}, [r0,:64], r2 934 vst1.64 {d28}, [r0,:64], r2
935 935
936 bx lr 936 bx lr
937 .endfunc 937 endfunc
938 .endm 938 .endm
939 939
940 h264_qpel_v_lowpass put 940 h264_qpel_v_lowpass put
941 h264_qpel_v_lowpass avg 941 h264_qpel_v_lowpass avg
942 942
954 sub r1, r1, r3, lsl #2 954 sub r1, r1, r3, lsl #2
955 add r1, r1, #8 955 add r1, r1, #8
956 bl \type\()_h264_qpel8_v_lowpass_l2_neon 956 bl \type\()_h264_qpel8_v_lowpass_l2_neon
957 sub r1, r1, r3, lsl #2 957 sub r1, r1, r3, lsl #2
958 mov lr, r4 958 mov lr, r4
959 .endfunc 959 endfunc
960 960
961 function \type\()_h264_qpel8_v_lowpass_l2_neon 961 function \type\()_h264_qpel8_v_lowpass_l2_neon
962 vld1.64 {d8}, [r1], r3 962 vld1.64 {d8}, [r1], r3
963 vld1.64 {d10}, [r1], r3 963 vld1.64 {d10}, [r1], r3
964 vld1.64 {d12}, [r1], r3 964 vld1.64 {d12}, [r1], r3
1021 vst1.64 {d5}, [r0,:64], r3 1021 vst1.64 {d5}, [r0,:64], r3
1022 vst1.64 {d10}, [r0,:64], r3 1022 vst1.64 {d10}, [r0,:64], r3
1023 vst1.64 {d11}, [r0,:64], r3 1023 vst1.64 {d11}, [r0,:64], r3
1024 1024
1025 bx lr 1025 bx lr
1026 .endfunc 1026 endfunc
1027 .endm 1027 .endm
1028 1028
1029 h264_qpel_v_lowpass_l2 put 1029 h264_qpel_v_lowpass_l2 put
1030 h264_qpel_v_lowpass_l2 avg 1030 h264_qpel_v_lowpass_l2 avg
1031 1031
1091 lowpass_8.16 q8, q15, d16, d17, d30, d31, d15 1091 lowpass_8.16 q8, q15, d16, d17, d30, d31, d15
1092 1092
1093 transpose_8x8 d12, d13, d14, d15, d8, d9, d10, d11 1093 transpose_8x8 d12, d13, d14, d15, d8, d9, d10, d11
1094 1094
1095 bx lr 1095 bx lr
1096 .endfunc 1096 endfunc
1097 1097
1098 .macro h264_qpel8_hv_lowpass type 1098 .macro h264_qpel8_hv_lowpass type
1099 function \type\()_h264_qpel8_hv_lowpass_neon 1099 function \type\()_h264_qpel8_hv_lowpass_neon
1100 mov r10, lr 1100 mov r10, lr
1101 bl put_h264_qpel8_hv_lowpass_neon_top 1101 bl put_h264_qpel8_hv_lowpass_neon_top
1127 vst1.64 {d10}, [r0,:64], r2 1127 vst1.64 {d10}, [r0,:64], r2
1128 vst1.64 {d11}, [r0,:64], r2 1128 vst1.64 {d11}, [r0,:64], r2
1129 1129
1130 mov lr, r10 1130 mov lr, r10
1131 bx lr 1131 bx lr
1132 .endfunc 1132 endfunc
1133 .endm 1133 .endm
1134 1134
1135 h264_qpel8_hv_lowpass put 1135 h264_qpel8_hv_lowpass put
1136 h264_qpel8_hv_lowpass avg 1136 h264_qpel8_hv_lowpass avg
1137 1137
1176 vst1.64 {d6}, [r0,:64], r3 1176 vst1.64 {d6}, [r0,:64], r3
1177 vst1.64 {d7}, [r0,:64], r3 1177 vst1.64 {d7}, [r0,:64], r3
1178 1178
1179 mov lr, r10 1179 mov lr, r10
1180 bx lr 1180 bx lr
1181 .endfunc 1181 endfunc
1182 .endm 1182 .endm
1183 1183
1184 h264_qpel8_hv_lowpass_l2 put 1184 h264_qpel8_hv_lowpass_l2 put
1185 h264_qpel8_hv_lowpass_l2 avg 1185 h264_qpel8_hv_lowpass_l2 avg
1186 1186
1197 add r0, r0, #8 1197 add r0, r0, #8
1198 bl \type\()_h264_qpel8_hv_lowpass_neon 1198 bl \type\()_h264_qpel8_hv_lowpass_neon
1199 sub r1, r1, r3, lsl #2 1199 sub r1, r1, r3, lsl #2
1200 mov lr, r9 1200 mov lr, r9
1201 b \type\()_h264_qpel8_hv_lowpass_neon 1201 b \type\()_h264_qpel8_hv_lowpass_neon
1202 .endfunc 1202 endfunc
1203 1203
1204 function \type\()_h264_qpel16_hv_lowpass_l2_neon 1204 function \type\()_h264_qpel16_hv_lowpass_l2_neon
1205 mov r9, lr 1205 mov r9, lr
1206 sub r2, r4, #256 1206 sub r2, r4, #256
1207 bl \type\()_h264_qpel8_hv_lowpass_l2_neon 1207 bl \type\()_h264_qpel8_hv_lowpass_l2_neon
1214 add r0, r0, #8 1214 add r0, r0, #8
1215 bl \type\()_h264_qpel8_hv_lowpass_l2_neon 1215 bl \type\()_h264_qpel8_hv_lowpass_l2_neon
1216 sub r1, r1, r3, lsl #2 1216 sub r1, r1, r3, lsl #2
1217 mov lr, r9 1217 mov lr, r9
1218 b \type\()_h264_qpel8_hv_lowpass_l2_neon 1218 b \type\()_h264_qpel8_hv_lowpass_l2_neon
1219 .endfunc 1219 endfunc
1220 .endm 1220 .endm
1221 1221
1222 h264_qpel16_hv put 1222 h264_qpel16_hv put
1223 h264_qpel16_hv avg 1223 h264_qpel16_hv avg
1224 1224
1227 lowpass_const r3 1227 lowpass_const r3
1228 mov r3, r1 1228 mov r3, r1
1229 sub r1, r1, #2 1229 sub r1, r1, #2
1230 mov ip, #8 1230 mov ip, #8
1231 b \type\()_h264_qpel8_h_lowpass_l2_neon 1231 b \type\()_h264_qpel8_h_lowpass_l2_neon
1232 .endfunc 1232 endfunc
1233 1233
1234 function ff_\type\()_h264_qpel8_mc20_neon, export=1 1234 function ff_\type\()_h264_qpel8_mc20_neon, export=1
1235 lowpass_const r3 1235 lowpass_const r3
1236 sub r1, r1, #2 1236 sub r1, r1, #2
1237 mov r3, r2 1237 mov r3, r2
1238 mov ip, #8 1238 mov ip, #8
1239 b \type\()_h264_qpel8_h_lowpass_neon 1239 b \type\()_h264_qpel8_h_lowpass_neon
1240 .endfunc 1240 endfunc
1241 1241
1242 function ff_\type\()_h264_qpel8_mc30_neon, export=1 1242 function ff_\type\()_h264_qpel8_mc30_neon, export=1
1243 lowpass_const r3 1243 lowpass_const r3
1244 add r3, r1, #1 1244 add r3, r1, #1
1245 sub r1, r1, #2 1245 sub r1, r1, #2
1246 mov ip, #8 1246 mov ip, #8
1247 b \type\()_h264_qpel8_h_lowpass_l2_neon 1247 b \type\()_h264_qpel8_h_lowpass_l2_neon
1248 .endfunc 1248 endfunc
1249 1249
1250 function ff_\type\()_h264_qpel8_mc01_neon, export=1 1250 function ff_\type\()_h264_qpel8_mc01_neon, export=1
1251 push {lr} 1251 push {lr}
1252 mov ip, r1 1252 mov ip, r1
1253 \type\()_h264_qpel8_mc01: 1253 \type\()_h264_qpel8_mc01:
1256 sub r1, r1, r2, lsl #1 1256 sub r1, r1, r2, lsl #1
1257 vpush {d8-d15} 1257 vpush {d8-d15}
1258 bl \type\()_h264_qpel8_v_lowpass_l2_neon 1258 bl \type\()_h264_qpel8_v_lowpass_l2_neon
1259 vpop {d8-d15} 1259 vpop {d8-d15}
1260 pop {pc} 1260 pop {pc}
1261 .endfunc 1261 endfunc
1262 1262
1263 function ff_\type\()_h264_qpel8_mc11_neon, export=1 1263 function ff_\type\()_h264_qpel8_mc11_neon, export=1
1264 push {r0, r1, r11, lr} 1264 push {r0, r1, r11, lr}
1265 \type\()_h264_qpel8_mc11: 1265 \type\()_h264_qpel8_mc11:
1266 lowpass_const r3 1266 lowpass_const r3
1280 mov r2, #8 1280 mov r2, #8
1281 bl \type\()_h264_qpel8_v_lowpass_l2_neon 1281 bl \type\()_h264_qpel8_v_lowpass_l2_neon
1282 vpop {d8-d15} 1282 vpop {d8-d15}
1283 add sp, r11, #8 1283 add sp, r11, #8
1284 pop {r11, pc} 1284 pop {r11, pc}
1285 .endfunc 1285 endfunc
1286 1286
1287 function ff_\type\()_h264_qpel8_mc21_neon, export=1 1287 function ff_\type\()_h264_qpel8_mc21_neon, export=1
1288 push {r0, r1, r4, r10, r11, lr} 1288 push {r0, r1, r4, r10, r11, lr}
1289 \type\()_h264_qpel8_mc21: 1289 \type\()_h264_qpel8_mc21:
1290 lowpass_const r3 1290 lowpass_const r3
1305 sub r2, r4, #64 1305 sub r2, r4, #64
1306 bl \type\()_h264_qpel8_hv_lowpass_l2_neon 1306 bl \type\()_h264_qpel8_hv_lowpass_l2_neon
1307 vpop {d8-d15} 1307 vpop {d8-d15}
1308 add sp, r11, #8 1308 add sp, r11, #8
1309 pop {r4, r10, r11, pc} 1309 pop {r4, r10, r11, pc}
1310 .endfunc 1310 endfunc
1311 1311
1312 function ff_\type\()_h264_qpel8_mc31_neon, export=1 1312 function ff_\type\()_h264_qpel8_mc31_neon, export=1
1313 add r1, r1, #1 1313 add r1, r1, #1
1314 push {r0, r1, r11, lr} 1314 push {r0, r1, r11, lr}
1315 sub r1, r1, #1 1315 sub r1, r1, #1
1316 b \type\()_h264_qpel8_mc11 1316 b \type\()_h264_qpel8_mc11
1317 .endfunc 1317 endfunc
1318 1318
1319 function ff_\type\()_h264_qpel8_mc02_neon, export=1 1319 function ff_\type\()_h264_qpel8_mc02_neon, export=1
1320 push {lr} 1320 push {lr}
1321 lowpass_const r3 1321 lowpass_const r3
1322 sub r1, r1, r2, lsl #1 1322 sub r1, r1, r2, lsl #1
1323 mov r3, r2 1323 mov r3, r2
1324 vpush {d8-d15} 1324 vpush {d8-d15}
1325 bl \type\()_h264_qpel8_v_lowpass_neon 1325 bl \type\()_h264_qpel8_v_lowpass_neon
1326 vpop {d8-d15} 1326 vpop {d8-d15}
1327 pop {pc} 1327 pop {pc}
1328 .endfunc 1328 endfunc
1329 1329
1330 function ff_\type\()_h264_qpel8_mc12_neon, export=1 1330 function ff_\type\()_h264_qpel8_mc12_neon, export=1
1331 push {r0, r1, r4, r10, r11, lr} 1331 push {r0, r1, r4, r10, r11, lr}
1332 \type\()_h264_qpel8_mc12: 1332 \type\()_h264_qpel8_mc12:
1333 lowpass_const r3 1333 lowpass_const r3
1347 sub r2, r4, #64 1347 sub r2, r4, #64
1348 bl \type\()_h264_qpel8_hv_lowpass_l2_neon 1348 bl \type\()_h264_qpel8_hv_lowpass_l2_neon
1349 vpop {d8-d15} 1349 vpop {d8-d15}
1350 add sp, r11, #8 1350 add sp, r11, #8
1351 pop {r4, r10, r11, pc} 1351 pop {r4, r10, r11, pc}
1352 .endfunc 1352 endfunc
1353 1353
1354 function ff_\type\()_h264_qpel8_mc22_neon, export=1 1354 function ff_\type\()_h264_qpel8_mc22_neon, export=1
1355 push {r4, r10, r11, lr} 1355 push {r4, r10, r11, lr}
1356 mov r11, sp 1356 mov r11, sp
1357 bic sp, sp, #15 1357 bic sp, sp, #15
1363 vpush {d8-d15} 1363 vpush {d8-d15}
1364 bl \type\()_h264_qpel8_hv_lowpass_neon 1364 bl \type\()_h264_qpel8_hv_lowpass_neon
1365 vpop {d8-d15} 1365 vpop {d8-d15}
1366 mov sp, r11 1366 mov sp, r11
1367 pop {r4, r10, r11, pc} 1367 pop {r4, r10, r11, pc}
1368 .endfunc 1368 endfunc
1369 1369
1370 function ff_\type\()_h264_qpel8_mc32_neon, export=1 1370 function ff_\type\()_h264_qpel8_mc32_neon, export=1
1371 push {r0, r1, r4, r10, r11, lr} 1371 push {r0, r1, r4, r10, r11, lr}
1372 add r1, r1, #1 1372 add r1, r1, #1
1373 b \type\()_h264_qpel8_mc12 1373 b \type\()_h264_qpel8_mc12
1374 .endfunc 1374 endfunc
1375 1375
1376 function ff_\type\()_h264_qpel8_mc03_neon, export=1 1376 function ff_\type\()_h264_qpel8_mc03_neon, export=1
1377 push {lr} 1377 push {lr}
1378 add ip, r1, r2 1378 add ip, r1, r2
1379 b \type\()_h264_qpel8_mc01 1379 b \type\()_h264_qpel8_mc01
1380 .endfunc 1380 endfunc
1381 1381
1382 function ff_\type\()_h264_qpel8_mc13_neon, export=1 1382 function ff_\type\()_h264_qpel8_mc13_neon, export=1
1383 push {r0, r1, r11, lr} 1383 push {r0, r1, r11, lr}
1384 add r1, r1, r2 1384 add r1, r1, r2
1385 b \type\()_h264_qpel8_mc11 1385 b \type\()_h264_qpel8_mc11
1386 .endfunc 1386 endfunc
1387 1387
1388 function ff_\type\()_h264_qpel8_mc23_neon, export=1 1388 function ff_\type\()_h264_qpel8_mc23_neon, export=1
1389 push {r0, r1, r4, r10, r11, lr} 1389 push {r0, r1, r4, r10, r11, lr}
1390 add r1, r1, r2 1390 add r1, r1, r2
1391 b \type\()_h264_qpel8_mc21 1391 b \type\()_h264_qpel8_mc21
1392 .endfunc 1392 endfunc
1393 1393
1394 function ff_\type\()_h264_qpel8_mc33_neon, export=1 1394 function ff_\type\()_h264_qpel8_mc33_neon, export=1
1395 add r1, r1, #1 1395 add r1, r1, #1
1396 push {r0, r1, r11, lr} 1396 push {r0, r1, r11, lr}
1397 add r1, r1, r2 1397 add r1, r1, r2
1398 sub r1, r1, #1 1398 sub r1, r1, #1
1399 b \type\()_h264_qpel8_mc11 1399 b \type\()_h264_qpel8_mc11
1400 .endfunc 1400 endfunc
1401 .endm 1401 .endm
1402 1402
1403 h264_qpel8 put 1403 h264_qpel8 put
1404 h264_qpel8 avg 1404 h264_qpel8 avg
1405 1405
1407 function ff_\type\()_h264_qpel16_mc10_neon, export=1 1407 function ff_\type\()_h264_qpel16_mc10_neon, export=1
1408 lowpass_const r3 1408 lowpass_const r3
1409 mov r3, r1 1409 mov r3, r1
1410 sub r1, r1, #2 1410 sub r1, r1, #2
1411 b \type\()_h264_qpel16_h_lowpass_l2_neon 1411 b \type\()_h264_qpel16_h_lowpass_l2_neon
1412 .endfunc 1412 endfunc
1413 1413
1414 function ff_\type\()_h264_qpel16_mc20_neon, export=1 1414 function ff_\type\()_h264_qpel16_mc20_neon, export=1
1415 lowpass_const r3 1415 lowpass_const r3
1416 sub r1, r1, #2 1416 sub r1, r1, #2
1417 mov r3, r2 1417 mov r3, r2
1418 b \type\()_h264_qpel16_h_lowpass_neon 1418 b \type\()_h264_qpel16_h_lowpass_neon
1419 .endfunc 1419 endfunc
1420 1420
1421 function ff_\type\()_h264_qpel16_mc30_neon, export=1 1421 function ff_\type\()_h264_qpel16_mc30_neon, export=1
1422 lowpass_const r3 1422 lowpass_const r3
1423 add r3, r1, #1 1423 add r3, r1, #1
1424 sub r1, r1, #2 1424 sub r1, r1, #2
1425 b \type\()_h264_qpel16_h_lowpass_l2_neon 1425 b \type\()_h264_qpel16_h_lowpass_l2_neon
1426 .endfunc 1426 endfunc
1427 1427
1428 function ff_\type\()_h264_qpel16_mc01_neon, export=1 1428 function ff_\type\()_h264_qpel16_mc01_neon, export=1
1429 push {r4, lr} 1429 push {r4, lr}
1430 mov ip, r1 1430 mov ip, r1
1431 \type\()_h264_qpel16_mc01: 1431 \type\()_h264_qpel16_mc01:
1434 sub r1, r1, r2, lsl #1 1434 sub r1, r1, r2, lsl #1
1435 vpush {d8-d15} 1435 vpush {d8-d15}
1436 bl \type\()_h264_qpel16_v_lowpass_l2_neon 1436 bl \type\()_h264_qpel16_v_lowpass_l2_neon
1437 vpop {d8-d15} 1437 vpop {d8-d15}
1438 pop {r4, pc} 1438 pop {r4, pc}
1439 .endfunc 1439 endfunc
1440 1440
1441 function ff_\type\()_h264_qpel16_mc11_neon, export=1 1441 function ff_\type\()_h264_qpel16_mc11_neon, export=1
1442 push {r0, r1, r4, r11, lr} 1442 push {r0, r1, r4, r11, lr}
1443 \type\()_h264_qpel16_mc11: 1443 \type\()_h264_qpel16_mc11:
1444 lowpass_const r3 1444 lowpass_const r3
1457 mov r2, #16 1457 mov r2, #16
1458 bl \type\()_h264_qpel16_v_lowpass_l2_neon 1458 bl \type\()_h264_qpel16_v_lowpass_l2_neon
1459 vpop {d8-d15} 1459 vpop {d8-d15}
1460 add sp, r11, #8 1460 add sp, r11, #8
1461 pop {r4, r11, pc} 1461 pop {r4, r11, pc}
1462 .endfunc 1462 endfunc
1463 1463
1464 function ff_\type\()_h264_qpel16_mc21_neon, export=1 1464 function ff_\type\()_h264_qpel16_mc21_neon, export=1
1465 push {r0, r1, r4-r5, r9-r11, lr} 1465 push {r0, r1, r4-r5, r9-r11, lr}
1466 \type\()_h264_qpel16_mc21: 1466 \type\()_h264_qpel16_mc21:
1467 lowpass_const r3 1467 lowpass_const r3
1479 mov r3, r2 1479 mov r3, r2
1480 bl \type\()_h264_qpel16_hv_lowpass_l2_neon 1480 bl \type\()_h264_qpel16_hv_lowpass_l2_neon
1481 vpop {d8-d15} 1481 vpop {d8-d15}
1482 add sp, r11, #8 1482 add sp, r11, #8
1483 pop {r4-r5, r9-r11, pc} 1483 pop {r4-r5, r9-r11, pc}
1484 .endfunc 1484 endfunc
1485 1485
1486 function ff_\type\()_h264_qpel16_mc31_neon, export=1 1486 function ff_\type\()_h264_qpel16_mc31_neon, export=1
1487 add r1, r1, #1 1487 add r1, r1, #1
1488 push {r0, r1, r4, r11, lr} 1488 push {r0, r1, r4, r11, lr}
1489 sub r1, r1, #1 1489 sub r1, r1, #1
1490 b \type\()_h264_qpel16_mc11 1490 b \type\()_h264_qpel16_mc11
1491 .endfunc 1491 endfunc
1492 1492
1493 function ff_\type\()_h264_qpel16_mc02_neon, export=1 1493 function ff_\type\()_h264_qpel16_mc02_neon, export=1
1494 push {r4, lr} 1494 push {r4, lr}
1495 lowpass_const r3 1495 lowpass_const r3
1496 sub r1, r1, r2, lsl #1 1496 sub r1, r1, r2, lsl #1
1497 mov r3, r2 1497 mov r3, r2
1498 vpush {d8-d15} 1498 vpush {d8-d15}
1499 bl \type\()_h264_qpel16_v_lowpass_neon 1499 bl \type\()_h264_qpel16_v_lowpass_neon
1500 vpop {d8-d15} 1500 vpop {d8-d15}
1501 pop {r4, pc} 1501 pop {r4, pc}
1502 .endfunc 1502 endfunc
1503 1503
1504 function ff_\type\()_h264_qpel16_mc12_neon, export=1 1504 function ff_\type\()_h264_qpel16_mc12_neon, export=1
1505 push {r0, r1, r4-r5, r9-r11, lr} 1505 push {r0, r1, r4-r5, r9-r11, lr}
1506 \type\()_h264_qpel16_mc12: 1506 \type\()_h264_qpel16_mc12:
1507 lowpass_const r3 1507 lowpass_const r3
1520 mov r2, r3 1520 mov r2, r3
1521 bl \type\()_h264_qpel16_hv_lowpass_l2_neon 1521 bl \type\()_h264_qpel16_hv_lowpass_l2_neon
1522 vpop {d8-d15} 1522 vpop {d8-d15}
1523 add sp, r11, #8 1523 add sp, r11, #8
1524 pop {r4-r5, r9-r11, pc} 1524 pop {r4-r5, r9-r11, pc}
1525 .endfunc 1525 endfunc
1526 1526
1527 function ff_\type\()_h264_qpel16_mc22_neon, export=1 1527 function ff_\type\()_h264_qpel16_mc22_neon, export=1
1528 push {r4, r9-r11, lr} 1528 push {r4, r9-r11, lr}
1529 lowpass_const r3 1529 lowpass_const r3
1530 mov r11, sp 1530 mov r11, sp
1537 vpush {d8-d15} 1537 vpush {d8-d15}
1538 bl \type\()_h264_qpel16_hv_lowpass_neon 1538 bl \type\()_h264_qpel16_hv_lowpass_neon
1539 vpop {d8-d15} 1539 vpop {d8-d15}
1540 mov sp, r11 1540 mov sp, r11
1541 pop {r4, r9-r11, pc} 1541 pop {r4, r9-r11, pc}
1542 .endfunc 1542 endfunc
1543 1543
1544 function ff_\type\()_h264_qpel16_mc32_neon, export=1 1544 function ff_\type\()_h264_qpel16_mc32_neon, export=1
1545 push {r0, r1, r4-r5, r9-r11, lr} 1545 push {r0, r1, r4-r5, r9-r11, lr}
1546 add r1, r1, #1 1546 add r1, r1, #1
1547 b \type\()_h264_qpel16_mc12 1547 b \type\()_h264_qpel16_mc12
1548 .endfunc 1548 endfunc
1549 1549
1550 function ff_\type\()_h264_qpel16_mc03_neon, export=1 1550 function ff_\type\()_h264_qpel16_mc03_neon, export=1
1551 push {r4, lr} 1551 push {r4, lr}
1552 add ip, r1, r2 1552 add ip, r1, r2
1553 b \type\()_h264_qpel16_mc01 1553 b \type\()_h264_qpel16_mc01
1554 .endfunc 1554 endfunc
1555 1555
1556 function ff_\type\()_h264_qpel16_mc13_neon, export=1 1556 function ff_\type\()_h264_qpel16_mc13_neon, export=1
1557 push {r0, r1, r4, r11, lr} 1557 push {r0, r1, r4, r11, lr}
1558 add r1, r1, r2 1558 add r1, r1, r2
1559 b \type\()_h264_qpel16_mc11 1559 b \type\()_h264_qpel16_mc11
1560 .endfunc 1560 endfunc
1561 1561
1562 function ff_\type\()_h264_qpel16_mc23_neon, export=1 1562 function ff_\type\()_h264_qpel16_mc23_neon, export=1
1563 push {r0, r1, r4-r5, r9-r11, lr} 1563 push {r0, r1, r4-r5, r9-r11, lr}
1564 add r1, r1, r2 1564 add r1, r1, r2
1565 b \type\()_h264_qpel16_mc21 1565 b \type\()_h264_qpel16_mc21
1566 .endfunc 1566 endfunc
1567 1567
1568 function ff_\type\()_h264_qpel16_mc33_neon, export=1 1568 function ff_\type\()_h264_qpel16_mc33_neon, export=1
1569 add r1, r1, #1 1569 add r1, r1, #1
1570 push {r0, r1, r4, r11, lr} 1570 push {r0, r1, r4, r11, lr}
1571 add r1, r1, r2 1571 add r1, r1, r2
1572 sub r1, r1, #1 1572 sub r1, r1, #1
1573 b \type\()_h264_qpel16_mc11 1573 b \type\()_h264_qpel16_mc11
1574 .endfunc 1574 endfunc
1575 .endm 1575 .endm
1576 1576
1577 h264_qpel16 put 1577 h264_qpel16 put
1578 h264_qpel16 avg 1578 h264_qpel16 avg
1579 1579
1717 30: rsb r4, r4, #0 1717 30: rsb r4, r4, #0
1718 rsb r5, r5, #0 1718 rsb r5, r5, #0
1719 biweight_\w vmlsl.u8, vmlsl.u8 1719 biweight_\w vmlsl.u8, vmlsl.u8
1720 40: rsb r5, r5, #0 1720 40: rsb r5, r5, #0
1721 biweight_\w vmlsl.u8, vmlal.u8 1721 biweight_\w vmlsl.u8, vmlal.u8
1722 .endfunc 1722 endfunc
1723 .endm 1723 .endm
1724 1724
1725 .macro biweight_entry w, h, b=1 1725 .macro biweight_entry w, h, b=1
1726 function ff_biweight_h264_pixels_\w\()x\h\()_neon, export=1 1726 function ff_biweight_h264_pixels_\w\()x\h\()_neon, export=1
1727 mov ip, #\h 1727 mov ip, #\h
1728 .if \b 1728 .if \b
1729 b biweight_h264_pixels_\w\()_neon 1729 b biweight_h264_pixels_\w\()_neon
1730 .endif 1730 .endif
1731 .endfunc 1731 endfunc
1732 .endm 1732 .endm
1733 1733
1734 biweight_entry 16, 8 1734 biweight_entry 16, 8
1735 biweight_entry 16, 16, b=0 1735 biweight_entry 16, 16, b=0
1736 biweight_func 16 1736 biweight_func 16
1854 cmp r3, #0 1854 cmp r3, #0
1855 blt 10f 1855 blt 10f
1856 weight_\w vadd.s16 1856 weight_\w vadd.s16
1857 10: rsb r3, r3, #0 1857 10: rsb r3, r3, #0
1858 weight_\w vsub.s16 1858 weight_\w vsub.s16
1859 .endfunc 1859 endfunc
1860 .endm 1860 .endm
1861 1861
1862 .macro weight_entry w, h, b=1 1862 .macro weight_entry w, h, b=1
1863 function ff_weight_h264_pixels_\w\()x\h\()_neon, export=1 1863 function ff_weight_h264_pixels_\w\()x\h\()_neon, export=1
1864 mov ip, #\h 1864 mov ip, #\h
1865 .if \b 1865 .if \b
1866 b weight_h264_pixels_\w\()_neon 1866 b weight_h264_pixels_\w\()_neon
1867 .endif 1867 .endif
1868 .endfunc 1868 endfunc
1869 .endm 1869 .endm
1870 1870
1871 weight_entry 16, 8 1871 weight_entry 16, 8
1872 weight_entry 16, 16, b=0 1872 weight_entry 16, 16, b=0
1873 weight_func 16 1873 weight_func 16