Mercurial > libavcodec.hg
comparison arm/h264dsp_neon.S @ 11443:361a5fcb4393 libavcodec
ARM: set size of asm functions in object files
field | value |
---|---|
author | mru |
date | Tue, 09 Mar 2010 16:17:56 +0000 |
parents | 5506cbb012b4 |
children | a2de48cf798e |
comparison legend: equal, deleted, inserted, replaced
11442:fe32d9ba1c86 | 11443:361a5fcb4393 |
---|---|
181 vst1.64 {d16}, [r0,:64], r2 | 181 vst1.64 {d16}, [r0,:64], r2 |
182 vst1.64 {d17}, [r0,:64], r2 | 182 vst1.64 {d17}, [r0,:64], r2 |
183 bgt 5b | 183 bgt 5b |
184 | 184 |
185 pop {r4-r7, pc} | 185 pop {r4-r7, pc} |
186 .endfunc | 186 endfunc |
187 .endm | 187 .endm |
188 | 188 |
189 /* chroma_mc4(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */ | 189 /* chroma_mc4(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */ |
190 .macro h264_chroma_mc4 type | 190 .macro h264_chroma_mc4 type |
191 function ff_\type\()_h264_chroma_mc4_neon, export=1 | 191 function ff_\type\()_h264_chroma_mc4_neon, export=1 |
315 vst1.32 {d16[0]}, [r0,:32], r2 | 315 vst1.32 {d16[0]}, [r0,:32], r2 |
316 vst1.32 {d16[1]}, [r0,:32], r2 | 316 vst1.32 {d16[1]}, [r0,:32], r2 |
317 bgt 5b | 317 bgt 5b |
318 | 318 |
319 pop {r4-r7, pc} | 319 pop {r4-r7, pc} |
320 .endfunc | 320 endfunc |
321 .endm | 321 .endm |
322 | 322 |
323 .macro h264_chroma_mc2 type | 323 .macro h264_chroma_mc2 type |
324 function ff_\type\()_h264_chroma_mc2_neon, export=1 | 324 function ff_\type\()_h264_chroma_mc2_neon, export=1 |
325 push {r4-r6, lr} | 325 push {r4-r6, lr} |
383 vst1.16 {d16[1]}, [r0,:16], r2 | 383 vst1.16 {d16[1]}, [r0,:16], r2 |
384 .endif | 384 .endif |
385 subs r3, r3, #2 | 385 subs r3, r3, #2 |
386 bgt 2b | 386 bgt 2b |
387 pop {r4-r6, pc} | 387 pop {r4-r6, pc} |
388 .endfunc | 388 endfunc |
389 .endm | 389 .endm |
390 | 390 |
391 .text | 391 .text |
392 .align | 392 .align |
393 | 393 |
516 vst1.64 {d0, d1}, [r0,:128], r1 | 516 vst1.64 {d0, d1}, [r0,:128], r1 |
517 vst1.64 {d10,d11}, [r0,:128] | 517 vst1.64 {d10,d11}, [r0,:128] |
518 | 518 |
519 align_pop_regs | 519 align_pop_regs |
520 bx lr | 520 bx lr |
521 .endfunc | 521 endfunc |
522 | 522 |
523 function ff_h264_h_loop_filter_luma_neon, export=1 | 523 function ff_h264_h_loop_filter_luma_neon, export=1 |
524 h264_loop_filter_start | 524 h264_loop_filter_start |
525 | 525 |
526 sub r0, r0, #4 | 526 sub r0, r0, #4 |
568 vst1.32 {d1[1]}, [r0], r1 | 568 vst1.32 {d1[1]}, [r0], r1 |
569 vst1.32 {d11[1]}, [r0], r1 | 569 vst1.32 {d11[1]}, [r0], r1 |
570 | 570 |
571 align_pop_regs | 571 align_pop_regs |
572 bx lr | 572 bx lr |
573 .endfunc | 573 endfunc |
574 | 574 |
575 .macro h264_loop_filter_chroma | 575 .macro h264_loop_filter_chroma |
576 vdup.8 d22, r2 @ alpha | 576 vdup.8 d22, r2 @ alpha |
577 vmovl.u8 q12, d24 | 577 vmovl.u8 q12, d24 |
578 vabd.u8 d26, d16, d0 @ abs(p0 - q0) | 578 vabd.u8 d26, d16, d0 @ abs(p0 - q0) |
619 sub r0, r0, r1, lsl #1 | 619 sub r0, r0, r1, lsl #1 |
620 vst1.64 {d16}, [r0,:64], r1 | 620 vst1.64 {d16}, [r0,:64], r1 |
621 vst1.64 {d0}, [r0,:64], r1 | 621 vst1.64 {d0}, [r0,:64], r1 |
622 | 622 |
623 bx lr | 623 bx lr |
624 .endfunc | 624 endfunc |
625 | 625 |
626 function ff_h264_h_loop_filter_chroma_neon, export=1 | 626 function ff_h264_h_loop_filter_chroma_neon, export=1 |
627 h264_loop_filter_start | 627 h264_loop_filter_start |
628 | 628 |
629 sub r0, r0, #2 | 629 sub r0, r0, #2 |
657 vst1.32 {d16[1]}, [r0], r1 | 657 vst1.32 {d16[1]}, [r0], r1 |
658 vst1.32 {d0[1]}, [r0], r1 | 658 vst1.32 {d0[1]}, [r0], r1 |
659 vst1.32 {d2[1]}, [r0], r1 | 659 vst1.32 {d2[1]}, [r0], r1 |
660 | 660 |
661 bx lr | 661 bx lr |
662 .endfunc | 662 endfunc |
663 | 663 |
664 /* H.264 qpel MC */ | 664 /* H.264 qpel MC */ |
665 | 665 |
666 .macro lowpass_const r | 666 .macro lowpass_const r |
667 movw \r, #5 | 667 movw \r, #5 |
772 sub r1, r1, r2, lsl #4 | 772 sub r1, r1, r2, lsl #4 |
773 add r1, r1, #8 | 773 add r1, r1, #8 |
774 mov ip, #16 | 774 mov ip, #16 |
775 mov lr, r4 | 775 mov lr, r4 |
776 b put_h264_qpel8_h_lowpass_neon | 776 b put_h264_qpel8_h_lowpass_neon |
777 .endfunc | 777 endfunc |
778 | 778 |
779 .macro h264_qpel_h_lowpass type | 779 .macro h264_qpel_h_lowpass type |
780 function \type\()_h264_qpel16_h_lowpass_neon | 780 function \type\()_h264_qpel16_h_lowpass_neon |
781 push {lr} | 781 push {lr} |
782 mov ip, #16 | 782 mov ip, #16 |
785 sub r1, r1, r2, lsl #4 | 785 sub r1, r1, r2, lsl #4 |
786 add r0, r0, #8 | 786 add r0, r0, #8 |
787 add r1, r1, #8 | 787 add r1, r1, #8 |
788 mov ip, #16 | 788 mov ip, #16 |
789 pop {lr} | 789 pop {lr} |
790 .endfunc | 790 endfunc |
791 | 791 |
792 function \type\()_h264_qpel8_h_lowpass_neon | 792 function \type\()_h264_qpel8_h_lowpass_neon |
793 1: vld1.64 {d0, d1}, [r1], r2 | 793 1: vld1.64 {d0, d1}, [r1], r2 |
794 vld1.64 {d16,d17}, [r1], r2 | 794 vld1.64 {d16,d17}, [r1], r2 |
795 subs ip, ip, #2 | 795 subs ip, ip, #2 |
803 .endif | 803 .endif |
804 vst1.64 {d0}, [r0,:64], r3 | 804 vst1.64 {d0}, [r0,:64], r3 |
805 vst1.64 {d16}, [r0,:64], r3 | 805 vst1.64 {d16}, [r0,:64], r3 |
806 bne 1b | 806 bne 1b |
807 bx lr | 807 bx lr |
808 .endfunc | 808 endfunc |
809 .endm | 809 .endm |
810 | 810 |
811 h264_qpel_h_lowpass put | 811 h264_qpel_h_lowpass put |
812 h264_qpel_h_lowpass avg | 812 h264_qpel_h_lowpass avg |
813 | 813 |
822 add r0, r0, #8 | 822 add r0, r0, #8 |
823 add r1, r1, #8 | 823 add r1, r1, #8 |
824 add r3, r3, #8 | 824 add r3, r3, #8 |
825 mov ip, #16 | 825 mov ip, #16 |
826 pop {lr} | 826 pop {lr} |
827 .endfunc | 827 endfunc |
828 | 828 |
829 function \type\()_h264_qpel8_h_lowpass_l2_neon | 829 function \type\()_h264_qpel8_h_lowpass_l2_neon |
830 1: vld1.64 {d0, d1}, [r1], r2 | 830 1: vld1.64 {d0, d1}, [r1], r2 |
831 vld1.64 {d16,d17}, [r1], r2 | 831 vld1.64 {d16,d17}, [r1], r2 |
832 vld1.64 {d28}, [r3], r2 | 832 vld1.64 {d28}, [r3], r2 |
843 .endif | 843 .endif |
844 vst1.64 {d0}, [r0,:64], r2 | 844 vst1.64 {d0}, [r0,:64], r2 |
845 vst1.64 {d1}, [r0,:64], r2 | 845 vst1.64 {d1}, [r0,:64], r2 |
846 bne 1b | 846 bne 1b |
847 bx lr | 847 bx lr |
848 .endfunc | 848 endfunc |
849 .endm | 849 .endm |
850 | 850 |
851 h264_qpel_h_lowpass_l2 put | 851 h264_qpel_h_lowpass_l2 put |
852 h264_qpel_h_lowpass_l2 avg | 852 h264_qpel_h_lowpass_l2 avg |
853 | 853 |
862 add r1, r1, #8 | 862 add r1, r1, #8 |
863 bl put_h264_qpel8_v_lowpass_neon | 863 bl put_h264_qpel8_v_lowpass_neon |
864 sub r1, r1, r3, lsl #2 | 864 sub r1, r1, r3, lsl #2 |
865 mov lr, r4 | 865 mov lr, r4 |
866 b put_h264_qpel8_v_lowpass_neon | 866 b put_h264_qpel8_v_lowpass_neon |
867 .endfunc | 867 endfunc |
868 | 868 |
869 .macro h264_qpel_v_lowpass type | 869 .macro h264_qpel_v_lowpass type |
870 function \type\()_h264_qpel16_v_lowpass_neon | 870 function \type\()_h264_qpel16_v_lowpass_neon |
871 mov r4, lr | 871 mov r4, lr |
872 bl \type\()_h264_qpel8_v_lowpass_neon | 872 bl \type\()_h264_qpel8_v_lowpass_neon |
878 sub r1, r1, r3, lsl #2 | 878 sub r1, r1, r3, lsl #2 |
879 add r1, r1, #8 | 879 add r1, r1, #8 |
880 bl \type\()_h264_qpel8_v_lowpass_neon | 880 bl \type\()_h264_qpel8_v_lowpass_neon |
881 sub r1, r1, r3, lsl #2 | 881 sub r1, r1, r3, lsl #2 |
882 mov lr, r4 | 882 mov lr, r4 |
883 .endfunc | 883 endfunc |
884 | 884 |
885 function \type\()_h264_qpel8_v_lowpass_neon | 885 function \type\()_h264_qpel8_v_lowpass_neon |
886 vld1.64 {d8}, [r1], r3 | 886 vld1.64 {d8}, [r1], r3 |
887 vld1.64 {d10}, [r1], r3 | 887 vld1.64 {d10}, [r1], r3 |
888 vld1.64 {d12}, [r1], r3 | 888 vld1.64 {d12}, [r1], r3 |
932 vst1.64 {d24}, [r0,:64], r2 | 932 vst1.64 {d24}, [r0,:64], r2 |
933 vst1.64 {d26}, [r0,:64], r2 | 933 vst1.64 {d26}, [r0,:64], r2 |
934 vst1.64 {d28}, [r0,:64], r2 | 934 vst1.64 {d28}, [r0,:64], r2 |
935 | 935 |
936 bx lr | 936 bx lr |
937 .endfunc | 937 endfunc |
938 .endm | 938 .endm |
939 | 939 |
940 h264_qpel_v_lowpass put | 940 h264_qpel_v_lowpass put |
941 h264_qpel_v_lowpass avg | 941 h264_qpel_v_lowpass avg |
942 | 942 |
954 sub r1, r1, r3, lsl #2 | 954 sub r1, r1, r3, lsl #2 |
955 add r1, r1, #8 | 955 add r1, r1, #8 |
956 bl \type\()_h264_qpel8_v_lowpass_l2_neon | 956 bl \type\()_h264_qpel8_v_lowpass_l2_neon |
957 sub r1, r1, r3, lsl #2 | 957 sub r1, r1, r3, lsl #2 |
958 mov lr, r4 | 958 mov lr, r4 |
959 .endfunc | 959 endfunc |
960 | 960 |
961 function \type\()_h264_qpel8_v_lowpass_l2_neon | 961 function \type\()_h264_qpel8_v_lowpass_l2_neon |
962 vld1.64 {d8}, [r1], r3 | 962 vld1.64 {d8}, [r1], r3 |
963 vld1.64 {d10}, [r1], r3 | 963 vld1.64 {d10}, [r1], r3 |
964 vld1.64 {d12}, [r1], r3 | 964 vld1.64 {d12}, [r1], r3 |
1021 vst1.64 {d5}, [r0,:64], r3 | 1021 vst1.64 {d5}, [r0,:64], r3 |
1022 vst1.64 {d10}, [r0,:64], r3 | 1022 vst1.64 {d10}, [r0,:64], r3 |
1023 vst1.64 {d11}, [r0,:64], r3 | 1023 vst1.64 {d11}, [r0,:64], r3 |
1024 | 1024 |
1025 bx lr | 1025 bx lr |
1026 .endfunc | 1026 endfunc |
1027 .endm | 1027 .endm |
1028 | 1028 |
1029 h264_qpel_v_lowpass_l2 put | 1029 h264_qpel_v_lowpass_l2 put |
1030 h264_qpel_v_lowpass_l2 avg | 1030 h264_qpel_v_lowpass_l2 avg |
1031 | 1031 |
1091 lowpass_8.16 q8, q15, d16, d17, d30, d31, d15 | 1091 lowpass_8.16 q8, q15, d16, d17, d30, d31, d15 |
1092 | 1092 |
1093 transpose_8x8 d12, d13, d14, d15, d8, d9, d10, d11 | 1093 transpose_8x8 d12, d13, d14, d15, d8, d9, d10, d11 |
1094 | 1094 |
1095 bx lr | 1095 bx lr |
1096 .endfunc | 1096 endfunc |
1097 | 1097 |
1098 .macro h264_qpel8_hv_lowpass type | 1098 .macro h264_qpel8_hv_lowpass type |
1099 function \type\()_h264_qpel8_hv_lowpass_neon | 1099 function \type\()_h264_qpel8_hv_lowpass_neon |
1100 mov r10, lr | 1100 mov r10, lr |
1101 bl put_h264_qpel8_hv_lowpass_neon_top | 1101 bl put_h264_qpel8_hv_lowpass_neon_top |
1127 vst1.64 {d10}, [r0,:64], r2 | 1127 vst1.64 {d10}, [r0,:64], r2 |
1128 vst1.64 {d11}, [r0,:64], r2 | 1128 vst1.64 {d11}, [r0,:64], r2 |
1129 | 1129 |
1130 mov lr, r10 | 1130 mov lr, r10 |
1131 bx lr | 1131 bx lr |
1132 .endfunc | 1132 endfunc |
1133 .endm | 1133 .endm |
1134 | 1134 |
1135 h264_qpel8_hv_lowpass put | 1135 h264_qpel8_hv_lowpass put |
1136 h264_qpel8_hv_lowpass avg | 1136 h264_qpel8_hv_lowpass avg |
1137 | 1137 |
1176 vst1.64 {d6}, [r0,:64], r3 | 1176 vst1.64 {d6}, [r0,:64], r3 |
1177 vst1.64 {d7}, [r0,:64], r3 | 1177 vst1.64 {d7}, [r0,:64], r3 |
1178 | 1178 |
1179 mov lr, r10 | 1179 mov lr, r10 |
1180 bx lr | 1180 bx lr |
1181 .endfunc | 1181 endfunc |
1182 .endm | 1182 .endm |
1183 | 1183 |
1184 h264_qpel8_hv_lowpass_l2 put | 1184 h264_qpel8_hv_lowpass_l2 put |
1185 h264_qpel8_hv_lowpass_l2 avg | 1185 h264_qpel8_hv_lowpass_l2 avg |
1186 | 1186 |
1197 add r0, r0, #8 | 1197 add r0, r0, #8 |
1198 bl \type\()_h264_qpel8_hv_lowpass_neon | 1198 bl \type\()_h264_qpel8_hv_lowpass_neon |
1199 sub r1, r1, r3, lsl #2 | 1199 sub r1, r1, r3, lsl #2 |
1200 mov lr, r9 | 1200 mov lr, r9 |
1201 b \type\()_h264_qpel8_hv_lowpass_neon | 1201 b \type\()_h264_qpel8_hv_lowpass_neon |
1202 .endfunc | 1202 endfunc |
1203 | 1203 |
1204 function \type\()_h264_qpel16_hv_lowpass_l2_neon | 1204 function \type\()_h264_qpel16_hv_lowpass_l2_neon |
1205 mov r9, lr | 1205 mov r9, lr |
1206 sub r2, r4, #256 | 1206 sub r2, r4, #256 |
1207 bl \type\()_h264_qpel8_hv_lowpass_l2_neon | 1207 bl \type\()_h264_qpel8_hv_lowpass_l2_neon |
1214 add r0, r0, #8 | 1214 add r0, r0, #8 |
1215 bl \type\()_h264_qpel8_hv_lowpass_l2_neon | 1215 bl \type\()_h264_qpel8_hv_lowpass_l2_neon |
1216 sub r1, r1, r3, lsl #2 | 1216 sub r1, r1, r3, lsl #2 |
1217 mov lr, r9 | 1217 mov lr, r9 |
1218 b \type\()_h264_qpel8_hv_lowpass_l2_neon | 1218 b \type\()_h264_qpel8_hv_lowpass_l2_neon |
1219 .endfunc | 1219 endfunc |
1220 .endm | 1220 .endm |
1221 | 1221 |
1222 h264_qpel16_hv put | 1222 h264_qpel16_hv put |
1223 h264_qpel16_hv avg | 1223 h264_qpel16_hv avg |
1224 | 1224 |
1227 lowpass_const r3 | 1227 lowpass_const r3 |
1228 mov r3, r1 | 1228 mov r3, r1 |
1229 sub r1, r1, #2 | 1229 sub r1, r1, #2 |
1230 mov ip, #8 | 1230 mov ip, #8 |
1231 b \type\()_h264_qpel8_h_lowpass_l2_neon | 1231 b \type\()_h264_qpel8_h_lowpass_l2_neon |
1232 .endfunc | 1232 endfunc |
1233 | 1233 |
1234 function ff_\type\()_h264_qpel8_mc20_neon, export=1 | 1234 function ff_\type\()_h264_qpel8_mc20_neon, export=1 |
1235 lowpass_const r3 | 1235 lowpass_const r3 |
1236 sub r1, r1, #2 | 1236 sub r1, r1, #2 |
1237 mov r3, r2 | 1237 mov r3, r2 |
1238 mov ip, #8 | 1238 mov ip, #8 |
1239 b \type\()_h264_qpel8_h_lowpass_neon | 1239 b \type\()_h264_qpel8_h_lowpass_neon |
1240 .endfunc | 1240 endfunc |
1241 | 1241 |
1242 function ff_\type\()_h264_qpel8_mc30_neon, export=1 | 1242 function ff_\type\()_h264_qpel8_mc30_neon, export=1 |
1243 lowpass_const r3 | 1243 lowpass_const r3 |
1244 add r3, r1, #1 | 1244 add r3, r1, #1 |
1245 sub r1, r1, #2 | 1245 sub r1, r1, #2 |
1246 mov ip, #8 | 1246 mov ip, #8 |
1247 b \type\()_h264_qpel8_h_lowpass_l2_neon | 1247 b \type\()_h264_qpel8_h_lowpass_l2_neon |
1248 .endfunc | 1248 endfunc |
1249 | 1249 |
1250 function ff_\type\()_h264_qpel8_mc01_neon, export=1 | 1250 function ff_\type\()_h264_qpel8_mc01_neon, export=1 |
1251 push {lr} | 1251 push {lr} |
1252 mov ip, r1 | 1252 mov ip, r1 |
1253 \type\()_h264_qpel8_mc01: | 1253 \type\()_h264_qpel8_mc01: |
1256 sub r1, r1, r2, lsl #1 | 1256 sub r1, r1, r2, lsl #1 |
1257 vpush {d8-d15} | 1257 vpush {d8-d15} |
1258 bl \type\()_h264_qpel8_v_lowpass_l2_neon | 1258 bl \type\()_h264_qpel8_v_lowpass_l2_neon |
1259 vpop {d8-d15} | 1259 vpop {d8-d15} |
1260 pop {pc} | 1260 pop {pc} |
1261 .endfunc | 1261 endfunc |
1262 | 1262 |
1263 function ff_\type\()_h264_qpel8_mc11_neon, export=1 | 1263 function ff_\type\()_h264_qpel8_mc11_neon, export=1 |
1264 push {r0, r1, r11, lr} | 1264 push {r0, r1, r11, lr} |
1265 \type\()_h264_qpel8_mc11: | 1265 \type\()_h264_qpel8_mc11: |
1266 lowpass_const r3 | 1266 lowpass_const r3 |
1280 mov r2, #8 | 1280 mov r2, #8 |
1281 bl \type\()_h264_qpel8_v_lowpass_l2_neon | 1281 bl \type\()_h264_qpel8_v_lowpass_l2_neon |
1282 vpop {d8-d15} | 1282 vpop {d8-d15} |
1283 add sp, r11, #8 | 1283 add sp, r11, #8 |
1284 pop {r11, pc} | 1284 pop {r11, pc} |
1285 .endfunc | 1285 endfunc |
1286 | 1286 |
1287 function ff_\type\()_h264_qpel8_mc21_neon, export=1 | 1287 function ff_\type\()_h264_qpel8_mc21_neon, export=1 |
1288 push {r0, r1, r4, r10, r11, lr} | 1288 push {r0, r1, r4, r10, r11, lr} |
1289 \type\()_h264_qpel8_mc21: | 1289 \type\()_h264_qpel8_mc21: |
1290 lowpass_const r3 | 1290 lowpass_const r3 |
1305 sub r2, r4, #64 | 1305 sub r2, r4, #64 |
1306 bl \type\()_h264_qpel8_hv_lowpass_l2_neon | 1306 bl \type\()_h264_qpel8_hv_lowpass_l2_neon |
1307 vpop {d8-d15} | 1307 vpop {d8-d15} |
1308 add sp, r11, #8 | 1308 add sp, r11, #8 |
1309 pop {r4, r10, r11, pc} | 1309 pop {r4, r10, r11, pc} |
1310 .endfunc | 1310 endfunc |
1311 | 1311 |
1312 function ff_\type\()_h264_qpel8_mc31_neon, export=1 | 1312 function ff_\type\()_h264_qpel8_mc31_neon, export=1 |
1313 add r1, r1, #1 | 1313 add r1, r1, #1 |
1314 push {r0, r1, r11, lr} | 1314 push {r0, r1, r11, lr} |
1315 sub r1, r1, #1 | 1315 sub r1, r1, #1 |
1316 b \type\()_h264_qpel8_mc11 | 1316 b \type\()_h264_qpel8_mc11 |
1317 .endfunc | 1317 endfunc |
1318 | 1318 |
1319 function ff_\type\()_h264_qpel8_mc02_neon, export=1 | 1319 function ff_\type\()_h264_qpel8_mc02_neon, export=1 |
1320 push {lr} | 1320 push {lr} |
1321 lowpass_const r3 | 1321 lowpass_const r3 |
1322 sub r1, r1, r2, lsl #1 | 1322 sub r1, r1, r2, lsl #1 |
1323 mov r3, r2 | 1323 mov r3, r2 |
1324 vpush {d8-d15} | 1324 vpush {d8-d15} |
1325 bl \type\()_h264_qpel8_v_lowpass_neon | 1325 bl \type\()_h264_qpel8_v_lowpass_neon |
1326 vpop {d8-d15} | 1326 vpop {d8-d15} |
1327 pop {pc} | 1327 pop {pc} |
1328 .endfunc | 1328 endfunc |
1329 | 1329 |
1330 function ff_\type\()_h264_qpel8_mc12_neon, export=1 | 1330 function ff_\type\()_h264_qpel8_mc12_neon, export=1 |
1331 push {r0, r1, r4, r10, r11, lr} | 1331 push {r0, r1, r4, r10, r11, lr} |
1332 \type\()_h264_qpel8_mc12: | 1332 \type\()_h264_qpel8_mc12: |
1333 lowpass_const r3 | 1333 lowpass_const r3 |
1347 sub r2, r4, #64 | 1347 sub r2, r4, #64 |
1348 bl \type\()_h264_qpel8_hv_lowpass_l2_neon | 1348 bl \type\()_h264_qpel8_hv_lowpass_l2_neon |
1349 vpop {d8-d15} | 1349 vpop {d8-d15} |
1350 add sp, r11, #8 | 1350 add sp, r11, #8 |
1351 pop {r4, r10, r11, pc} | 1351 pop {r4, r10, r11, pc} |
1352 .endfunc | 1352 endfunc |
1353 | 1353 |
1354 function ff_\type\()_h264_qpel8_mc22_neon, export=1 | 1354 function ff_\type\()_h264_qpel8_mc22_neon, export=1 |
1355 push {r4, r10, r11, lr} | 1355 push {r4, r10, r11, lr} |
1356 mov r11, sp | 1356 mov r11, sp |
1357 bic sp, sp, #15 | 1357 bic sp, sp, #15 |
1363 vpush {d8-d15} | 1363 vpush {d8-d15} |
1364 bl \type\()_h264_qpel8_hv_lowpass_neon | 1364 bl \type\()_h264_qpel8_hv_lowpass_neon |
1365 vpop {d8-d15} | 1365 vpop {d8-d15} |
1366 mov sp, r11 | 1366 mov sp, r11 |
1367 pop {r4, r10, r11, pc} | 1367 pop {r4, r10, r11, pc} |
1368 .endfunc | 1368 endfunc |
1369 | 1369 |
1370 function ff_\type\()_h264_qpel8_mc32_neon, export=1 | 1370 function ff_\type\()_h264_qpel8_mc32_neon, export=1 |
1371 push {r0, r1, r4, r10, r11, lr} | 1371 push {r0, r1, r4, r10, r11, lr} |
1372 add r1, r1, #1 | 1372 add r1, r1, #1 |
1373 b \type\()_h264_qpel8_mc12 | 1373 b \type\()_h264_qpel8_mc12 |
1374 .endfunc | 1374 endfunc |
1375 | 1375 |
1376 function ff_\type\()_h264_qpel8_mc03_neon, export=1 | 1376 function ff_\type\()_h264_qpel8_mc03_neon, export=1 |
1377 push {lr} | 1377 push {lr} |
1378 add ip, r1, r2 | 1378 add ip, r1, r2 |
1379 b \type\()_h264_qpel8_mc01 | 1379 b \type\()_h264_qpel8_mc01 |
1380 .endfunc | 1380 endfunc |
1381 | 1381 |
1382 function ff_\type\()_h264_qpel8_mc13_neon, export=1 | 1382 function ff_\type\()_h264_qpel8_mc13_neon, export=1 |
1383 push {r0, r1, r11, lr} | 1383 push {r0, r1, r11, lr} |
1384 add r1, r1, r2 | 1384 add r1, r1, r2 |
1385 b \type\()_h264_qpel8_mc11 | 1385 b \type\()_h264_qpel8_mc11 |
1386 .endfunc | 1386 endfunc |
1387 | 1387 |
1388 function ff_\type\()_h264_qpel8_mc23_neon, export=1 | 1388 function ff_\type\()_h264_qpel8_mc23_neon, export=1 |
1389 push {r0, r1, r4, r10, r11, lr} | 1389 push {r0, r1, r4, r10, r11, lr} |
1390 add r1, r1, r2 | 1390 add r1, r1, r2 |
1391 b \type\()_h264_qpel8_mc21 | 1391 b \type\()_h264_qpel8_mc21 |
1392 .endfunc | 1392 endfunc |
1393 | 1393 |
1394 function ff_\type\()_h264_qpel8_mc33_neon, export=1 | 1394 function ff_\type\()_h264_qpel8_mc33_neon, export=1 |
1395 add r1, r1, #1 | 1395 add r1, r1, #1 |
1396 push {r0, r1, r11, lr} | 1396 push {r0, r1, r11, lr} |
1397 add r1, r1, r2 | 1397 add r1, r1, r2 |
1398 sub r1, r1, #1 | 1398 sub r1, r1, #1 |
1399 b \type\()_h264_qpel8_mc11 | 1399 b \type\()_h264_qpel8_mc11 |
1400 .endfunc | 1400 endfunc |
1401 .endm | 1401 .endm |
1402 | 1402 |
1403 h264_qpel8 put | 1403 h264_qpel8 put |
1404 h264_qpel8 avg | 1404 h264_qpel8 avg |
1405 | 1405 |
1407 function ff_\type\()_h264_qpel16_mc10_neon, export=1 | 1407 function ff_\type\()_h264_qpel16_mc10_neon, export=1 |
1408 lowpass_const r3 | 1408 lowpass_const r3 |
1409 mov r3, r1 | 1409 mov r3, r1 |
1410 sub r1, r1, #2 | 1410 sub r1, r1, #2 |
1411 b \type\()_h264_qpel16_h_lowpass_l2_neon | 1411 b \type\()_h264_qpel16_h_lowpass_l2_neon |
1412 .endfunc | 1412 endfunc |
1413 | 1413 |
1414 function ff_\type\()_h264_qpel16_mc20_neon, export=1 | 1414 function ff_\type\()_h264_qpel16_mc20_neon, export=1 |
1415 lowpass_const r3 | 1415 lowpass_const r3 |
1416 sub r1, r1, #2 | 1416 sub r1, r1, #2 |
1417 mov r3, r2 | 1417 mov r3, r2 |
1418 b \type\()_h264_qpel16_h_lowpass_neon | 1418 b \type\()_h264_qpel16_h_lowpass_neon |
1419 .endfunc | 1419 endfunc |
1420 | 1420 |
1421 function ff_\type\()_h264_qpel16_mc30_neon, export=1 | 1421 function ff_\type\()_h264_qpel16_mc30_neon, export=1 |
1422 lowpass_const r3 | 1422 lowpass_const r3 |
1423 add r3, r1, #1 | 1423 add r3, r1, #1 |
1424 sub r1, r1, #2 | 1424 sub r1, r1, #2 |
1425 b \type\()_h264_qpel16_h_lowpass_l2_neon | 1425 b \type\()_h264_qpel16_h_lowpass_l2_neon |
1426 .endfunc | 1426 endfunc |
1427 | 1427 |
1428 function ff_\type\()_h264_qpel16_mc01_neon, export=1 | 1428 function ff_\type\()_h264_qpel16_mc01_neon, export=1 |
1429 push {r4, lr} | 1429 push {r4, lr} |
1430 mov ip, r1 | 1430 mov ip, r1 |
1431 \type\()_h264_qpel16_mc01: | 1431 \type\()_h264_qpel16_mc01: |
1434 sub r1, r1, r2, lsl #1 | 1434 sub r1, r1, r2, lsl #1 |
1435 vpush {d8-d15} | 1435 vpush {d8-d15} |
1436 bl \type\()_h264_qpel16_v_lowpass_l2_neon | 1436 bl \type\()_h264_qpel16_v_lowpass_l2_neon |
1437 vpop {d8-d15} | 1437 vpop {d8-d15} |
1438 pop {r4, pc} | 1438 pop {r4, pc} |
1439 .endfunc | 1439 endfunc |
1440 | 1440 |
1441 function ff_\type\()_h264_qpel16_mc11_neon, export=1 | 1441 function ff_\type\()_h264_qpel16_mc11_neon, export=1 |
1442 push {r0, r1, r4, r11, lr} | 1442 push {r0, r1, r4, r11, lr} |
1443 \type\()_h264_qpel16_mc11: | 1443 \type\()_h264_qpel16_mc11: |
1444 lowpass_const r3 | 1444 lowpass_const r3 |
1457 mov r2, #16 | 1457 mov r2, #16 |
1458 bl \type\()_h264_qpel16_v_lowpass_l2_neon | 1458 bl \type\()_h264_qpel16_v_lowpass_l2_neon |
1459 vpop {d8-d15} | 1459 vpop {d8-d15} |
1460 add sp, r11, #8 | 1460 add sp, r11, #8 |
1461 pop {r4, r11, pc} | 1461 pop {r4, r11, pc} |
1462 .endfunc | 1462 endfunc |
1463 | 1463 |
1464 function ff_\type\()_h264_qpel16_mc21_neon, export=1 | 1464 function ff_\type\()_h264_qpel16_mc21_neon, export=1 |
1465 push {r0, r1, r4-r5, r9-r11, lr} | 1465 push {r0, r1, r4-r5, r9-r11, lr} |
1466 \type\()_h264_qpel16_mc21: | 1466 \type\()_h264_qpel16_mc21: |
1467 lowpass_const r3 | 1467 lowpass_const r3 |
1479 mov r3, r2 | 1479 mov r3, r2 |
1480 bl \type\()_h264_qpel16_hv_lowpass_l2_neon | 1480 bl \type\()_h264_qpel16_hv_lowpass_l2_neon |
1481 vpop {d8-d15} | 1481 vpop {d8-d15} |
1482 add sp, r11, #8 | 1482 add sp, r11, #8 |
1483 pop {r4-r5, r9-r11, pc} | 1483 pop {r4-r5, r9-r11, pc} |
1484 .endfunc | 1484 endfunc |
1485 | 1485 |
1486 function ff_\type\()_h264_qpel16_mc31_neon, export=1 | 1486 function ff_\type\()_h264_qpel16_mc31_neon, export=1 |
1487 add r1, r1, #1 | 1487 add r1, r1, #1 |
1488 push {r0, r1, r4, r11, lr} | 1488 push {r0, r1, r4, r11, lr} |
1489 sub r1, r1, #1 | 1489 sub r1, r1, #1 |
1490 b \type\()_h264_qpel16_mc11 | 1490 b \type\()_h264_qpel16_mc11 |
1491 .endfunc | 1491 endfunc |
1492 | 1492 |
1493 function ff_\type\()_h264_qpel16_mc02_neon, export=1 | 1493 function ff_\type\()_h264_qpel16_mc02_neon, export=1 |
1494 push {r4, lr} | 1494 push {r4, lr} |
1495 lowpass_const r3 | 1495 lowpass_const r3 |
1496 sub r1, r1, r2, lsl #1 | 1496 sub r1, r1, r2, lsl #1 |
1497 mov r3, r2 | 1497 mov r3, r2 |
1498 vpush {d8-d15} | 1498 vpush {d8-d15} |
1499 bl \type\()_h264_qpel16_v_lowpass_neon | 1499 bl \type\()_h264_qpel16_v_lowpass_neon |
1500 vpop {d8-d15} | 1500 vpop {d8-d15} |
1501 pop {r4, pc} | 1501 pop {r4, pc} |
1502 .endfunc | 1502 endfunc |
1503 | 1503 |
1504 function ff_\type\()_h264_qpel16_mc12_neon, export=1 | 1504 function ff_\type\()_h264_qpel16_mc12_neon, export=1 |
1505 push {r0, r1, r4-r5, r9-r11, lr} | 1505 push {r0, r1, r4-r5, r9-r11, lr} |
1506 \type\()_h264_qpel16_mc12: | 1506 \type\()_h264_qpel16_mc12: |
1507 lowpass_const r3 | 1507 lowpass_const r3 |
1520 mov r2, r3 | 1520 mov r2, r3 |
1521 bl \type\()_h264_qpel16_hv_lowpass_l2_neon | 1521 bl \type\()_h264_qpel16_hv_lowpass_l2_neon |
1522 vpop {d8-d15} | 1522 vpop {d8-d15} |
1523 add sp, r11, #8 | 1523 add sp, r11, #8 |
1524 pop {r4-r5, r9-r11, pc} | 1524 pop {r4-r5, r9-r11, pc} |
1525 .endfunc | 1525 endfunc |
1526 | 1526 |
1527 function ff_\type\()_h264_qpel16_mc22_neon, export=1 | 1527 function ff_\type\()_h264_qpel16_mc22_neon, export=1 |
1528 push {r4, r9-r11, lr} | 1528 push {r4, r9-r11, lr} |
1529 lowpass_const r3 | 1529 lowpass_const r3 |
1530 mov r11, sp | 1530 mov r11, sp |
1537 vpush {d8-d15} | 1537 vpush {d8-d15} |
1538 bl \type\()_h264_qpel16_hv_lowpass_neon | 1538 bl \type\()_h264_qpel16_hv_lowpass_neon |
1539 vpop {d8-d15} | 1539 vpop {d8-d15} |
1540 mov sp, r11 | 1540 mov sp, r11 |
1541 pop {r4, r9-r11, pc} | 1541 pop {r4, r9-r11, pc} |
1542 .endfunc | 1542 endfunc |
1543 | 1543 |
1544 function ff_\type\()_h264_qpel16_mc32_neon, export=1 | 1544 function ff_\type\()_h264_qpel16_mc32_neon, export=1 |
1545 push {r0, r1, r4-r5, r9-r11, lr} | 1545 push {r0, r1, r4-r5, r9-r11, lr} |
1546 add r1, r1, #1 | 1546 add r1, r1, #1 |
1547 b \type\()_h264_qpel16_mc12 | 1547 b \type\()_h264_qpel16_mc12 |
1548 .endfunc | 1548 endfunc |
1549 | 1549 |
1550 function ff_\type\()_h264_qpel16_mc03_neon, export=1 | 1550 function ff_\type\()_h264_qpel16_mc03_neon, export=1 |
1551 push {r4, lr} | 1551 push {r4, lr} |
1552 add ip, r1, r2 | 1552 add ip, r1, r2 |
1553 b \type\()_h264_qpel16_mc01 | 1553 b \type\()_h264_qpel16_mc01 |
1554 .endfunc | 1554 endfunc |
1555 | 1555 |
1556 function ff_\type\()_h264_qpel16_mc13_neon, export=1 | 1556 function ff_\type\()_h264_qpel16_mc13_neon, export=1 |
1557 push {r0, r1, r4, r11, lr} | 1557 push {r0, r1, r4, r11, lr} |
1558 add r1, r1, r2 | 1558 add r1, r1, r2 |
1559 b \type\()_h264_qpel16_mc11 | 1559 b \type\()_h264_qpel16_mc11 |
1560 .endfunc | 1560 endfunc |
1561 | 1561 |
1562 function ff_\type\()_h264_qpel16_mc23_neon, export=1 | 1562 function ff_\type\()_h264_qpel16_mc23_neon, export=1 |
1563 push {r0, r1, r4-r5, r9-r11, lr} | 1563 push {r0, r1, r4-r5, r9-r11, lr} |
1564 add r1, r1, r2 | 1564 add r1, r1, r2 |
1565 b \type\()_h264_qpel16_mc21 | 1565 b \type\()_h264_qpel16_mc21 |
1566 .endfunc | 1566 endfunc |
1567 | 1567 |
1568 function ff_\type\()_h264_qpel16_mc33_neon, export=1 | 1568 function ff_\type\()_h264_qpel16_mc33_neon, export=1 |
1569 add r1, r1, #1 | 1569 add r1, r1, #1 |
1570 push {r0, r1, r4, r11, lr} | 1570 push {r0, r1, r4, r11, lr} |
1571 add r1, r1, r2 | 1571 add r1, r1, r2 |
1572 sub r1, r1, #1 | 1572 sub r1, r1, #1 |
1573 b \type\()_h264_qpel16_mc11 | 1573 b \type\()_h264_qpel16_mc11 |
1574 .endfunc | 1574 endfunc |
1575 .endm | 1575 .endm |
1576 | 1576 |
1577 h264_qpel16 put | 1577 h264_qpel16 put |
1578 h264_qpel16 avg | 1578 h264_qpel16 avg |
1579 | 1579 |
1717 30: rsb r4, r4, #0 | 1717 30: rsb r4, r4, #0 |
1718 rsb r5, r5, #0 | 1718 rsb r5, r5, #0 |
1719 biweight_\w vmlsl.u8, vmlsl.u8 | 1719 biweight_\w vmlsl.u8, vmlsl.u8 |
1720 40: rsb r5, r5, #0 | 1720 40: rsb r5, r5, #0 |
1721 biweight_\w vmlsl.u8, vmlal.u8 | 1721 biweight_\w vmlsl.u8, vmlal.u8 |
1722 .endfunc | 1722 endfunc |
1723 .endm | 1723 .endm |
1724 | 1724 |
1725 .macro biweight_entry w, h, b=1 | 1725 .macro biweight_entry w, h, b=1 |
1726 function ff_biweight_h264_pixels_\w\()x\h\()_neon, export=1 | 1726 function ff_biweight_h264_pixels_\w\()x\h\()_neon, export=1 |
1727 mov ip, #\h | 1727 mov ip, #\h |
1728 .if \b | 1728 .if \b |
1729 b biweight_h264_pixels_\w\()_neon | 1729 b biweight_h264_pixels_\w\()_neon |
1730 .endif | 1730 .endif |
1731 .endfunc | 1731 endfunc |
1732 .endm | 1732 .endm |
1733 | 1733 |
1734 biweight_entry 16, 8 | 1734 biweight_entry 16, 8 |
1735 biweight_entry 16, 16, b=0 | 1735 biweight_entry 16, 16, b=0 |
1736 biweight_func 16 | 1736 biweight_func 16 |
1854 cmp r3, #0 | 1854 cmp r3, #0 |
1855 blt 10f | 1855 blt 10f |
1856 weight_\w vadd.s16 | 1856 weight_\w vadd.s16 |
1857 10: rsb r3, r3, #0 | 1857 10: rsb r3, r3, #0 |
1858 weight_\w vsub.s16 | 1858 weight_\w vsub.s16 |
1859 .endfunc | 1859 endfunc |
1860 .endm | 1860 .endm |
1861 | 1861 |
1862 .macro weight_entry w, h, b=1 | 1862 .macro weight_entry w, h, b=1 |
1863 function ff_weight_h264_pixels_\w\()x\h\()_neon, export=1 | 1863 function ff_weight_h264_pixels_\w\()x\h\()_neon, export=1 |
1864 mov ip, #\h | 1864 mov ip, #\h |
1865 .if \b | 1865 .if \b |
1866 b weight_h264_pixels_\w\()_neon | 1866 b weight_h264_pixels_\w\()_neon |
1867 .endif | 1867 .endif |
1868 .endfunc | 1868 endfunc |
1869 .endm | 1869 .endm |
1870 | 1870 |
1871 weight_entry 16, 8 | 1871 weight_entry 16, 8 |
1872 weight_entry 16, 16, b=0 | 1872 weight_entry 16, 16, b=0 |
1873 weight_func 16 | 1873 weight_func 16 |