comparison x86/vp8dsp.asm @ 12082:8527154f6e81 libavcodec

Add SSSE3 versions of the VP8 width-4 bilinear motion-compensation (MC) functions
author darkshikari
date Sat, 03 Jul 2010 00:48:12 +0000
parents b8f80fe02861
children d780ae746855
comparison
Legend: equal, deleted, inserted, replaced
12081:812e23197d64 12082:8527154f6e81
768 INIT_MMX 768 INIT_MMX
769 FILTER_BILINEAR mmxext, 4, 0 769 FILTER_BILINEAR mmxext, 4, 0
770 INIT_XMM 770 INIT_XMM
771 FILTER_BILINEAR sse2, 8, 7 771 FILTER_BILINEAR sse2, 8, 7
772 772
773 cglobal put_vp8_bilinear8_v_ssse3, 7,7,5 773 %macro FILTER_BILINEAR_SSSE3 1
774 cglobal put_vp8_bilinear%1_v_ssse3, 7,7
774 shl r6d, 4 775 shl r6d, 4
775 %ifdef PIC 776 %ifdef PIC
776 lea r11, [bilinear_filter_vb_m] 777 lea r11, [bilinear_filter_vb_m]
777 %endif 778 %endif
778 pxor m4, m4 779 pxor m4, m4
787 pmaddubsw m1, m3 788 pmaddubsw m1, m3
788 psraw m0, 2 789 psraw m0, 2
789 psraw m1, 2 790 psraw m1, 2
790 pavgw m0, m4 791 pavgw m0, m4
791 pavgw m1, m4 792 pavgw m1, m4
793 %if mmsize==8
794 packuswb m0, m0
795 packuswb m1, m1
796 movh [r0+r1*0], m0
797 movh [r0+r1*1], m1
798 %else
792 packuswb m0, m1 799 packuswb m0, m1
793 movh [r0+r1*0], m0 800 movh [r0+r1*0], m0
794 movhps [r0+r1*1], m0 801 movhps [r0+r1*1], m0
802 %endif
795 803
796 lea r0, [r0+r1*2] 804 lea r0, [r0+r1*2]
797 lea r2, [r2+r3*2] 805 lea r2, [r2+r3*2]
798 sub r4, 2 806 sub r4, 2
799 jg .nextrow 807 jg .nextrow
800 REP_RET 808 REP_RET
801 809
802 cglobal put_vp8_bilinear8_h_ssse3, 7,7,5 810 cglobal put_vp8_bilinear%1_h_ssse3, 7,7
803 shl r5d, 4 811 shl r5d, 4
804 %ifdef PIC 812 %ifdef PIC
805 lea r11, [bilinear_filter_vb_m] 813 lea r11, [bilinear_filter_vb_m]
806 %endif 814 %endif
807 pxor m4, m4 815 pxor m4, m4
816 pmaddubsw m1, m3 824 pmaddubsw m1, m3
817 psraw m0, 2 825 psraw m0, 2
818 psraw m1, 2 826 psraw m1, 2
819 pavgw m0, m4 827 pavgw m0, m4
820 pavgw m1, m4 828 pavgw m1, m4
829 %if mmsize==8
830 packuswb m0, m0
831 packuswb m1, m1
832 movh [r0+r1*0], m0
833 movh [r0+r1*1], m1
834 %else
821 packuswb m0, m1 835 packuswb m0, m1
822 movh [r0+r1*0], m0 836 movh [r0+r1*0], m0
823 movhps [r0+r1*1], m0 837 movhps [r0+r1*1], m0
838 %endif
824 839
825 lea r0, [r0+r1*2] 840 lea r0, [r0+r1*2]
826 lea r2, [r2+r3*2] 841 lea r2, [r2+r3*2]
827 sub r4, 2 842 sub r4, 2
828 jg .nextrow 843 jg .nextrow
829 REP_RET 844 REP_RET
845 %endmacro
846
847 INIT_MMX
848 FILTER_BILINEAR_SSSE3 4
849 INIT_XMM
850 FILTER_BILINEAR_SSSE3 8
830 851
831 cglobal put_vp8_pixels8_mmx, 5,5 852 cglobal put_vp8_pixels8_mmx, 5,5
832 .nextrow: 853 .nextrow:
833 movq mm0, [r2+r3*0] 854 movq mm0, [r2+r3*0]
834 movq mm1, [r2+r3*1] 855 movq mm1, [r2+r3*1]