Mercurial > libavcodec.hg
comparison x86/vp8dsp.asm @ 12082:8527154f6e81 libavcodec
SSSE3 versions of vp8 width4 bilinear MC functions
author | darkshikari |
---|---|
date | Sat, 03 Jul 2010 00:48:12 +0000 |
parents | b8f80fe02861 |
children | d780ae746855 |
comparison
equal
deleted
inserted
replaced
12081:812e23197d64 | 12082:8527154f6e81 |
---|---|
768 INIT_MMX | 768 INIT_MMX |
769 FILTER_BILINEAR mmxext, 4, 0 | 769 FILTER_BILINEAR mmxext, 4, 0 |
770 INIT_XMM | 770 INIT_XMM |
771 FILTER_BILINEAR sse2, 8, 7 | 771 FILTER_BILINEAR sse2, 8, 7 |
772 | 772 |
773 cglobal put_vp8_bilinear8_v_ssse3, 7,7,5 | 773 %macro FILTER_BILINEAR_SSSE3 1 |
| | 774 cglobal put_vp8_bilinear%1_v_ssse3, 7,7 |
774 shl r6d, 4 | 775 shl r6d, 4 |
775 %ifdef PIC | 776 %ifdef PIC |
776 lea r11, [bilinear_filter_vb_m] | 777 lea r11, [bilinear_filter_vb_m] |
777 %endif | 778 %endif |
778 pxor m4, m4 | 779 pxor m4, m4 |
787 pmaddubsw m1, m3 | 788 pmaddubsw m1, m3 |
788 psraw m0, 2 | 789 psraw m0, 2 |
789 psraw m1, 2 | 790 psraw m1, 2 |
790 pavgw m0, m4 | 791 pavgw m0, m4 |
791 pavgw m1, m4 | 792 pavgw m1, m4 |
| | 793 %if mmsize==8 |
| | 794 packuswb m0, m0 |
| | 795 packuswb m1, m1 |
| | 796 movh [r0+r1*0], m0 |
| | 797 movh [r0+r1*1], m1 |
| | 798 %else |
792 packuswb m0, m1 | 799 packuswb m0, m1 |
793 movh [r0+r1*0], m0 | 800 movh [r0+r1*0], m0 |
794 movhps [r0+r1*1], m0 | 801 movhps [r0+r1*1], m0 |
| | 802 %endif |
795 | 803 |
796 lea r0, [r0+r1*2] | 804 lea r0, [r0+r1*2] |
797 lea r2, [r2+r3*2] | 805 lea r2, [r2+r3*2] |
798 sub r4, 2 | 806 sub r4, 2 |
799 jg .nextrow | 807 jg .nextrow |
800 REP_RET | 808 REP_RET |
801 | 809 |
802 cglobal put_vp8_bilinear8_h_ssse3, 7,7,5 | 810 cglobal put_vp8_bilinear%1_h_ssse3, 7,7 |
803 shl r5d, 4 | 811 shl r5d, 4 |
804 %ifdef PIC | 812 %ifdef PIC |
805 lea r11, [bilinear_filter_vb_m] | 813 lea r11, [bilinear_filter_vb_m] |
806 %endif | 814 %endif |
807 pxor m4, m4 | 815 pxor m4, m4 |
816 pmaddubsw m1, m3 | 824 pmaddubsw m1, m3 |
817 psraw m0, 2 | 825 psraw m0, 2 |
818 psraw m1, 2 | 826 psraw m1, 2 |
819 pavgw m0, m4 | 827 pavgw m0, m4 |
820 pavgw m1, m4 | 828 pavgw m1, m4 |
| | 829 %if mmsize==8 |
| | 830 packuswb m0, m0 |
| | 831 packuswb m1, m1 |
| | 832 movh [r0+r1*0], m0 |
| | 833 movh [r0+r1*1], m1 |
| | 834 %else |
821 packuswb m0, m1 | 835 packuswb m0, m1 |
822 movh [r0+r1*0], m0 | 836 movh [r0+r1*0], m0 |
823 movhps [r0+r1*1], m0 | 837 movhps [r0+r1*1], m0 |
| | 838 %endif |
824 | 839 |
825 lea r0, [r0+r1*2] | 840 lea r0, [r0+r1*2] |
826 lea r2, [r2+r3*2] | 841 lea r2, [r2+r3*2] |
827 sub r4, 2 | 842 sub r4, 2 |
828 jg .nextrow | 843 jg .nextrow |
829 REP_RET | 844 REP_RET |
| | 845 %endmacro |
| | 846 |
| | 847 INIT_MMX |
| | 848 FILTER_BILINEAR_SSSE3 4 |
| | 849 INIT_XMM |
| | 850 FILTER_BILINEAR_SSSE3 8 |
830 | 851 |
831 cglobal put_vp8_pixels8_mmx, 5,5 | 852 cglobal put_vp8_pixels8_mmx, 5,5 |
832 .nextrow: | 853 .nextrow: |
833 movq mm0, [r2+r3*0] | 854 movq mm0, [r2+r3*0] |
834 movq mm1, [r2+r3*1] | 855 movq mm1, [r2+r3*1] |