Mercurial > libavcodec.hg
comparison x86/h264dsp_mmx.c @ 10953:eb9a2581f50e libavcodec
Attempt to fix asm compilation failure.
Only tested on gcc 4 & x86_64.
author | michael |
---|---|
date | Wed, 20 Jan 2010 19:23:19 +0000 |
parents | 3a723e8dcd26 |
children | fdddf3d4238f |
comparison
Legend: equal, deleted, inserted, replaced
10952:ea8f891d997d | 10953:eb9a2581f50e |
---|---|
810 ); | 810 ); |
811 | 811 |
812 // could do a special case for dir==0 && edges==1, but it only reduces the | 812 // could do a special case for dir==0 && edges==1, but it only reduces the |
813 // average filter time by 1.2% | 813 // average filter time by 1.2% |
814 for( dir=1; dir>=0; dir-- ) { | 814 for( dir=1; dir>=0; dir-- ) { |
815 const int d_idx = dir ? -8 : -1; | 815 const x86_reg d_idx = dir ? -8 : -1; |
816 const int mask_mv = dir ? mask_mv1 : mask_mv0; | 816 const int mask_mv = dir ? mask_mv1 : mask_mv0; |
817 DECLARE_ALIGNED_8(const uint64_t, mask_dir) = dir ? 0 : 0xffffffffffffffffULL; | 817 DECLARE_ALIGNED_8(const uint64_t, mask_dir) = dir ? 0 : 0xffffffffffffffffULL; |
818 int b_idx, edge, l; | 818 int b_idx, edge, l; |
819 for( b_idx=12, edge=0; edge<edges; edge+=step, b_idx+=8*step ) { | 819 for( b_idx=12, edge=0; edge<edges; edge+=step, b_idx+=8*step ) { |
820 __asm__ volatile( | 820 __asm__ volatile( |
823 ); | 823 ); |
824 if(!(mask_mv & edge)) { | 824 if(!(mask_mv & edge)) { |
825 __asm__ volatile("pxor %%mm0, %%mm0 \n\t":); | 825 __asm__ volatile("pxor %%mm0, %%mm0 \n\t":); |
826 for( l = bidir; l >= 0; l-- ) { | 826 for( l = bidir; l >= 0; l-- ) { |
827 __asm__ volatile( | 827 __asm__ volatile( |
828 "movd %0, %%mm1 \n\t" | 828 "movd (%0), %%mm1 \n\t" |
829 "punpckldq %1, %%mm1 \n\t" | 829 "punpckldq (%0,%1), %%mm1 \n\t" |
830 "punpckldq %%mm1, %%mm2 \n\t" | 830 "punpckldq %%mm1, %%mm2 \n\t" |
831 "pcmpeqb %%mm2, %%mm1 \n\t" | 831 "pcmpeqb %%mm2, %%mm1 \n\t" |
832 "paddb %%mm6, %%mm1 \n\t" | 832 "paddb %%mm6, %%mm1 \n\t" |
833 "punpckhbw %%mm7, %%mm1 \n\t" // ref[b] != ref[bn] | 833 "punpckhbw %%mm7, %%mm1 \n\t" // ref[b] != ref[bn] |
834 "por %%mm1, %%mm0 \n\t" | 834 "por %%mm1, %%mm0 \n\t" |
835 | 835 |
836 "movq %2, %%mm1 \n\t" | 836 "movq (%2), %%mm1 \n\t" |
837 "movq 8+1*%2, %%mm2 \n\t" | 837 "movq 8(%2), %%mm2 \n\t" |
838 "psubw %3, %%mm1 \n\t" | 838 "psubw (%2,%1,4), %%mm1 \n\t" |
839 "psubw 8+1*%3, %%mm2 \n\t" | 839 "psubw 8(%2,%1,4), %%mm2 \n\t" |
840 "packsswb %%mm2, %%mm1 \n\t" | 840 "packsswb %%mm2, %%mm1 \n\t" |
841 "paddb %%mm5, %%mm1 \n\t" | 841 "paddb %%mm5, %%mm1 \n\t" |
842 "pminub %%mm4, %%mm1 \n\t" | 842 "pminub %%mm4, %%mm1 \n\t" |
843 "pcmpeqb %%mm4, %%mm1 \n\t" // abs(mv[b] - mv[bn]) >= limit | 843 "pcmpeqb %%mm4, %%mm1 \n\t" // abs(mv[b] - mv[bn]) >= limit |
844 "por %%mm1, %%mm0 \n\t" | 844 "por %%mm1, %%mm0 \n\t" |
845 ::"m"(ref[l][b_idx]), | 845 ::"r"(ref[l]+b_idx), |
846 "m"(ref[l][b_idx+d_idx]), | 846 "r"(d_idx), |
847 "m"(mv[l][b_idx][0]), | 847 "r"(mv[l]+b_idx) |
848 "m"(mv[l][b_idx+d_idx][0]) | |
849 ); | 848 ); |
850 } | 849 } |
851 if(bidir==1){ | 850 if(bidir==1){ |
852 __asm__ volatile("pxor %%mm3, %%mm3 \n\t":); | 851 __asm__ volatile("pxor %%mm3, %%mm3 \n\t":); |
853 for( l = bidir; l >= 0; l-- ) { | 852 for( l = bidir; l >= 0; l-- ) { |
854 __asm__ volatile( | 853 __asm__ volatile( |
855 "movd %0, %%mm1 \n\t" | 854 "movd (%0), %%mm1 \n\t" |
856 "punpckldq %1, %%mm1 \n\t" | 855 "punpckldq (%1), %%mm1 \n\t" |
857 "punpckldq %%mm1, %%mm2 \n\t" | 856 "punpckldq %%mm1, %%mm2 \n\t" |
858 "pcmpeqb %%mm2, %%mm1 \n\t" | 857 "pcmpeqb %%mm2, %%mm1 \n\t" |
859 "paddb %%mm6, %%mm1 \n\t" | 858 "paddb %%mm6, %%mm1 \n\t" |
860 "punpckhbw %%mm7, %%mm1 \n\t" // ref[b] != ref[bn] | 859 "punpckhbw %%mm7, %%mm1 \n\t" // ref[b] != ref[bn] |
861 "por %%mm1, %%mm3 \n\t" | 860 "por %%mm1, %%mm3 \n\t" |
862 | 861 |
863 "movq %2, %%mm1 \n\t" | 862 "movq (%2), %%mm1 \n\t" |
864 "movq 8+1*%2, %%mm2 \n\t" | 863 "movq 8(%2), %%mm2 \n\t" |
865 "psubw %3, %%mm1 \n\t" | 864 "psubw (%3), %%mm1 \n\t" |
866 "psubw 8+1*%3, %%mm2 \n\t" | 865 "psubw 8(%3), %%mm2 \n\t" |
867 "packsswb %%mm2, %%mm1 \n\t" | 866 "packsswb %%mm2, %%mm1 \n\t" |
868 "paddb %%mm5, %%mm1 \n\t" | 867 "paddb %%mm5, %%mm1 \n\t" |
869 "pminub %%mm4, %%mm1 \n\t" | 868 "pminub %%mm4, %%mm1 \n\t" |
870 "pcmpeqb %%mm4, %%mm1 \n\t" // abs(mv[b] - mv[bn]) >= limit | 869 "pcmpeqb %%mm4, %%mm1 \n\t" // abs(mv[b] - mv[bn]) >= limit |
871 "por %%mm1, %%mm3 \n\t" | 870 "por %%mm1, %%mm3 \n\t" |
872 ::"m"(ref[l][b_idx]), | 871 ::"r"(ref[l]+b_idx), |
873 "m"(ref[1-l][b_idx+d_idx]), | 872 "r"(ref[1-l]+b_idx+d_idx), |
874 "m"(mv[l][b_idx][0]), | 873 "r"(mv[l][b_idx]), |
875 "m"(mv[1-l][b_idx+d_idx][0]) | 874 "r"(mv[1-l][b_idx+d_idx]) |
876 ); | 875 ); |
877 } | 876 } |
878 __asm__ volatile( | 877 __asm__ volatile( |
879 "pcmpeqw %%mm7, %%mm3 \n\t" | 878 "pcmpeqw %%mm7, %%mm3 \n\t" |
880 "psubusw %%mm3, %%mm0 \n\t" | 879 "psubusw %%mm3, %%mm0 \n\t" |