comparison x86/h264dsp_mmx.c @ 10943:3a723e8dcd26 libavcodec

Use constant offsets for memory operands, since gcc is unable to do so. This fixes gcc failing to fit 6 memory locations into 7 registers on x86-32.
author conrad
date Wed, 20 Jan 2010 00:34:10 +0000
parents 2a50b786b888
children eb9a2581f50e
comparison
equal deleted inserted replaced
10942:ee944149442f 10943:3a723e8dcd26
832 "paddb %%mm6, %%mm1 \n\t" 832 "paddb %%mm6, %%mm1 \n\t"
833 "punpckhbw %%mm7, %%mm1 \n\t" // ref[b] != ref[bn] 833 "punpckhbw %%mm7, %%mm1 \n\t" // ref[b] != ref[bn]
834 "por %%mm1, %%mm0 \n\t" 834 "por %%mm1, %%mm0 \n\t"
835 835
836 "movq %2, %%mm1 \n\t" 836 "movq %2, %%mm1 \n\t"
837 "movq %3, %%mm2 \n\t" 837 "movq 8+1*%2, %%mm2 \n\t"
838 "psubw %4, %%mm1 \n\t" 838 "psubw %3, %%mm1 \n\t"
839 "psubw %5, %%mm2 \n\t" 839 "psubw 8+1*%3, %%mm2 \n\t"
840 "packsswb %%mm2, %%mm1 \n\t" 840 "packsswb %%mm2, %%mm1 \n\t"
841 "paddb %%mm5, %%mm1 \n\t" 841 "paddb %%mm5, %%mm1 \n\t"
842 "pminub %%mm4, %%mm1 \n\t" 842 "pminub %%mm4, %%mm1 \n\t"
843 "pcmpeqb %%mm4, %%mm1 \n\t" // abs(mv[b] - mv[bn]) >= limit 843 "pcmpeqb %%mm4, %%mm1 \n\t" // abs(mv[b] - mv[bn]) >= limit
844 "por %%mm1, %%mm0 \n\t" 844 "por %%mm1, %%mm0 \n\t"
845 ::"m"(ref[l][b_idx]), 845 ::"m"(ref[l][b_idx]),
846 "m"(ref[l][b_idx+d_idx]), 846 "m"(ref[l][b_idx+d_idx]),
847 "m"(mv[l][b_idx][0]), 847 "m"(mv[l][b_idx][0]),
848 "m"(mv[l][b_idx+2][0]), 848 "m"(mv[l][b_idx+d_idx][0])
849 "m"(mv[l][b_idx+d_idx][0]),
850 "m"(mv[l][b_idx+d_idx+2][0])
851 ); 849 );
852 } 850 }
853 if(bidir==1){ 851 if(bidir==1){
854 __asm__ volatile("pxor %%mm3, %%mm3 \n\t":); 852 __asm__ volatile("pxor %%mm3, %%mm3 \n\t":);
855 for( l = bidir; l >= 0; l-- ) { 853 for( l = bidir; l >= 0; l-- ) {
861 "paddb %%mm6, %%mm1 \n\t" 859 "paddb %%mm6, %%mm1 \n\t"
862 "punpckhbw %%mm7, %%mm1 \n\t" // ref[b] != ref[bn] 860 "punpckhbw %%mm7, %%mm1 \n\t" // ref[b] != ref[bn]
863 "por %%mm1, %%mm3 \n\t" 861 "por %%mm1, %%mm3 \n\t"
864 862
865 "movq %2, %%mm1 \n\t" 863 "movq %2, %%mm1 \n\t"
866 "movq %3, %%mm2 \n\t" 864 "movq 8+1*%2, %%mm2 \n\t"
867 "psubw %4, %%mm1 \n\t" 865 "psubw %3, %%mm1 \n\t"
868 "psubw %5, %%mm2 \n\t" 866 "psubw 8+1*%3, %%mm2 \n\t"
869 "packsswb %%mm2, %%mm1 \n\t" 867 "packsswb %%mm2, %%mm1 \n\t"
870 "paddb %%mm5, %%mm1 \n\t" 868 "paddb %%mm5, %%mm1 \n\t"
871 "pminub %%mm4, %%mm1 \n\t" 869 "pminub %%mm4, %%mm1 \n\t"
872 "pcmpeqb %%mm4, %%mm1 \n\t" // abs(mv[b] - mv[bn]) >= limit 870 "pcmpeqb %%mm4, %%mm1 \n\t" // abs(mv[b] - mv[bn]) >= limit
873 "por %%mm1, %%mm3 \n\t" 871 "por %%mm1, %%mm3 \n\t"
874 ::"m"(ref[l][b_idx]), 872 ::"m"(ref[l][b_idx]),
875 "m"(ref[1-l][b_idx+d_idx]), 873 "m"(ref[1-l][b_idx+d_idx]),
876 "m"(mv[l][b_idx][0]), 874 "m"(mv[l][b_idx][0]),
877 "m"(mv[l][b_idx+2][0]), 875 "m"(mv[1-l][b_idx+d_idx][0])
878 "m"(mv[1-l][b_idx+d_idx][0]),
879 "m"(mv[1-l][b_idx+d_idx+2][0])
880 ); 876 );
881 } 877 }
882 __asm__ volatile( 878 __asm__ volatile(
883 "pcmpeqw %%mm7, %%mm3 \n\t" 879 "pcmpeqw %%mm7, %%mm3 \n\t"
884 "psubusw %%mm3, %%mm0 \n\t" 880 "psubusw %%mm3, %%mm0 \n\t"