Mercurial > libavcodec.hg
comparison x86/h264dsp_mmx.c @ 10943:3a723e8dcd26 libavcodec
Use constant offsets for memory operands, since gcc is unable to do so itself.
This fixes gcc failing to fit 6 memory locations into 7 registers on x86-32.
author | conrad |
---|---|
date | Wed, 20 Jan 2010 00:34:10 +0000 |
parents | 2a50b786b888 |
children | eb9a2581f50e |
comparison
equal
deleted
inserted
replaced
10942:ee944149442f | 10943:3a723e8dcd26 |
---|---|
832 "paddb %%mm6, %%mm1 \n\t" | 832 "paddb %%mm6, %%mm1 \n\t" |
833 "punpckhbw %%mm7, %%mm1 \n\t" // ref[b] != ref[bn] | 833 "punpckhbw %%mm7, %%mm1 \n\t" // ref[b] != ref[bn] |
834 "por %%mm1, %%mm0 \n\t" | 834 "por %%mm1, %%mm0 \n\t" |
835 | 835 |
836 "movq %2, %%mm1 \n\t" | 836 "movq %2, %%mm1 \n\t" |
837 "movq %3, %%mm2 \n\t" | 837 "movq 8+1*%2, %%mm2 \n\t" |
838 "psubw %4, %%mm1 \n\t" | 838 "psubw %3, %%mm1 \n\t" |
839 "psubw %5, %%mm2 \n\t" | 839 "psubw 8+1*%3, %%mm2 \n\t" |
840 "packsswb %%mm2, %%mm1 \n\t" | 840 "packsswb %%mm2, %%mm1 \n\t" |
841 "paddb %%mm5, %%mm1 \n\t" | 841 "paddb %%mm5, %%mm1 \n\t" |
842 "pminub %%mm4, %%mm1 \n\t" | 842 "pminub %%mm4, %%mm1 \n\t" |
843 "pcmpeqb %%mm4, %%mm1 \n\t" // abs(mv[b] - mv[bn]) >= limit | 843 "pcmpeqb %%mm4, %%mm1 \n\t" // abs(mv[b] - mv[bn]) >= limit |
844 "por %%mm1, %%mm0 \n\t" | 844 "por %%mm1, %%mm0 \n\t" |
845 ::"m"(ref[l][b_idx]), | 845 ::"m"(ref[l][b_idx]), |
846 "m"(ref[l][b_idx+d_idx]), | 846 "m"(ref[l][b_idx+d_idx]), |
847 "m"(mv[l][b_idx][0]), | 847 "m"(mv[l][b_idx][0]), |
848 "m"(mv[l][b_idx+2][0]), | 848 "m"(mv[l][b_idx+d_idx][0]) |
849 "m"(mv[l][b_idx+d_idx][0]), | |
850 "m"(mv[l][b_idx+d_idx+2][0]) | |
851 ); | 849 ); |
852 } | 850 } |
853 if(bidir==1){ | 851 if(bidir==1){ |
854 __asm__ volatile("pxor %%mm3, %%mm3 \n\t":); | 852 __asm__ volatile("pxor %%mm3, %%mm3 \n\t":); |
855 for( l = bidir; l >= 0; l-- ) { | 853 for( l = bidir; l >= 0; l-- ) { |
861 "paddb %%mm6, %%mm1 \n\t" | 859 "paddb %%mm6, %%mm1 \n\t" |
862 "punpckhbw %%mm7, %%mm1 \n\t" // ref[b] != ref[bn] | 860 "punpckhbw %%mm7, %%mm1 \n\t" // ref[b] != ref[bn] |
863 "por %%mm1, %%mm3 \n\t" | 861 "por %%mm1, %%mm3 \n\t" |
864 | 862 |
865 "movq %2, %%mm1 \n\t" | 863 "movq %2, %%mm1 \n\t" |
866 "movq %3, %%mm2 \n\t" | 864 "movq 8+1*%2, %%mm2 \n\t" |
867 "psubw %4, %%mm1 \n\t" | 865 "psubw %3, %%mm1 \n\t" |
868 "psubw %5, %%mm2 \n\t" | 866 "psubw 8+1*%3, %%mm2 \n\t" |
869 "packsswb %%mm2, %%mm1 \n\t" | 867 "packsswb %%mm2, %%mm1 \n\t" |
870 "paddb %%mm5, %%mm1 \n\t" | 868 "paddb %%mm5, %%mm1 \n\t" |
871 "pminub %%mm4, %%mm1 \n\t" | 869 "pminub %%mm4, %%mm1 \n\t" |
872 "pcmpeqb %%mm4, %%mm1 \n\t" // abs(mv[b] - mv[bn]) >= limit | 870 "pcmpeqb %%mm4, %%mm1 \n\t" // abs(mv[b] - mv[bn]) >= limit |
873 "por %%mm1, %%mm3 \n\t" | 871 "por %%mm1, %%mm3 \n\t" |
874 ::"m"(ref[l][b_idx]), | 872 ::"m"(ref[l][b_idx]), |
875 "m"(ref[1-l][b_idx+d_idx]), | 873 "m"(ref[1-l][b_idx+d_idx]), |
876 "m"(mv[l][b_idx][0]), | 874 "m"(mv[l][b_idx][0]), |
877 "m"(mv[l][b_idx+2][0]), | 875 "m"(mv[1-l][b_idx+d_idx][0]) |
878 "m"(mv[1-l][b_idx+d_idx][0]), | |
879 "m"(mv[1-l][b_idx+d_idx+2][0]) | |
880 ); | 876 ); |
881 } | 877 } |
882 __asm__ volatile( | 878 __asm__ volatile( |
883 "pcmpeqw %%mm7, %%mm3 \n\t" | 879 "pcmpeqw %%mm7, %%mm3 \n\t" |
884 "psubusw %%mm3, %%mm0 \n\t" | 880 "psubusw %%mm3, %%mm0 \n\t" |