Mercurial > libavcodec.hg
comparison i386/h264dsp_mmx.c @ 4527:481763d70193 libavcodec
Prevent H.264 MC-related functions from being inlined (yes, this is much faster; the code just doesn't fit in the code cache otherwise).
author | michael |
---|---|
date | Fri, 16 Feb 2007 21:21:07 +0000 |
parents | 6e5dcbdbfeba |
children | 33c6fe489f62 |
comparison
equal
deleted
inserted
replaced
4526:30b8672a2357 | 4527:481763d70193 |
---|---|
682 "paddw "#F", "#A" \n\t"\ | 682 "paddw "#F", "#A" \n\t"\ |
683 "paddw "#A", %%mm6 \n\t"\ | 683 "paddw "#A", %%mm6 \n\t"\ |
684 "movq %%mm6, "#OF"(%1) \n\t" | 684 "movq %%mm6, "#OF"(%1) \n\t" |
685 | 685 |
686 #define QPEL_H264(OPNAME, OP, MMX)\ | 686 #define QPEL_H264(OPNAME, OP, MMX)\ |
687 static void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | 687 static av_noinline void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
688 int h=4;\ | 688 int h=4;\ |
689 \ | 689 \ |
690 asm volatile(\ | 690 asm volatile(\ |
691 "pxor %%mm7, %%mm7 \n\t"\ | 691 "pxor %%mm7, %%mm7 \n\t"\ |
692 "movq %5, %%mm4 \n\t"\ | 692 "movq %5, %%mm4 \n\t"\ |
722 : "+a"(src), "+c"(dst), "+m"(h)\ | 722 : "+a"(src), "+c"(dst), "+m"(h)\ |
723 : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ | 723 : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ |
724 : "memory"\ | 724 : "memory"\ |
725 );\ | 725 );\ |
726 }\ | 726 }\ |
727 static void OPNAME ## h264_qpel4_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ | 727 static av_noinline void OPNAME ## h264_qpel4_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ |
728 int h=4;\ | 728 int h=4;\ |
729 asm volatile(\ | 729 asm volatile(\ |
730 "pxor %%mm7, %%mm7 \n\t"\ | 730 "pxor %%mm7, %%mm7 \n\t"\ |
731 "movq %0, %%mm4 \n\t"\ | 731 "movq %0, %%mm4 \n\t"\ |
732 "movq %1, %%mm5 \n\t"\ | 732 "movq %1, %%mm5 \n\t"\ |
766 : "D"((long)src2Stride), "S"((long)dstStride)\ | 766 : "D"((long)src2Stride), "S"((long)dstStride)\ |
767 : "memory"\ | 767 : "memory"\ |
768 );\ | 768 );\ |
769 }while(--h);\ | 769 }while(--h);\ |
770 }\ | 770 }\ |
771 static void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | 771 static av_noinline void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
772 src -= 2*srcStride;\ | 772 src -= 2*srcStride;\ |
773 asm volatile(\ | 773 asm volatile(\ |
774 "pxor %%mm7, %%mm7 \n\t"\ | 774 "pxor %%mm7, %%mm7 \n\t"\ |
775 "movd (%0), %%mm0 \n\t"\ | 775 "movd (%0), %%mm0 \n\t"\ |
776 "add %2, %0 \n\t"\ | 776 "add %2, %0 \n\t"\ |
795 : "+a"(src), "+c"(dst)\ | 795 : "+a"(src), "+c"(dst)\ |
796 : "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ | 796 : "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ |
797 : "memory"\ | 797 : "memory"\ |
798 );\ | 798 );\ |
799 }\ | 799 }\ |
800 static void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ | 800 static av_noinline void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ |
801 int h=4;\ | 801 int h=4;\ |
802 int w=3;\ | 802 int w=3;\ |
803 src -= 2*srcStride+2;\ | 803 src -= 2*srcStride+2;\ |
804 while(w--){\ | 804 while(w--){\ |
805 asm volatile(\ | 805 asm volatile(\ |
859 : "S"((long)dstStride), "m"(ff_pw_32)\ | 859 : "S"((long)dstStride), "m"(ff_pw_32)\ |
860 : "memory"\ | 860 : "memory"\ |
861 );\ | 861 );\ |
862 }\ | 862 }\ |
863 \ | 863 \ |
864 static void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | 864 static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
865 int h=8;\ | 865 int h=8;\ |
866 asm volatile(\ | 866 asm volatile(\ |
867 "pxor %%mm7, %%mm7 \n\t"\ | 867 "pxor %%mm7, %%mm7 \n\t"\ |
868 "movq %5, %%mm6 \n\t"\ | 868 "movq %5, %%mm6 \n\t"\ |
869 "1: \n\t"\ | 869 "1: \n\t"\ |
916 : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ | 916 : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ |
917 : "memory"\ | 917 : "memory"\ |
918 );\ | 918 );\ |
919 }\ | 919 }\ |
920 \ | 920 \ |
921 static void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ | 921 static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ |
922 int h=8;\ | 922 int h=8;\ |
923 asm volatile(\ | 923 asm volatile(\ |
924 "pxor %%mm7, %%mm7 \n\t"\ | 924 "pxor %%mm7, %%mm7 \n\t"\ |
925 "movq %0, %%mm6 \n\t"\ | 925 "movq %0, %%mm6 \n\t"\ |
926 :: "m"(ff_pw_5)\ | 926 :: "m"(ff_pw_5)\ |
979 : "memory"\ | 979 : "memory"\ |
980 );\ | 980 );\ |
981 }while(--h);\ | 981 }while(--h);\ |
982 }\ | 982 }\ |
983 \ | 983 \ |
984 static inline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ | 984 static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ |
985 int w= 2;\ | 985 int w= 2;\ |
986 src -= 2*srcStride;\ | 986 src -= 2*srcStride;\ |
987 \ | 987 \ |
988 while(w--){\ | 988 while(w--){\ |
989 asm volatile(\ | 989 asm volatile(\ |
1034 }\ | 1034 }\ |
1035 src += 4-(h+5)*srcStride;\ | 1035 src += 4-(h+5)*srcStride;\ |
1036 dst += 4-h*dstStride;\ | 1036 dst += 4-h*dstStride;\ |
1037 }\ | 1037 }\ |
1038 }\ | 1038 }\ |
1039 static inline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\ | 1039 static av_noinline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\ |
1040 int h = size;\ | 1040 int h = size;\ |
1041 int w = (size+8)>>2;\ | 1041 int w = (size+8)>>2;\ |
1042 src -= 2*srcStride+2;\ | 1042 src -= 2*srcStride+2;\ |
1043 while(w--){\ | 1043 while(w--){\ |
1044 asm volatile(\ | 1044 asm volatile(\ |
1139 }\ | 1139 }\ |
1140 \ | 1140 \ |
1141 static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | 1141 static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
1142 OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 8);\ | 1142 OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 8);\ |
1143 }\ | 1143 }\ |
1144 static void OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | 1144 static av_noinline void OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
1145 OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 16);\ | 1145 OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 16);\ |
1146 OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\ | 1146 OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\ |
1147 }\ | 1147 }\ |
1148 \ | 1148 \ |
1149 static void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | 1149 static av_noinline void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
1150 OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ | 1150 OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ |
1151 OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ | 1151 OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ |
1152 src += 8*srcStride;\ | 1152 src += 8*srcStride;\ |
1153 dst += 8*dstStride;\ | 1153 dst += 8*dstStride;\ |
1154 OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ | 1154 OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ |
1155 OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ | 1155 OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ |
1156 }\ | 1156 }\ |
1157 \ | 1157 \ |
1158 static void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ | 1158 static av_noinline void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ |
1159 OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\ | 1159 OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\ |
1160 OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\ | 1160 OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\ |
1161 src += 8*dstStride;\ | 1161 src += 8*dstStride;\ |
1162 dst += 8*dstStride;\ | 1162 dst += 8*dstStride;\ |
1163 src2 += 8*src2Stride;\ | 1163 src2 += 8*src2Stride;\ |
1171 \ | 1171 \ |
1172 static void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ | 1172 static void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ |
1173 OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst , tmp , src , dstStride, tmpStride, srcStride, 16);\ | 1173 OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst , tmp , src , dstStride, tmpStride, srcStride, 16);\ |
1174 }\ | 1174 }\ |
1175 \ | 1175 \ |
1176 static void OPNAME ## pixels4_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\ | 1176 static av_noinline void OPNAME ## pixels4_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\ |
1177 {\ | 1177 {\ |
1178 asm volatile(\ | 1178 asm volatile(\ |
1179 "movq %5, %%mm6 \n\t"\ | 1179 "movq %5, %%mm6 \n\t"\ |
1180 "movq (%1), %%mm0 \n\t"\ | 1180 "movq (%1), %%mm0 \n\t"\ |
1181 "movq 24(%1), %%mm1 \n\t"\ | 1181 "movq 24(%1), %%mm1 \n\t"\ |
1205 OP(%%mm1, (%2,%4), %%mm5, d)\ | 1205 OP(%%mm1, (%2,%4), %%mm5, d)\ |
1206 :"+a"(src8), "+c"(src16), "+d"(dst)\ | 1206 :"+a"(src8), "+c"(src16), "+d"(dst)\ |
1207 :"S"((long)src8Stride), "D"((long)dstStride), "m"(ff_pw_16)\ | 1207 :"S"((long)src8Stride), "D"((long)dstStride), "m"(ff_pw_16)\ |
1208 :"memory");\ | 1208 :"memory");\ |
1209 }\ | 1209 }\ |
1210 static void OPNAME ## pixels8_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\ | 1210 static av_noinline void OPNAME ## pixels8_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\ |
1211 {\ | 1211 {\ |
1212 asm volatile(\ | 1212 asm volatile(\ |
1213 "movq %0, %%mm6 \n\t"\ | 1213 "movq %0, %%mm6 \n\t"\ |
1214 ::"m"(ff_pw_16)\ | 1214 ::"m"(ff_pw_16)\ |
1215 );\ | 1215 );\ |