comparison i386/h264dsp_mmx.c @ 4527:481763d70193 libavcodec

Prevent H.264 MC-related functions from being inlined (yes, this is much faster; the code just doesn't fit in the code cache otherwise).
author michael
date Fri, 16 Feb 2007 21:21:07 +0000
parents 6e5dcbdbfeba
children 33c6fe489f62
comparison
equal deleted inserted replaced
4526:30b8672a2357 4527:481763d70193
682 "paddw "#F", "#A" \n\t"\ 682 "paddw "#F", "#A" \n\t"\
683 "paddw "#A", %%mm6 \n\t"\ 683 "paddw "#A", %%mm6 \n\t"\
684 "movq %%mm6, "#OF"(%1) \n\t" 684 "movq %%mm6, "#OF"(%1) \n\t"
685 685
686 #define QPEL_H264(OPNAME, OP, MMX)\ 686 #define QPEL_H264(OPNAME, OP, MMX)\
687 static void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ 687 static av_noinline void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
688 int h=4;\ 688 int h=4;\
689 \ 689 \
690 asm volatile(\ 690 asm volatile(\
691 "pxor %%mm7, %%mm7 \n\t"\ 691 "pxor %%mm7, %%mm7 \n\t"\
692 "movq %5, %%mm4 \n\t"\ 692 "movq %5, %%mm4 \n\t"\
722 : "+a"(src), "+c"(dst), "+m"(h)\ 722 : "+a"(src), "+c"(dst), "+m"(h)\
723 : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ 723 : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
724 : "memory"\ 724 : "memory"\
725 );\ 725 );\
726 }\ 726 }\
727 static void OPNAME ## h264_qpel4_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ 727 static av_noinline void OPNAME ## h264_qpel4_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
728 int h=4;\ 728 int h=4;\
729 asm volatile(\ 729 asm volatile(\
730 "pxor %%mm7, %%mm7 \n\t"\ 730 "pxor %%mm7, %%mm7 \n\t"\
731 "movq %0, %%mm4 \n\t"\ 731 "movq %0, %%mm4 \n\t"\
732 "movq %1, %%mm5 \n\t"\ 732 "movq %1, %%mm5 \n\t"\
766 : "D"((long)src2Stride), "S"((long)dstStride)\ 766 : "D"((long)src2Stride), "S"((long)dstStride)\
767 : "memory"\ 767 : "memory"\
768 );\ 768 );\
769 }while(--h);\ 769 }while(--h);\
770 }\ 770 }\
771 static void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ 771 static av_noinline void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
772 src -= 2*srcStride;\ 772 src -= 2*srcStride;\
773 asm volatile(\ 773 asm volatile(\
774 "pxor %%mm7, %%mm7 \n\t"\ 774 "pxor %%mm7, %%mm7 \n\t"\
775 "movd (%0), %%mm0 \n\t"\ 775 "movd (%0), %%mm0 \n\t"\
776 "add %2, %0 \n\t"\ 776 "add %2, %0 \n\t"\
795 : "+a"(src), "+c"(dst)\ 795 : "+a"(src), "+c"(dst)\
796 : "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ 796 : "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
797 : "memory"\ 797 : "memory"\
798 );\ 798 );\
799 }\ 799 }\
800 static void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ 800 static av_noinline void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
801 int h=4;\ 801 int h=4;\
802 int w=3;\ 802 int w=3;\
803 src -= 2*srcStride+2;\ 803 src -= 2*srcStride+2;\
804 while(w--){\ 804 while(w--){\
805 asm volatile(\ 805 asm volatile(\
859 : "S"((long)dstStride), "m"(ff_pw_32)\ 859 : "S"((long)dstStride), "m"(ff_pw_32)\
860 : "memory"\ 860 : "memory"\
861 );\ 861 );\
862 }\ 862 }\
863 \ 863 \
864 static void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ 864 static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
865 int h=8;\ 865 int h=8;\
866 asm volatile(\ 866 asm volatile(\
867 "pxor %%mm7, %%mm7 \n\t"\ 867 "pxor %%mm7, %%mm7 \n\t"\
868 "movq %5, %%mm6 \n\t"\ 868 "movq %5, %%mm6 \n\t"\
869 "1: \n\t"\ 869 "1: \n\t"\
916 : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ 916 : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
917 : "memory"\ 917 : "memory"\
918 );\ 918 );\
919 }\ 919 }\
920 \ 920 \
921 static void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ 921 static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
922 int h=8;\ 922 int h=8;\
923 asm volatile(\ 923 asm volatile(\
924 "pxor %%mm7, %%mm7 \n\t"\ 924 "pxor %%mm7, %%mm7 \n\t"\
925 "movq %0, %%mm6 \n\t"\ 925 "movq %0, %%mm6 \n\t"\
926 :: "m"(ff_pw_5)\ 926 :: "m"(ff_pw_5)\
979 : "memory"\ 979 : "memory"\
980 );\ 980 );\
981 }while(--h);\ 981 }while(--h);\
982 }\ 982 }\
983 \ 983 \
984 static inline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ 984 static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
985 int w= 2;\ 985 int w= 2;\
986 src -= 2*srcStride;\ 986 src -= 2*srcStride;\
987 \ 987 \
988 while(w--){\ 988 while(w--){\
989 asm volatile(\ 989 asm volatile(\
1034 }\ 1034 }\
1035 src += 4-(h+5)*srcStride;\ 1035 src += 4-(h+5)*srcStride;\
1036 dst += 4-h*dstStride;\ 1036 dst += 4-h*dstStride;\
1037 }\ 1037 }\
1038 }\ 1038 }\
1039 static inline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\ 1039 static av_noinline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\
1040 int h = size;\ 1040 int h = size;\
1041 int w = (size+8)>>2;\ 1041 int w = (size+8)>>2;\
1042 src -= 2*srcStride+2;\ 1042 src -= 2*srcStride+2;\
1043 while(w--){\ 1043 while(w--){\
1044 asm volatile(\ 1044 asm volatile(\
1139 }\ 1139 }\
1140 \ 1140 \
1141 static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ 1141 static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1142 OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 8);\ 1142 OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 8);\
1143 }\ 1143 }\
1144 static void OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ 1144 static av_noinline void OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1145 OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 16);\ 1145 OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 16);\
1146 OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\ 1146 OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
1147 }\ 1147 }\
1148 \ 1148 \
1149 static void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ 1149 static av_noinline void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1150 OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ 1150 OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\
1151 OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ 1151 OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
1152 src += 8*srcStride;\ 1152 src += 8*srcStride;\
1153 dst += 8*dstStride;\ 1153 dst += 8*dstStride;\
1154 OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ 1154 OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\
1155 OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ 1155 OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
1156 }\ 1156 }\
1157 \ 1157 \
1158 static void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ 1158 static av_noinline void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
1159 OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\ 1159 OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\
1160 OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\ 1160 OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
1161 src += 8*dstStride;\ 1161 src += 8*dstStride;\
1162 dst += 8*dstStride;\ 1162 dst += 8*dstStride;\
1163 src2 += 8*src2Stride;\ 1163 src2 += 8*src2Stride;\
1171 \ 1171 \
1172 static void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ 1172 static void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
1173 OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst , tmp , src , dstStride, tmpStride, srcStride, 16);\ 1173 OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst , tmp , src , dstStride, tmpStride, srcStride, 16);\
1174 }\ 1174 }\
1175 \ 1175 \
1176 static void OPNAME ## pixels4_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\ 1176 static av_noinline void OPNAME ## pixels4_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\
1177 {\ 1177 {\
1178 asm volatile(\ 1178 asm volatile(\
1179 "movq %5, %%mm6 \n\t"\ 1179 "movq %5, %%mm6 \n\t"\
1180 "movq (%1), %%mm0 \n\t"\ 1180 "movq (%1), %%mm0 \n\t"\
1181 "movq 24(%1), %%mm1 \n\t"\ 1181 "movq 24(%1), %%mm1 \n\t"\
1205 OP(%%mm1, (%2,%4), %%mm5, d)\ 1205 OP(%%mm1, (%2,%4), %%mm5, d)\
1206 :"+a"(src8), "+c"(src16), "+d"(dst)\ 1206 :"+a"(src8), "+c"(src16), "+d"(dst)\
1207 :"S"((long)src8Stride), "D"((long)dstStride), "m"(ff_pw_16)\ 1207 :"S"((long)src8Stride), "D"((long)dstStride), "m"(ff_pw_16)\
1208 :"memory");\ 1208 :"memory");\
1209 }\ 1209 }\
1210 static void OPNAME ## pixels8_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\ 1210 static av_noinline void OPNAME ## pixels8_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\
1211 {\ 1211 {\
1212 asm volatile(\ 1212 asm volatile(\
1213 "movq %0, %%mm6 \n\t"\ 1213 "movq %0, %%mm6 \n\t"\
1214 ::"m"(ff_pw_16)\ 1214 ::"m"(ff_pw_16)\
1215 );\ 1215 );\