Mercurial > libavcodec.hg
comparison imgconvert.c @ 801:f720b01c0fd5 libavcodec
1) Add MMX deinterlace code.
2) "Fix" first and last line deinterlace. I had second thoughts, since this might be some image filtering algorithm that someone cleverer than I created.
3) Add in-place deinterlace functions (only used when src == dst).
patch by (Fred <foohoo at shaw dot ca>)
author | michaelni |
---|---|
date | Wed, 30 Oct 2002 09:09:34 +0000 |
parents | 918756bffda2 |
children | 48215b2c3888 |
comparison
equal
deleted
inserted
replaced
800:52ac8213387e | 801:f720b01c0fd5 |
---|---|
19 #include "avcodec.h" | 19 #include "avcodec.h" |
20 #include "dsputil.h" | 20 #include "dsputil.h" |
21 | 21 |
22 #ifdef USE_FASTMEMCPY | 22 #ifdef USE_FASTMEMCPY |
23 #include "fastmemcpy.h" | 23 #include "fastmemcpy.h" |
24 #endif | |
25 | |
26 #ifdef HAVE_MMX | |
27 #include "i386/mmx.h" | |
24 #endif | 28 #endif |
25 /* XXX: totally non optimized */ | 29 /* XXX: totally non optimized */ |
26 | 30 |
27 static void yuv422_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr, | 31 static void yuv422_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr, |
28 UINT8 *src, int width, int height) | 32 UINT8 *src, int width, int height) |
760 return -1; | 764 return -1; |
761 } | 765 } |
762 return 0; | 766 return 0; |
763 } | 767 } |
764 | 768 |
769 | |
770 #ifdef HAVE_MMX | |
771 #define DEINT_INPLACE_LINE_LUM \ | |
772 movd_m2r(lum_m4[0],mm0);\ | |
773 movd_m2r(lum_m3[0],mm1);\ | |
774 movd_m2r(lum_m2[0],mm2);\ | |
775 movd_m2r(lum_m1[0],mm3);\ | |
776 movd_m2r(lum[0],mm4);\ | |
777 punpcklbw_r2r(mm7,mm0);\ | |
778 movd_r2m(mm2,lum_m4[0]);\ | |
779 punpcklbw_r2r(mm7,mm1);\ | |
780 punpcklbw_r2r(mm7,mm2);\ | |
781 punpcklbw_r2r(mm7,mm3);\ | |
782 punpcklbw_r2r(mm7,mm4);\ | |
783 paddw_r2r(mm3,mm1);\ | |
784 psllw_i2r(1,mm2);\ | |
785 paddw_r2r(mm4,mm0);\ | |
786 psllw_i2r(2,mm1);\ | |
787 paddw_r2r(mm6,mm2);\ | |
788 paddw_r2r(mm2,mm1);\ | |
789 psubusw_r2r(mm0,mm1);\ | |
790 psrlw_i2r(3,mm1);\ | |
791 packuswb_r2r(mm7,mm1);\ | |
792 movd_r2m(mm1,lum_m2[0]); | |
793 | |
794 #define DEINT_LINE_LUM \ | |
795 movd_m2r(lum_m4[0],mm0);\ | |
796 movd_m2r(lum_m3[0],mm1);\ | |
797 movd_m2r(lum_m2[0],mm2);\ | |
798 movd_m2r(lum_m1[0],mm3);\ | |
799 movd_m2r(lum[0],mm4);\ | |
800 punpcklbw_r2r(mm7,mm0);\ | |
801 punpcklbw_r2r(mm7,mm1);\ | |
802 punpcklbw_r2r(mm7,mm2);\ | |
803 punpcklbw_r2r(mm7,mm3);\ | |
804 punpcklbw_r2r(mm7,mm4);\ | |
805 paddw_r2r(mm3,mm1);\ | |
806 psllw_i2r(1,mm2);\ | |
807 paddw_r2r(mm4,mm0);\ | |
808 psllw_i2r(2,mm1);\ | |
809 paddw_r2r(mm6,mm2);\ | |
810 paddw_r2r(mm2,mm1);\ | |
811 psubusw_r2r(mm0,mm1);\ | |
812 psrlw_i2r(3,mm1);\ | |
813 packuswb_r2r(mm7,mm1);\ | |
814 movd_r2m(mm1,dst[0]); | |
815 #endif | |
816 | |
765 /* filter parameters: [-1 4 2 4 -1] // 8 */ | 817 /* filter parameters: [-1 4 2 4 -1] // 8 */ |
766 static void deinterlace_line(UINT8 *dst, UINT8 *src, int src_wrap, | 818 static void deinterlace_line(UINT8 *dst, UINT8 *lum_m4, UINT8 *lum_m3, UINT8 *lum_m2, UINT8 *lum_m1, UINT8 *lum, |
767 int size) | 819 int size) |
768 { | 820 { |
821 #ifndef HAVE_MMX | |
769 UINT8 *cm = cropTbl + MAX_NEG_CROP; | 822 UINT8 *cm = cropTbl + MAX_NEG_CROP; |
770 int sum; | 823 int sum; |
771 UINT8 *s; | |
772 | 824 |
773 for(;size > 0;size--) { | 825 for(;size > 0;size--) { |
774 s = src; | 826 sum = -lum_m4[0]; |
775 sum = -s[0]; | 827 sum += lum_m3[0] << 2; |
776 s += src_wrap; | 828 sum += lum_m2[0] << 1; |
777 sum += s[0] << 2; | 829 sum += lum_m1[0] << 2; |
778 s += src_wrap; | 830 sum += -lum[0]; |
779 sum += s[0] << 1; | |
780 s += src_wrap; | |
781 sum += s[0] << 2; | |
782 s += src_wrap; | |
783 sum += -s[0]; | |
784 dst[0] = cm[(sum + 4) >> 3]; | 831 dst[0] = cm[(sum + 4) >> 3]; |
832 lum_m4++; | |
833 lum_m3++; | |
834 lum_m2++; | |
835 lum_m1++; | |
836 lum++; | |
785 dst++; | 837 dst++; |
786 src++; | 838 } |
787 } | 839 #else |
840 | |
841 for (;size > 3; size-=4) { | |
842 DEINT_LINE_LUM | |
843 lum_m4+=4; | |
844 lum_m3+=4; | |
845 lum_m2+=4; | |
846 lum_m1+=4; | |
847 lum+=4; | |
848 dst+=4; | |
849 } | |
850 #endif | |
851 } | |
852 static void deinterlace_line_inplace(UINT8 *lum_m4, UINT8 *lum_m3, UINT8 *lum_m2, UINT8 *lum_m1, UINT8 *lum, | |
853 int size) | |
854 { | |
855 #ifndef HAVE_MMX | |
856 UINT8 *cm = cropTbl + MAX_NEG_CROP; | |
857 int sum; | |
858 | |
859 for(;size > 0;size--) { | |
860 sum = -lum_m4[0]; | |
861 sum += lum_m3[0] << 2; | |
862 sum += lum_m2[0] << 1; | |
863 lum_m4[0]=lum_m2[0]; | |
864 sum += lum_m1[0] << 2; | |
865 sum += -lum[0]; | |
866 lum_m2[0] = cm[(sum + 4) >> 3]; | |
867 lum_m4++; | |
868 lum_m3++; | |
869 lum_m2++; | |
870 lum_m1++; | |
871 lum++; | |
872 } | |
873 #else | |
874 | |
875 for (;size > 3; size-=4) { | |
876 DEINT_INPLACE_LINE_LUM | |
877 lum_m4+=4; | |
878 lum_m3+=4; | |
879 lum_m2+=4; | |
880 lum_m1+=4; | |
881 lum+=4; | |
882 } | |
883 #endif | |
788 } | 884 } |
789 | 885 |
790 /* deinterlacing : 2 temporal taps, 3 spatial taps linear filter. The | 886 /* deinterlacing : 2 temporal taps, 3 spatial taps linear filter. The |
791 top field is copied as is, but the bottom field is deinterlaced | 887 top field is copied as is, but the bottom field is deinterlaced |
792 against the top field. */ | 888 against the top field. */ |
793 static void deinterlace_bottom_field(UINT8 *dst, int dst_wrap, | 889 static void deinterlace_bottom_field(UINT8 *dst, int dst_wrap, |
794 UINT8 *src1, int src_wrap, | 890 UINT8 *src1, int src_wrap, |
891 int width, int height) | |
892 { | |
893 UINT8 *src_m2, *src_m1, *src_0, *src_p1, *src_p2; | |
894 int y; | |
895 | |
896 src_m2 = src1; | |
897 src_m1 = src1; | |
898 src_0=&src_m1[src_wrap]; | |
899 src_p1=&src_0[src_wrap]; | |
900 src_p2=&src_p1[src_wrap]; | |
901 for(y=0;y<(height-2);y+=2) { | |
902 memcpy(dst,src_m1,width); | |
903 dst += dst_wrap; | |
904 deinterlace_line(dst,src_m2,src_m1,src_0,src_p1,src_p2,width); | |
905 src_m2 = src_0; | |
906 src_m1 = src_p1; | |
907 src_0 = src_p2; | |
908 src_p1 += 2*src_wrap; | |
909 src_p2 += 2*src_wrap; | |
910 dst += dst_wrap; | |
911 } | |
912 memcpy(dst,src_m1,width); | |
913 dst += dst_wrap; | |
914 /* do last line */ | |
915 deinterlace_line(dst,src_m2,src_m1,src_0,src_0,src_0,width); | |
916 } | |
917 | |
918 static void deinterlace_bottom_field_inplace(UINT8 *src1, int src_wrap, | |
795 int width, int height) | 919 int width, int height) |
796 { | 920 { |
797 UINT8 *src, *ptr; | 921 UINT8 *src_m1, *src_0, *src_p1, *src_p2; |
798 int y, y1, i; | 922 int y; |
799 UINT8 *buf; | 923 UINT8 *buf; |
800 | 924 buf = (UINT8*)av_malloc(width); |
801 buf = (UINT8*)av_malloc(5 * width); | 925 |
802 | 926 src_m1 = src1; |
803 src = src1; | 927 memcpy(buf,src_m1,width); |
804 for(y=0;y<height;y+=2) { | 928 src_0=&src_m1[src_wrap]; |
805 /* copy top field line */ | 929 src_p1=&src_0[src_wrap]; |
806 memcpy(dst, src, width); | 930 src_p2=&src_p1[src_wrap]; |
807 dst += dst_wrap; | 931 for(y=0;y<(height-2);y+=2) { |
808 src += (1 - 2) * src_wrap; | 932 deinterlace_line_inplace(buf,src_m1,src_0,src_p1,src_p2,width); |
809 y1 = y - 2; | 933 src_m1 = src_p1; |
810 if (y1 >= 0 && (y1 + 4) < height) { | 934 src_0 = src_p2; |
811 /* fast case : no edges */ | 935 src_p1 += 2*src_wrap; |
812 deinterlace_line(dst, src, src_wrap, width); | 936 src_p2 += 2*src_wrap; |
813 } else { | 937 } |
814 /* in order to use the same function, we use an intermediate buffer */ | 938 /* do last line */ |
815 ptr = buf; | 939 deinterlace_line_inplace(buf,src_m1,src_0,src_0,src_0,width); |
816 for(i=0;i<5;i++) { | |
817 if (y1 < 0) | |
818 memcpy(ptr, src1, width); | |
819 else if (y1 >= height) | |
820 memcpy(ptr, src1 + (height - 1) * src_wrap, width); | |
821 else | |
822 memcpy(ptr, src1 + y1 * src_wrap, width); | |
823 y1++; | |
824 ptr += width; | |
825 } | |
826 deinterlace_line(dst, buf, width, width); | |
827 } | |
828 dst += dst_wrap; | |
829 src += (2 + 1) * src_wrap; | |
830 } | |
831 av_free(buf); | 940 av_free(buf); |
832 } | 941 } |
833 | 942 |
834 | 943 |
835 /* deinterlace, return -1 if format not handled */ | 944 /* deinterlace - if not supported return -1 */ |
836 int avpicture_deinterlace(AVPicture *dst, AVPicture *src, | 945 int avpicture_deinterlace(AVPicture *dst, AVPicture *src, |
837 int pix_fmt, int width, int height) | 946 int pix_fmt, int width, int height) |
838 { | 947 { |
839 int i; | 948 int i; |
840 | 949 |
841 if (pix_fmt != PIX_FMT_YUV420P && | 950 if (pix_fmt != PIX_FMT_YUV420P && |
842 pix_fmt != PIX_FMT_YUV422P && | 951 pix_fmt != PIX_FMT_YUV422P && |
843 pix_fmt != PIX_FMT_YUV444P) | 952 pix_fmt != PIX_FMT_YUV444P) |
844 return -1; | 953 return -1; |
845 if ((width & 1) != 0 || (height & 3) != 0) | 954 if ((width & 3) != 0 || (height & 3) != 0) |
846 return -1; | 955 return -1; |
956 | |
957 #ifdef HAVE_MMX | |
958 { | |
959 mmx_t rounder; | |
960 rounder.uw[0]=4; | |
961 rounder.uw[1]=4; | |
962 rounder.uw[2]=4; | |
963 rounder.uw[3]=4; | |
964 pxor_r2r(mm7,mm7); | |
965 movq_m2r(rounder,mm6); | |
966 } | |
967 #endif | |
968 | |
847 | 969 |
848 for(i=0;i<3;i++) { | 970 for(i=0;i<3;i++) { |
849 if (i == 1) { | 971 if (i == 1) { |
850 switch(pix_fmt) { | 972 switch(pix_fmt) { |
851 case PIX_FMT_YUV420P: | 973 case PIX_FMT_YUV420P: |
857 break; | 979 break; |
858 default: | 980 default: |
859 break; | 981 break; |
860 } | 982 } |
861 } | 983 } |
862 deinterlace_bottom_field(dst->data[i], dst->linesize[i], | 984 if (src == dst) { |
863 src->data[i], src->linesize[i], | 985 deinterlace_bottom_field_inplace(src->data[i], src->linesize[i], |
864 width, height); | 986 width, height); |
865 } | 987 } else { |
988 deinterlace_bottom_field(dst->data[i],dst->linesize[i], | |
989 src->data[i], src->linesize[i], | |
990 width, height); | |
991 } | |
992 } | |
993 #ifdef HAVE_MMX | |
994 emms(); | |
995 #endif | |
866 return 0; | 996 return 0; |
867 } | 997 } |
868 | 998 |
869 #undef FIX | 999 #undef FIX |