comparison imgconvert.c @ 801:f720b01c0fd5 libavcodec

1) Add MMX deinterlace code. 2) "Fix" first- and last-line deinterlacing. (I had second thoughts about this, as it might be some image-filtering algorithm that someone cleverer than I created.) 3) Add in-place deinterlace functions (only used when src == dst). Patch by Fred <foohoo at shaw dot ca>.
author michaelni
date Wed, 30 Oct 2002 09:09:34 +0000
parents 918756bffda2
children 48215b2c3888
comparison
equal deleted inserted replaced
800:52ac8213387e 801:f720b01c0fd5
19 #include "avcodec.h" 19 #include "avcodec.h"
20 #include "dsputil.h" 20 #include "dsputil.h"
21 21
22 #ifdef USE_FASTMEMCPY 22 #ifdef USE_FASTMEMCPY
23 #include "fastmemcpy.h" 23 #include "fastmemcpy.h"
24 #endif
25
26 #ifdef HAVE_MMX
27 #include "i386/mmx.h"
24 #endif 28 #endif
25 /* XXX: totally non optimized */ 29 /* XXX: totally non optimized */
26 30
27 static void yuv422_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr, 31 static void yuv422_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
28 UINT8 *src, int width, int height) 32 UINT8 *src, int width, int height)
760 return -1; 764 return -1;
761 } 765 }
762 return 0; 766 return 0;
763 } 767 }
764 768
769
770 #ifdef HAVE_MMX
771 #define DEINT_INPLACE_LINE_LUM \
772 movd_m2r(lum_m4[0],mm0);\
773 movd_m2r(lum_m3[0],mm1);\
774 movd_m2r(lum_m2[0],mm2);\
775 movd_m2r(lum_m1[0],mm3);\
776 movd_m2r(lum[0],mm4);\
777 punpcklbw_r2r(mm7,mm0);\
778 movd_r2m(mm2,lum_m4[0]);\
779 punpcklbw_r2r(mm7,mm1);\
780 punpcklbw_r2r(mm7,mm2);\
781 punpcklbw_r2r(mm7,mm3);\
782 punpcklbw_r2r(mm7,mm4);\
783 paddw_r2r(mm3,mm1);\
784 psllw_i2r(1,mm2);\
785 paddw_r2r(mm4,mm0);\
786 psllw_i2r(2,mm1);\
787 paddw_r2r(mm6,mm2);\
788 paddw_r2r(mm2,mm1);\
789 psubusw_r2r(mm0,mm1);\
790 psrlw_i2r(3,mm1);\
791 packuswb_r2r(mm7,mm1);\
792 movd_r2m(mm1,lum_m2[0]);
793
794 #define DEINT_LINE_LUM \
795 movd_m2r(lum_m4[0],mm0);\
796 movd_m2r(lum_m3[0],mm1);\
797 movd_m2r(lum_m2[0],mm2);\
798 movd_m2r(lum_m1[0],mm3);\
799 movd_m2r(lum[0],mm4);\
800 punpcklbw_r2r(mm7,mm0);\
801 punpcklbw_r2r(mm7,mm1);\
802 punpcklbw_r2r(mm7,mm2);\
803 punpcklbw_r2r(mm7,mm3);\
804 punpcklbw_r2r(mm7,mm4);\
805 paddw_r2r(mm3,mm1);\
806 psllw_i2r(1,mm2);\
807 paddw_r2r(mm4,mm0);\
808 psllw_i2r(2,mm1);\
809 paddw_r2r(mm6,mm2);\
810 paddw_r2r(mm2,mm1);\
811 psubusw_r2r(mm0,mm1);\
812 psrlw_i2r(3,mm1);\
813 packuswb_r2r(mm7,mm1);\
814 movd_r2m(mm1,dst[0]);
815 #endif
816
765 /* filter parameters: [-1 4 2 4 -1] // 8 */ 817 /* filter parameters: [-1 4 2 4 -1] // 8 */
766 static void deinterlace_line(UINT8 *dst, UINT8 *src, int src_wrap, 818 static void deinterlace_line(UINT8 *dst, UINT8 *lum_m4, UINT8 *lum_m3, UINT8 *lum_m2, UINT8 *lum_m1, UINT8 *lum,
767 int size) 819 int size)
768 { 820 {
821 #ifndef HAVE_MMX
769 UINT8 *cm = cropTbl + MAX_NEG_CROP; 822 UINT8 *cm = cropTbl + MAX_NEG_CROP;
770 int sum; 823 int sum;
771 UINT8 *s;
772 824
773 for(;size > 0;size--) { 825 for(;size > 0;size--) {
774 s = src; 826 sum = -lum_m4[0];
775 sum = -s[0]; 827 sum += lum_m3[0] << 2;
776 s += src_wrap; 828 sum += lum_m2[0] << 1;
777 sum += s[0] << 2; 829 sum += lum_m1[0] << 2;
778 s += src_wrap; 830 sum += -lum[0];
779 sum += s[0] << 1;
780 s += src_wrap;
781 sum += s[0] << 2;
782 s += src_wrap;
783 sum += -s[0];
784 dst[0] = cm[(sum + 4) >> 3]; 831 dst[0] = cm[(sum + 4) >> 3];
832 lum_m4++;
833 lum_m3++;
834 lum_m2++;
835 lum_m1++;
836 lum++;
785 dst++; 837 dst++;
786 src++; 838 }
787 } 839 #else
840
841 for (;size > 3; size-=4) {
842 DEINT_LINE_LUM
843 lum_m4+=4;
844 lum_m3+=4;
845 lum_m2+=4;
846 lum_m1+=4;
847 lum+=4;
848 dst+=4;
849 }
850 #endif
851 }
852 static void deinterlace_line_inplace(UINT8 *lum_m4, UINT8 *lum_m3, UINT8 *lum_m2, UINT8 *lum_m1, UINT8 *lum,
853 int size)
854 {
855 #ifndef HAVE_MMX
856 UINT8 *cm = cropTbl + MAX_NEG_CROP;
857 int sum;
858
859 for(;size > 0;size--) {
860 sum = -lum_m4[0];
861 sum += lum_m3[0] << 2;
862 sum += lum_m2[0] << 1;
863 lum_m4[0]=lum_m2[0];
864 sum += lum_m1[0] << 2;
865 sum += -lum[0];
866 lum_m2[0] = cm[(sum + 4) >> 3];
867 lum_m4++;
868 lum_m3++;
869 lum_m2++;
870 lum_m1++;
871 lum++;
872 }
873 #else
874
875 for (;size > 3; size-=4) {
876 DEINT_INPLACE_LINE_LUM
877 lum_m4+=4;
878 lum_m3+=4;
879 lum_m2+=4;
880 lum_m1+=4;
881 lum+=4;
882 }
883 #endif
788 } 884 }
789 885
790 /* deinterlacing : 2 temporal taps, 3 spatial taps linear filter. The 886 /* deinterlacing : 2 temporal taps, 3 spatial taps linear filter. The
791 top field is copied as is, but the bottom field is deinterlaced 887 top field is copied as is, but the bottom field is deinterlaced
792 against the top field. */ 888 against the top field. */
793 static void deinterlace_bottom_field(UINT8 *dst, int dst_wrap, 889 static void deinterlace_bottom_field(UINT8 *dst, int dst_wrap,
794 UINT8 *src1, int src_wrap, 890 UINT8 *src1, int src_wrap,
891 int width, int height)
892 {
893 UINT8 *src_m2, *src_m1, *src_0, *src_p1, *src_p2;
894 int y;
895
896 src_m2 = src1;
897 src_m1 = src1;
898 src_0=&src_m1[src_wrap];
899 src_p1=&src_0[src_wrap];
900 src_p2=&src_p1[src_wrap];
901 for(y=0;y<(height-2);y+=2) {
902 memcpy(dst,src_m1,width);
903 dst += dst_wrap;
904 deinterlace_line(dst,src_m2,src_m1,src_0,src_p1,src_p2,width);
905 src_m2 = src_0;
906 src_m1 = src_p1;
907 src_0 = src_p2;
908 src_p1 += 2*src_wrap;
909 src_p2 += 2*src_wrap;
910 dst += dst_wrap;
911 }
912 memcpy(dst,src_m1,width);
913 dst += dst_wrap;
914 /* do last line */
915 deinterlace_line(dst,src_m2,src_m1,src_0,src_0,src_0,width);
916 }
917
918 static void deinterlace_bottom_field_inplace(UINT8 *src1, int src_wrap,
795 int width, int height) 919 int width, int height)
796 { 920 {
797 UINT8 *src, *ptr; 921 UINT8 *src_m1, *src_0, *src_p1, *src_p2;
798 int y, y1, i; 922 int y;
799 UINT8 *buf; 923 UINT8 *buf;
800 924 buf = (UINT8*)av_malloc(width);
801 buf = (UINT8*)av_malloc(5 * width); 925
802 926 src_m1 = src1;
803 src = src1; 927 memcpy(buf,src_m1,width);
804 for(y=0;y<height;y+=2) { 928 src_0=&src_m1[src_wrap];
805 /* copy top field line */ 929 src_p1=&src_0[src_wrap];
806 memcpy(dst, src, width); 930 src_p2=&src_p1[src_wrap];
807 dst += dst_wrap; 931 for(y=0;y<(height-2);y+=2) {
808 src += (1 - 2) * src_wrap; 932 deinterlace_line_inplace(buf,src_m1,src_0,src_p1,src_p2,width);
809 y1 = y - 2; 933 src_m1 = src_p1;
810 if (y1 >= 0 && (y1 + 4) < height) { 934 src_0 = src_p2;
811 /* fast case : no edges */ 935 src_p1 += 2*src_wrap;
812 deinterlace_line(dst, src, src_wrap, width); 936 src_p2 += 2*src_wrap;
813 } else { 937 }
814 /* in order to use the same function, we use an intermediate buffer */ 938 /* do last line */
815 ptr = buf; 939 deinterlace_line_inplace(buf,src_m1,src_0,src_0,src_0,width);
816 for(i=0;i<5;i++) {
817 if (y1 < 0)
818 memcpy(ptr, src1, width);
819 else if (y1 >= height)
820 memcpy(ptr, src1 + (height - 1) * src_wrap, width);
821 else
822 memcpy(ptr, src1 + y1 * src_wrap, width);
823 y1++;
824 ptr += width;
825 }
826 deinterlace_line(dst, buf, width, width);
827 }
828 dst += dst_wrap;
829 src += (2 + 1) * src_wrap;
830 }
831 av_free(buf); 940 av_free(buf);
832 } 941 }
833 942
834 943
835 /* deinterlace, return -1 if format not handled */ 944 /* deinterlace - if not supported return -1 */
836 int avpicture_deinterlace(AVPicture *dst, AVPicture *src, 945 int avpicture_deinterlace(AVPicture *dst, AVPicture *src,
837 int pix_fmt, int width, int height) 946 int pix_fmt, int width, int height)
838 { 947 {
839 int i; 948 int i;
840 949
841 if (pix_fmt != PIX_FMT_YUV420P && 950 if (pix_fmt != PIX_FMT_YUV420P &&
842 pix_fmt != PIX_FMT_YUV422P && 951 pix_fmt != PIX_FMT_YUV422P &&
843 pix_fmt != PIX_FMT_YUV444P) 952 pix_fmt != PIX_FMT_YUV444P)
844 return -1; 953 return -1;
845 if ((width & 1) != 0 || (height & 3) != 0) 954 if ((width & 3) != 0 || (height & 3) != 0)
846 return -1; 955 return -1;
956
957 #ifdef HAVE_MMX
958 {
959 mmx_t rounder;
960 rounder.uw[0]=4;
961 rounder.uw[1]=4;
962 rounder.uw[2]=4;
963 rounder.uw[3]=4;
964 pxor_r2r(mm7,mm7);
965 movq_m2r(rounder,mm6);
966 }
967 #endif
968
847 969
848 for(i=0;i<3;i++) { 970 for(i=0;i<3;i++) {
849 if (i == 1) { 971 if (i == 1) {
850 switch(pix_fmt) { 972 switch(pix_fmt) {
851 case PIX_FMT_YUV420P: 973 case PIX_FMT_YUV420P:
857 break; 979 break;
858 default: 980 default:
859 break; 981 break;
860 } 982 }
861 } 983 }
862 deinterlace_bottom_field(dst->data[i], dst->linesize[i], 984 if (src == dst) {
863 src->data[i], src->linesize[i], 985 deinterlace_bottom_field_inplace(src->data[i], src->linesize[i],
864 width, height); 986 width, height);
865 } 987 } else {
988 deinterlace_bottom_field(dst->data[i],dst->linesize[i],
989 src->data[i], src->linesize[i],
990 width, height);
991 }
992 }
993 #ifdef HAVE_MMX
994 emms();
995 #endif
866 return 0; 996 return 0;
867 } 997 }
868 998
869 #undef FIX 999 #undef FIX