# HG changeset patch # User vitor # Date 1280587851 0 # Node ID b01d01738bc4fa5346e8db7ee3de21974e100eaa # Parent f903ba955ae7e75248c080a39dfa6ed4fb8ab22f Convert deinterlacing MMX code to YASM diff -r f903ba955ae7 -r b01d01738bc4 imgconvert.c --- a/imgconvert.c Fri Jul 30 21:33:51 2010 +0000 +++ b/imgconvert.c Sat Jul 31 14:50:51 2010 +0000 @@ -39,7 +39,6 @@ #include "libavcore/imgutils.h" #if HAVE_MMX -#include "x86/mmx.h" #include "x86/dsputil_mmx.h" #endif @@ -55,6 +54,14 @@ #define FF_PIXEL_PACKED 1 /**< only one components containing all the channels */ #define FF_PIXEL_PALETTE 2 /**< one components containing indexes for a palette */ +#if HAVE_MMX +#define deinterlace_line_inplace ff_deinterlace_line_inplace_mmx +#define deinterlace_line ff_deinterlace_line_mmx +#else +#define deinterlace_line_inplace deinterlace_line_inplace_c +#define deinterlace_line deinterlace_line_c +#endif + typedef struct PixFmtInfo { uint8_t nb_channels; /**< number of channels (including alpha) */ uint8_t color_type; /**< color type (see FF_COLOR_xxx constants) */ @@ -1119,61 +1126,14 @@ return ret; } -#if HAVE_MMX -#define DEINT_INPLACE_LINE_LUM \ - movd_m2r(lum_m4[0],mm0);\ - movd_m2r(lum_m3[0],mm1);\ - movd_m2r(lum_m2[0],mm2);\ - movd_m2r(lum_m1[0],mm3);\ - movd_m2r(lum[0],mm4);\ - punpcklbw_r2r(mm7,mm0);\ - movd_r2m(mm2,lum_m4[0]);\ - punpcklbw_r2r(mm7,mm1);\ - punpcklbw_r2r(mm7,mm2);\ - punpcklbw_r2r(mm7,mm3);\ - punpcklbw_r2r(mm7,mm4);\ - paddw_r2r(mm3,mm1);\ - psllw_i2r(1,mm2);\ - paddw_r2r(mm4,mm0);\ - psllw_i2r(2,mm1);\ - paddw_r2r(mm6,mm2);\ - paddw_r2r(mm2,mm1);\ - psubusw_r2r(mm0,mm1);\ - psrlw_i2r(3,mm1);\ - packuswb_r2r(mm7,mm1);\ - movd_r2m(mm1,lum_m2[0]); - -#define DEINT_LINE_LUM \ - movd_m2r(lum_m4[0],mm0);\ - movd_m2r(lum_m3[0],mm1);\ - movd_m2r(lum_m2[0],mm2);\ - movd_m2r(lum_m1[0],mm3);\ - movd_m2r(lum[0],mm4);\ - punpcklbw_r2r(mm7,mm0);\ - punpcklbw_r2r(mm7,mm1);\ - punpcklbw_r2r(mm7,mm2);\ - punpcklbw_r2r(mm7,mm3);\ - punpcklbw_r2r(mm7,mm4);\ - paddw_r2r(mm3,mm1);\ - psllw_i2r(1,mm2);\ - paddw_r2r(mm4,mm0);\ - psllw_i2r(2,mm1);\ - paddw_r2r(mm6,mm2);\ - paddw_r2r(mm2,mm1);\ - psubusw_r2r(mm0,mm1);\ - psrlw_i2r(3,mm1);\ - packuswb_r2r(mm7,mm1);\ - movd_r2m(mm1,dst[0]); -#endif - +#if !HAVE_MMX /* filter parameters: [-1 4 2 4 -1] // 8 */ -static void deinterlace_line(uint8_t *dst, +static void deinterlace_line_c(uint8_t *dst, const uint8_t *lum_m4, const uint8_t *lum_m3, const uint8_t *lum_m2, const uint8_t *lum_m1, const uint8_t *lum, int size) { -#if !HAVE_MMX uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; int sum; @@ -1191,27 +1151,12 @@ lum++; dst++; } -#else +} - { - pxor_r2r(mm7,mm7); - movq_m2r(ff_pw_4,mm6); - } - for (;size > 3; size-=4) { - DEINT_LINE_LUM - lum_m4+=4; - lum_m3+=4; - lum_m2+=4; - lum_m1+=4; - lum+=4; - dst+=4; - } -#endif -} -static void deinterlace_line_inplace(uint8_t *lum_m4, uint8_t *lum_m3, uint8_t *lum_m2, uint8_t *lum_m1, uint8_t *lum, - int size) +static void deinterlace_line_inplace_c(uint8_t *lum_m4, uint8_t *lum_m3, + uint8_t *lum_m2, uint8_t *lum_m1, + uint8_t *lum, int size) { -#if !HAVE_MMX uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; int sum; @@ -1229,22 +1174,8 @@ lum_m1++; lum++; } -#else - - { - pxor_r2r(mm7,mm7); - movq_m2r(ff_pw_4,mm6); - } - for (;size > 3; size-=4) { - DEINT_INPLACE_LINE_LUM - lum_m4+=4; - lum_m3+=4; - lum_m2+=4; - lum_m1+=4; - lum+=4; - } +} #endif -} /* deinterlacing : 2 temporal taps, 3 spatial taps linear filter. The top field is copied as is, but the bottom field is deinterlaced diff -r f903ba955ae7 -r b01d01738bc4 x86/Makefile --- a/x86/Makefile Fri Jul 30 21:33:51 2010 +0000 +++ b/x86/Makefile Sat Jul 31 14:50:51 2010 +0000 @@ -35,6 +35,7 @@ YASM-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp.o MMX-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp-init.o MMX-OBJS-$(HAVE_YASM) += x86/dsputil_yasm.o \ + x86/deinterlace.o \ $(YASM-OBJS-yes) MMX-OBJS-$(CONFIG_FFT) += x86/fft.o diff -r f903ba955ae7 -r b01d01738bc4 x86/deinterlace.asm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/x86/deinterlace.asm Sat Jul 31 14:50:51 2010 +0000 @@ -0,0 +1,81 @@ +;****************************************************************************** +;* MMX optimized deinterlacing functions +;* Copyright (c) 2010 Vitor Sessak +;* Copyright (c) 2002 Michael Niedermayer +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* 51, Inc., Foundation Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "x86inc.asm" +%include "x86util.asm" + +SECTION_RODATA + +cextern pw_4 + +%macro DEINTERLACE 1 +%ifidn %1, inplace +;void ff_deinterlace_line_inplace_mmx(const uint8_t *lum_m4, const uint8_t *lum_m3, const uint8_t *lum_m2, const uint8_t *lum_m1, const uint8_t *lum, int size) +cglobal deinterlace_line_inplace_mmx, 6,6,7, lum_m4, lum_m3, lum_m2, lum_m1, lum, size +%else +;void ff_deinterlace_line_mmx(uint8_t *dst, const uint8_t *lum_m4, const uint8_t *lum_m3, const uint8_t *lum_m2, const uint8_t *lum_m1, const uint8_t *lum, int size) +cglobal deinterlace_line_mmx, 7,7,7, dst, lum_m4, lum_m3, lum_m2, lum_m1, lum, size +%endif + pxor mm7, mm7 + movq mm6, [pw_4] +.nextrow + movd mm0, [lum_m4q] + movd mm1, [lum_m3q] + movd mm2, [lum_m2q] +%ifidn %1, inplace + movd [lum_m4q], mm2 +%endif + movd mm3, [lum_m1q] + movd mm4, [lumq] + punpcklbw mm0, mm7 + punpcklbw mm1, mm7 + punpcklbw mm2, mm7 + punpcklbw mm3, mm7 + punpcklbw mm4, mm7 + paddw mm1, mm3 + psllw mm2, 1 + paddw mm0, mm4 + psllw mm1, 2 + paddw mm2, mm6 + paddw mm1, mm2 + psubusw mm1, mm0 + psrlw mm1, 3 + packuswb mm1, mm7 +%ifidn %1, inplace + movd [lum_m2q], mm1 +%else + movd [dstq], mm1 + add dstq, 4 +%endif + add lum_m4q, 4 + add lum_m3q, 4 + add lum_m2q, 4 + add lum_m1q, 4 + add lumq, 4 + sub sized, 4 + jg .nextrow + REP_RET +%endmacro + +DEINTERLACE "" + +DEINTERLACE inplace diff -r f903ba955ae7 -r b01d01738bc4 x86/dsputil_mmx.h --- a/x86/dsputil_mmx.h Fri Jul 30 21:33:51 2010 +0000 +++ b/x86/dsputil_mmx.h Sat Jul 31 14:50:51 2010 +0000 @@ -179,4 +179,17 @@ void ff_mmx_idct(DCTELEM *block); void ff_mmxext_idct(DCTELEM *block); + +void ff_deinterlace_line_mmx(uint8_t *dst, + const uint8_t *lum_m4, const uint8_t *lum_m3, + const uint8_t *lum_m2, const uint8_t *lum_m1, + const uint8_t *lum, + int size); + +void ff_deinterlace_line_inplace_mmx(const uint8_t *lum_m4, + const uint8_t *lum_m3, + const uint8_t *lum_m2, + const uint8_t *lum_m1, + const uint8_t *lum, int size); + #endif /* AVCODEC_X86_DSPUTIL_MMX_H */