changeset 1719:4e72fb256b25 libavcodec

denoise_dct_mmx()
author michael
date Fri, 02 Jan 2004 22:29:08 +0000
parents fdd1bc71da55
children 96a86bd1e0d5
files i386/mpegvideo_mmx.c i386/mpegvideo_mmx_template.c mpegvideo.c mpegvideo.h
diffstat 4 files changed, 63 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/i386/mpegvideo_mmx.c	Fri Jan 02 19:22:00 2004 +0000
+++ b/i386/mpegvideo_mmx.c	Fri Jan 02 22:29:08 2004 +0000
@@ -563,6 +563,60 @@
     }
 }
 
+static void  denoise_dct_mmx(MpegEncContext *s, DCTELEM *block){ // MMX denoiser: pulls each word of block toward zero by dct_offset and adds its magnitude to dct_error_sum
+    const int intra= s->mb_intra;              // selects which row of the per-context tables is used
+    int *sum= s->dct_error_sum[intra];         // 32-bit accumulators, read and written by the asm below
+    uint16_t *offset= s->dct_offset[intra];    // 16-bit amounts subtracted from the magnitudes, only read here
+    // Each pass handles 8 words; the loop ends once block reaches block+64 (assumes DCTELEM is 16 bit -- TODO confirm).
+    s->dct_count[intra]++;
+    // add/cmp carry no size suffix so the assembler sizes them from the pointer registers (addl/cmpl break with 64-bit pointers).
+    asm volatile(
+        "pxor %%mm7, %%mm7		\n\t" // mm7 = 0, used as the zero half when widening words to dwords
+        "1:				\n\t"
+        "pxor %%mm0, %%mm0		\n\t" // mm0/mm1 = 0, about to become per-word sign masks
+        "pxor %%mm1, %%mm1		\n\t"
+        "movq (%0), %%mm2		\n\t" // load 4 + 4 words from block
+        "movq 8(%0), %%mm3		\n\t"
+        "pcmpgtw %%mm2, %%mm0		\n\t" // mask = 0xFFFF where the word is negative, else 0
+        "pcmpgtw %%mm3, %%mm1		\n\t"
+        "pxor %%mm0, %%mm2		\n\t" // (x ^ mask) - mask  ==  |x|
+        "pxor %%mm1, %%mm3		\n\t"
+        "psubw %%mm0, %%mm2		\n\t"
+        "psubw %%mm1, %%mm3		\n\t"
+        "movq %%mm2, %%mm4		\n\t" // keep |x| for the accumulation further down
+        "movq %%mm3, %%mm5		\n\t"
+        "psubusw (%2), %%mm2		\n\t" // unsigned saturating subtract: |x| - offset, floored at 0
+        "psubusw 8(%2), %%mm3		\n\t"
+        "pxor %%mm0, %%mm2		\n\t" // same xor/sub pair with the saved mask puts the sign back
+        "pxor %%mm1, %%mm3		\n\t"
+        "psubw %%mm0, %%mm2		\n\t"
+        "psubw %%mm1, %%mm3		\n\t"
+        "movq %%mm2, (%0)		\n\t" // write the reduced words back into block
+        "movq %%mm3, 8(%0)		\n\t"
+        "movq %%mm4, %%mm2		\n\t" // duplicate |x| so low and high halves can be widened separately
+        "movq %%mm5, %%mm3		\n\t"
+        "punpcklwd %%mm7, %%mm4		\n\t" // interleave with zero: words 0-1 -> two dwords
+        "punpckhwd %%mm7, %%mm2		\n\t" // words 2-3
+        "punpcklwd %%mm7, %%mm5		\n\t" // words 4-5
+        "punpckhwd %%mm7, %%mm3		\n\t" // words 6-7
+        "paddd (%1), %%mm4		\n\t" // add the eight running sums ...
+        "paddd 8(%1), %%mm2		\n\t"
+        "paddd 16(%1), %%mm5		\n\t"
+        "paddd 24(%1), %%mm3		\n\t"
+        "movq %%mm4, (%1)		\n\t" // ... and store them back to sum
+        "movq %%mm2, 8(%1)		\n\t"
+        "movq %%mm5, 16(%1)		\n\t"
+        "movq %%mm3, 24(%1)		\n\t"
+        "add $16, %0			\n\t" // block  += 8 words
+        "add $32, %1			\n\t" // sum    += 8 dwords
+        "add $16, %2			\n\t" // offset += 8 words
+        "cmp %3, %0			\n\t" // loop while block is still below the end pointer
+            " jb 1b			\n\t"
+        : "+r" (block), "+r" (sum), "+r" (offset)
+        : "r"(block+64) : "memory" // clobber: the asm reads/writes block[], sum[], offset[] through bare register operands
+    );
+}
+
 #undef HAVE_MMX2
 #define RENAME(a) a ## _MMX
 #define RENAMEl(a) a ## _mmx
@@ -588,6 +642,8 @@
         s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx;
 
         draw_edges = draw_edges_mmx;
+        
+        s->denoise_dct= denoise_dct_mmx;
 
         if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){
             if(mm_flags & MM_MMXEXT){
--- a/i386/mpegvideo_mmx_template.c	Fri Jan 02 19:22:00 2004 +0000
+++ b/i386/mpegvideo_mmx_template.c	Fri Jan 02 22:29:08 2004 +0000
@@ -46,7 +46,7 @@
     RENAMEl(ff_fdct) (block); //cant be anything else ...
 
     if(s->dct_error_sum)
-        ff_denoise_dct(s, block);
+        s->denoise_dct(s, block);
 
     if (s->mb_intra) {
         int dummy;
--- a/mpegvideo.c	Fri Jan 02 19:22:00 2004 +0000
+++ b/mpegvideo.c	Fri Jan 02 22:29:08 2004 +0000
@@ -57,6 +57,7 @@
 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
 static int sse_mb(MpegEncContext *s);
+static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
 #endif //CONFIG_ENCODERS
 
 #ifdef HAVE_XVMC
@@ -219,6 +220,7 @@
 
 #ifdef CONFIG_ENCODERS
     s->dct_quantize= dct_quantize_c;
+    s->denoise_dct= denoise_dct_c;
 #endif
         
 #ifdef HAVE_MMX
@@ -4611,7 +4613,7 @@
 
 #endif //CONFIG_ENCODERS
 
-void ff_denoise_dct(MpegEncContext *s, DCTELEM *block){
+static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
     const int intra= s->mb_intra;
     int i;
 
@@ -4666,7 +4668,7 @@
     s->dsp.fdct (block);
     
     if(s->dct_error_sum)
-        ff_denoise_dct(s, block);
+        s->denoise_dct(s, block);
     qmul= qscale*16;
     qadd= ((qscale-1)|1)*8;
 
@@ -4939,7 +4941,7 @@
     s->dsp.fdct (block);
 
     if(s->dct_error_sum)
-        ff_denoise_dct(s, block);
+        s->denoise_dct(s, block);
 
     if (s->mb_intra) {
         if (!s->h263_aic) {
--- a/mpegvideo.h	Fri Jan 02 19:22:00 2004 +0000
+++ b/mpegvideo.h	Fri Jan 02 22:29:08 2004 +0000
@@ -681,6 +681,7 @@
                            DCTELEM *block/*align 16*/, int n, int qscale);
     int (*dct_quantize)(struct MpegEncContext *s, DCTELEM *block/*align 16*/, int n, int qscale, int *overflow);
     int (*fast_dct_quantize)(struct MpegEncContext *s, DCTELEM *block/*align 16*/, int n, int qscale, int *overflow);
+    void (*denoise_dct)(struct MpegEncContext *s, DCTELEM *block);
 } MpegEncContext;