changeset 2272:cd43603c46f9 libavcodec

move h264 idct to its own file and call via function pointer in DspContext allow h264 idct to be used for lowres=1
author michael
date Mon, 27 Sep 2004 19:47:17 +0000
parents 859535b5eb57
children 80660c7fcc9c
files avcodec.h dsputil.c dsputil.h h264.c h264idct.c
diffstat 5 files changed, 92 insertions(+), 43 deletions(-) [+]
line wrap: on
line diff
--- a/avcodec.h	Mon Sep 27 17:46:07 2004 +0000
+++ b/avcodec.h	Mon Sep 27 19:47:17 2004 +0000
@@ -1113,6 +1113,7 @@
 #define FF_IDCT_ALTIVEC      8
 #define FF_IDCT_SH4          9
 #define FF_IDCT_SIMPLEARM    10
+#define FF_IDCT_H264         11
 
     /**
      * slice count.
--- a/dsputil.c	Mon Sep 27 17:46:07 2004 +0000
+++ b/dsputil.c	Mon Sep 27 19:47:17 2004 +0000
@@ -3434,8 +3434,13 @@
 #endif //CONFIG_ENCODERS
 
     if(avctx->lowres==1){
-        c->idct_put= ff_jref_idct4_put;
-        c->idct_add= ff_jref_idct4_add;
+        if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO){
+            c->idct_put= ff_jref_idct4_put;
+            c->idct_add= ff_jref_idct4_add;
+        }else{
+            c->idct_put= ff_h264_lowres_idct_put_c;
+            c->idct_add= ff_h264_lowres_idct_add_c;
+        }
         c->idct    = j_rev_dct4;
         c->idct_permutation_type= FF_NO_IDCT_PERM;
     }else if(avctx->lowres==2){
@@ -3462,6 +3467,8 @@
         }
     }
 
+    c->h264_idct_add= ff_h264_idct_add_c;
+
     /* VP3 DSP support */
     c->vp3_dsp_init = vp3_dsp_init_c;
     c->vp3_idct = vp3_idct_c;
--- a/dsputil.h	Mon Sep 27 17:46:07 2004 +0000
+++ b/dsputil.h	Mon Sep 27 19:47:17 2004 +0000
@@ -50,6 +50,10 @@
 void ff_fdct_mmx2(DCTELEM *block);
 void ff_fdct_sse2(DCTELEM *block);
 
+void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride);
+void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block);
+void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block);
+
 /* encoding scans */
 extern const uint8_t ff_alternate_horizontal_scan[64];
 extern const uint8_t ff_alternate_vertical_scan[64];
@@ -330,7 +334,8 @@
      */
     void (*vp3_idct)(int16_t *input_data, int16_t *dequant_matrix,
         int coeff_count, DCTELEM *output_samples);
-
+ 
+    void (*h264_idct_add)(uint8_t *dst, DCTELEM *block, int stride);
 } DSPContext;
 
 void dsputil_static_init(void);
--- a/h264.c	Mon Sep 27 17:46:07 2004 +0000
+++ b/h264.c	Mon Sep 27 19:47:17 2004 +0000
@@ -1323,40 +1323,6 @@
 }
 
 
-/**
- *
- */
-static void h264_add_idct_c(uint8_t *dst, DCTELEM *block, int stride){
-    int i;
-    uint8_t *cm = cropTbl + MAX_NEG_CROP;
-
-    block[0] += 32;
-
-    for(i=0; i<4; i++){
-        const int z0=  block[0 + 4*i]     +  block[2 + 4*i];
-        const int z1=  block[0 + 4*i]     -  block[2 + 4*i];
-        const int z2= (block[1 + 4*i]>>1) -  block[3 + 4*i];
-        const int z3=  block[1 + 4*i]     + (block[3 + 4*i]>>1);
-
-        block[0 + 4*i]= z0 + z3;
-        block[1 + 4*i]= z1 + z2;
-        block[2 + 4*i]= z1 - z2;
-        block[3 + 4*i]= z0 - z3;
-    }
-
-    for(i=0; i<4; i++){
-        const int z0=  block[i + 4*0]     +  block[i + 4*2];
-        const int z1=  block[i + 4*0]     -  block[i + 4*2];
-        const int z2= (block[i + 4*1]>>1) -  block[i + 4*3];
-        const int z3=  block[i + 4*1]     + (block[i + 4*3]>>1);
-
-        dst[i + 0*stride]= cm[ dst[i + 0*stride] + ((z0 + z3) >> 6) ];
-        dst[i + 1*stride]= cm[ dst[i + 1*stride] + ((z1 + z2) >> 6) ];
-        dst[i + 2*stride]= cm[ dst[i + 2*stride] + ((z1 - z2) >> 6) ];
-        dst[i + 3*stride]= cm[ dst[i + 3*stride] + ((z0 - z3) >> 6) ];
-    }
-}
-
 #if 0
 static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
     int i;
@@ -2440,7 +2406,7 @@
                     h->pred4x4[ dir ](ptr, topright, linesize);
                     if(h->non_zero_count_cache[ scan8[i] ]){
                         if(s->codec_id == CODEC_ID_H264)
-                            h264_add_idct_c(ptr, h->mb + i*16, linesize);
+                            s->dsp.h264_idct_add(ptr, h->mb + i*16, linesize);
                         else
                             svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
                     }
@@ -2467,7 +2433,7 @@
             for(i=0; i<16; i++){
                 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
                     uint8_t * const ptr= dest_y + h->block_offset[i];
-                    h264_add_idct_c(ptr, h->mb + i*16, linesize);
+                    s->dsp.h264_idct_add(ptr, h->mb + i*16, linesize);
                 }
             }
         }else{
@@ -2487,13 +2453,13 @@
             for(i=16; i<16+4; i++){
                 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                     uint8_t * const ptr= dest_cb + h->block_offset[i];
-                    h264_add_idct_c(ptr, h->mb + i*16, uvlinesize);
+                    s->dsp.h264_idct_add(ptr, h->mb + i*16, uvlinesize);
                 }
             }
             for(i=20; i<20+4; i++){
                 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                     uint8_t * const ptr= dest_cr + h->block_offset[i];
-                    h264_add_idct_c(ptr, h->mb + i*16, uvlinesize);
+                    s->dsp.h264_idct_add(ptr, h->mb + i*16, uvlinesize);
                 }
             }
         }else{
@@ -3232,7 +3198,7 @@
     log= 32 - av_log2(buf);
 #ifdef TRACE
     print_bin(buf>>(32-log), log);
-    printf("%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
+    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
 #endif
 
     LAST_SKIP_BITS(re, gb, log);
@@ -5975,7 +5941,7 @@
         }
 //        printf("\n");
         
-        h264_add_idct_c(ref, block, 4);
+        s->dsp.h264_idct_add(ref, block, 4);
 /*        for(j=0; j<16; j++){
             printf("%d ", ref[j]);
         }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/h264idct.c	Mon Sep 27 19:47:17 2004 +0000
@@ -0,0 +1,70 @@
+/*
+ * H.264 IDCT
+ * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+ 
+/**
+ * @file h264-idct.c
+ * H.264 IDCT.
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+ 
+#include "dsputil.h"
+
+static always_inline void idct_internal(uint8_t *dst, DCTELEM *block, int stride, int block_stride, int shift, int add){
+    int i;
+    uint8_t *cm = cropTbl + MAX_NEG_CROP;
+
+    block[0] += 1<<(shift-1);
+
+    for(i=0; i<4; i++){
+        const int z0=  block[0 + block_stride*i]     +  block[2 + block_stride*i];
+        const int z1=  block[0 + block_stride*i]     -  block[2 + block_stride*i];
+        const int z2= (block[1 + block_stride*i]>>1) -  block[3 + block_stride*i];
+        const int z3=  block[1 + block_stride*i]     + (block[3 + block_stride*i]>>1);
+
+        block[0 + block_stride*i]= z0 + z3;
+        block[1 + block_stride*i]= z1 + z2;
+        block[2 + block_stride*i]= z1 - z2;
+        block[3 + block_stride*i]= z0 - z3;
+    }
+
+    for(i=0; i<4; i++){
+        const int z0=  block[i + block_stride*0]     +  block[i + block_stride*2];
+        const int z1=  block[i + block_stride*0]     -  block[i + block_stride*2];
+        const int z2= (block[i + block_stride*1]>>1) -  block[i + block_stride*3];
+        const int z3=  block[i + block_stride*1]     + (block[i + block_stride*3]>>1);
+
+        dst[i + 0*stride]= cm[ add*dst[i + 0*stride] + ((z0 + z3) >> shift) ];
+        dst[i + 1*stride]= cm[ add*dst[i + 1*stride] + ((z1 + z2) >> shift) ];
+        dst[i + 2*stride]= cm[ add*dst[i + 2*stride] + ((z1 - z2) >> shift) ];
+        dst[i + 3*stride]= cm[ add*dst[i + 3*stride] + ((z0 - z3) >> shift) ];
+    }
+}
+
+void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride){
+    idct_internal(dst, block, stride, 4, 6, 1);
+}
+
+void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block){
+    idct_internal(dst, block, stride, 8, 3, 1);
+}
+
+void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block){
+    idct_internal(dst, block, stride, 8, 3, 0);
+}