diff snow.h @ 5587:3ae03eacbe9f libavcodec

use 16bit IDWT (a SIMD implementation of it should be >2x faster than with the old 32bit code); disable mmx/sse2 optimizations as they need a rewrite now
author michael
date Sat, 25 Aug 2007 03:00:51 +0000
parents 93082c591c8b
children 946c2db0a093
line wrap: on
line diff
--- a/snow.h	Fri Aug 24 23:49:11 2007 +0000
+++ b/snow.h	Sat Aug 25 03:00:51 2007 +0000
@@ -31,7 +31,7 @@
 #define QSHIFT 5
 #define QROOT (1<<QSHIFT)
 #define LOSSLESS_QLOG -128
-#define FRAC_BITS 8
+#define FRAC_BITS 4
 #define MAX_REF_FRAMES 8
 
 #define LOG2_OBMC_MAX 8
@@ -43,17 +43,18 @@
 
 /** Used to minimize the amount of memory used in order to optimize cache performance. **/
 struct slice_buffer_s {
-    DWTELEM * * line; ///< For use by idwt and predict_slices.
-    DWTELEM * * data_stack; ///< Used for internal purposes.
+    IDWTELEM * * line; ///< For use by idwt and predict_slices.
+    IDWTELEM * * data_stack; ///< Used for internal purposes.
     int data_stack_top;
     int line_count;
     int line_width;
     int data_count;
-    DWTELEM * base_buffer; ///< Buffer that this structure is caching.
+    IDWTELEM * base_buffer; ///< Buffer that this structure is caching.
 };
 
 #define liftS lift
 #define lift5 lift
+#define inv_lift5 inv_lift
 #if 1
 #define W_AM 3
 #define W_AO 0
@@ -123,8 +124,8 @@
 #define W_DS 9
 #endif
 
-extern void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width);
-extern void ff_snow_horizontal_compose97i(DWTELEM *b, int width);
+extern void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
+extern void ff_snow_horizontal_compose97i(IDWTELEM *b, int width);
 extern void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
 
 #ifdef CONFIG_SNOW_ENCODER
@@ -137,7 +138,7 @@
 
 /* C bits used by mmx/sse2/altivec */
 
-static av_always_inline void snow_interleave_line_header(int * i, int width, DWTELEM * low, DWTELEM * high){
+static av_always_inline void snow_interleave_line_header(int * i, int width, IDWTELEM * low, IDWTELEM * high){
     (*i) = (width) - 2;
 
     if (width & 1){
@@ -146,14 +147,14 @@
     }
 }
 
-static av_always_inline void snow_interleave_line_footer(int * i, DWTELEM * low, DWTELEM * high){
+static av_always_inline void snow_interleave_line_footer(int * i, IDWTELEM * low, IDWTELEM * high){
     for (; (*i)>=0; (*i)-=2){
         low[(*i)+1] = high[(*i)>>1];
         low[*i] = low[(*i)>>1];
     }
 }
 
-static av_always_inline void snow_horizontal_compose_lift_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w, int lift_high, int mul, int add, int shift){
+static av_always_inline void snow_horizontal_compose_lift_lead_out(int i, IDWTELEM * dst, IDWTELEM * src, IDWTELEM * ref, int width, int w, int lift_high, int mul, int add, int shift){
     for(; i<w; i++){
         dst[i] = src[i] - ((mul * (ref[i] + ref[i + 1]) + add) >> shift);
     }
@@ -163,7 +164,7 @@
     }
 }
 
-static av_always_inline void snow_horizontal_compose_liftS_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w){
+static av_always_inline void snow_horizontal_compose_liftS_lead_out(int i, IDWTELEM * dst, IDWTELEM * src, IDWTELEM * ref, int width, int w){
         for(; i<w; i++){
             dst[i] = src[i] + ((ref[i] + ref[(i+1)]+W_BO + 4 * src[i]) >> W_BS);
         }