comparison postproc/postprocess.c @ 2221:9fd911c931cd

minor cleanups; median deinterlace in MMX; fixed typos
author michael
date Tue, 16 Oct 2001 02:31:14 +0000
parents f90b6e259dc8
children 440b15b32181
comparison
equal deleted inserted replaced
2220:58555da47151 2221:9fd911c931cd
30 Vertical RKAlgo1 E a a 30 Vertical RKAlgo1 E a a
31 Vertical X1 a E E 31 Vertical X1 a E E
32 Horizontal X1 a E E 32 Horizontal X1 a E E
33 LinIpolDeinterlace a E E* 33 LinIpolDeinterlace a E E*
34 LinBlendDeinterlace a E E* 34 LinBlendDeinterlace a E E*
35 MedianDeinterlace a E 35 MedianDeinterlace Ec Ec
36 36
37 37
38 * i dont have a 3dnow CPU -> its untested 38 * i dont have a 3dnow CPU -> its untested
39 E = Exact implementation 39 E = Exact implementation
40 e = allmost exact implementation 40 e = allmost exact implementation
54 write a faster and higher quality deblocking filter :) 54 write a faster and higher quality deblocking filter :)
55 do something about the speed of the horizontal filters 55 do something about the speed of the horizontal filters
56 make the mainloop more flexible (variable number of blocks at once 56 make the mainloop more flexible (variable number of blocks at once
57 (the if/else stuff per block is slowing things down) 57 (the if/else stuff per block is slowing things down)
58 compare the quality & speed of all filters 58 compare the quality & speed of all filters
59 implement a few simple deinterlacing filters
60 split this huge file 59 split this huge file
61 fix warnings (unused vars, ...) 60 fix warnings (unused vars, ...)
61 noise reduction filters
62 ... 62 ...
63 63
64 Notes: 64 Notes:
65 65
66
66 */ 67 */
67 68
68 /* 69 //Changelog: use the CVS log
69 Changelog: use the CVS log
70 rewrote the horizontal lowpass filter to fix a bug which caused a blocky look
71 added deinterlace filters (linear interpolate, linear blend, median)
72 minor cleanups (removed some outcommented stuff)
73 0.1.3
74 bugfixes: last 3 lines not brightness/contrast corrected
75 brightness statistics messed up with initial black pic
76 changed initial values of the brightness statistics
77 C++ -> C conversation
78 QP range question solved (very likely 1<=QP<=32 according to arpi)
79 new experimental vertical deblocking filter
80 RK filter has 3dNow support now (untested)
81 0.1.2
82 fixed a bug in the horizontal default filter
83 3dnow version of the Horizontal & Vertical Lowpass filters
84 mmx version of the Horizontal Default filter
85 mmx2 & C versions of a simple filter described in a paper from ramkishor & karandikar
86 added mode flags & quality2mode function
87 0.1.1
88 */
89
90 70
91 #include <inttypes.h> 71 #include <inttypes.h>
92 #include <stdio.h> 72 #include <stdio.h>
93 #include <stdlib.h> 73 #include <stdlib.h>
94 #include "../config.h" 74 #include "../config.h"
152 132
153 int maxAllowedY=255; 133 int maxAllowedY=255;
154 //FIXME can never make a movie's black brighter (anyone needs that?) 134 //FIXME can never make a movie's black brighter (anyone needs that?)
155 int minAllowedY=0; 135 int minAllowedY=0;
156 136
157 #ifdef TIMEING 137 #ifdef TIMING
158 static inline long long rdtsc() 138 static inline long long rdtsc()
159 { 139 {
160 long long l; 140 long long l;
161 asm volatile( "rdtsc\n\t" 141 asm volatile( "rdtsc\n\t"
162 : "=A" (l) 142 : "=A" (l)
362 342
363 } 343 }
364 344
365 /** 345 /**
366 * Do a vertical low pass filter on the 8x10 block (only write to the 8x8 block in the middle) 346 * Do a vertical low pass filter on the 8x10 block (only write to the 8x8 block in the middle)
367 * useing the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 347 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16
368 */ 348 */
369 static inline void doVertLowPass(uint8_t *src, int stride, int QP) 349 static inline void doVertLowPass(uint8_t *src, int stride, int QP)
370 { 350 {
371 // QP= 64; 351 // QP= 64;
372 352
1581 #endif 1561 #endif
1582 } 1562 }
1583 1563
1584 /** 1564 /**
1585 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block) 1565 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
1586 * useing the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) 1566 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
1587 * useing the 7-Tap Filter (2,2,2,4,2,2,2)/16 (MMX2/3DNOW version) 1567 * using the 7-Tap Filter (2,2,2,4,2,2,2)/16 (MMX2/3DNOW version)
1588 */ 1568 */
1589 static inline void doHorizLowPassAndCopyBack(uint8_t dst[], int stride, int QP) 1569 static inline void doHorizLowPassAndCopyBack(uint8_t dst[], int stride, int QP)
1590 { 1570 {
1591 //return; 1571 //return;
1592 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1572 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
2122 * Deinterlaces the given block 2102 * Deinterlaces the given block
2123 * will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block 2103 * will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block
2124 */ 2104 */
2125 static inline void deInterlaceMedian(uint8_t src[], int stride) 2105 static inline void deInterlaceMedian(uint8_t src[], int stride)
2126 { 2106 {
2127 #if defined (HAVE_MMX2) 2107 #ifdef HAVE_MMX
2108 #ifdef HAVE_MMX2
2128 asm volatile( 2109 asm volatile(
2129 "leal (%0, %1), %%eax \n\t" 2110 "leal (%0, %1), %%eax \n\t"
2130 "leal (%%eax, %1, 4), %%ebx \n\t" 2111 "leal (%%eax, %1, 4), %%ebx \n\t"
2131 // 0 1 2 3 4 5 6 7 8 9 2112 // 0 1 2 3 4 5 6 7 8 9
2132 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 2113 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1
2170 2151
2171 2152
2172 : : "r" (src), "r" (stride) 2153 : : "r" (src), "r" (stride)
2173 : "%eax", "%ebx" 2154 : "%eax", "%ebx"
2174 ); 2155 );
2156
2157 #else // MMX without MMX2
2158 asm volatile(
2159 "leal (%0, %1), %%eax \n\t"
2160 "leal (%%eax, %1, 4), %%ebx \n\t"
2161 // 0 1 2 3 4 5 6 7 8 9
2162 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1
2163 "pxor %%mm7, %%mm7 \n\t"
2164
2165 #define MEDIAN(a,b,c)\
2166 "movq " #a ", %%mm0 \n\t"\
2167 "movq " #b ", %%mm2 \n\t"\
2168 "movq " #c ", %%mm1 \n\t"\
2169 "movq %%mm0, %%mm3 \n\t"\
2170 "movq %%mm1, %%mm4 \n\t"\
2171 "movq %%mm2, %%mm5 \n\t"\
2172 "psubusb %%mm1, %%mm3 \n\t"\
2173 "psubusb %%mm2, %%mm4 \n\t"\
2174 "psubusb %%mm0, %%mm5 \n\t"\
2175 "pcmpeqb %%mm7, %%mm3 \n\t"\
2176 "pcmpeqb %%mm7, %%mm4 \n\t"\
2177 "pcmpeqb %%mm7, %%mm5 \n\t"\
2178 "movq %%mm3, %%mm6 \n\t"\
2179 "pxor %%mm4, %%mm3 \n\t"\
2180 "pxor %%mm5, %%mm4 \n\t"\
2181 "pxor %%mm6, %%mm5 \n\t"\
2182 "por %%mm3, %%mm1 \n\t"\
2183 "por %%mm4, %%mm2 \n\t"\
2184 "por %%mm5, %%mm0 \n\t"\
2185 "pand %%mm2, %%mm0 \n\t"\
2186 "pand %%mm1, %%mm0 \n\t"\
2187 "movq %%mm0, " #b " \n\t"
2188
2189 MEDIAN((%0), (%%eax), (%%eax, %1))
2190 MEDIAN((%%eax, %1), (%%eax, %1, 2), (%0, %1, 4))
2191 MEDIAN((%0, %1, 4), (%%ebx), (%%ebx, %1))
2192 MEDIAN((%%ebx, %1), (%%ebx, %1, 2), (%0, %1, 8))
2193
2194 : : "r" (src), "r" (stride)
2195 : "%eax", "%ebx"
2196 );
2197 #endif // MMX
2175 #else 2198 #else
2176 //FIXME 2199 //FIXME
2177 int x; 2200 int x;
2178 for(x=0; x<8; x++) 2201 for(x=0; x<8; x++)
2179 { 2202 {
2191 } 2214 }
2192 2215
2193 /** 2216 /**
2194 * Deinterlaces the given block 2217 * Deinterlaces the given block
2195 * will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block 2218 * will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block
2196 * will shift the image up by 1 line (FIXME if this is a problem)
2197 */ 2219 */
2198 static inline void deInterlaceMedianLastRow(uint8_t src[], int stride) 2220 static inline void deInterlaceMedianLastRow(uint8_t src[], int stride)
2199 { 2221 {
2200 #if defined (HAVE_MMX2) 2222 #ifdef HAVE_MMX
2223 #ifdef HAVE_MMX2
2201 asm volatile( 2224 asm volatile(
2202 "leal (%0, %1), %%eax \n\t" 2225 "leal (%0, %1), %%eax \n\t"
2203 "leal (%%eax, %1, 4), %%ebx \n\t" 2226 "leal (%%eax, %1, 4), %%ebx \n\t"
2204 // 0 1 2 3 4 5 6 7 8 9 2227 // 0 1 2 3 4 5 6 7 8 9
2205 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 2228 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1
2235 "movq %%mm1, (%%ebx, %1, 2) \n\t" 2258 "movq %%mm1, (%%ebx, %1, 2) \n\t"
2236 2259
2237 : : "r" (src), "r" (stride) 2260 : : "r" (src), "r" (stride)
2238 : "%eax", "%ebx" 2261 : "%eax", "%ebx"
2239 ); 2262 );
2263 #else //MMX & no MMX2
2264 asm volatile(
2265 "leal (%0, %1), %%eax \n\t"
2266 "leal (%%eax, %1, 4), %%ebx \n\t"
2267 // 0 1 2 3 4 5 6 7 8 9
2268 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1
2269 "pxor %%mm7, %%mm7 \n\t"
2270
2271 MEDIAN((%0), (%%eax), (%%eax, %1))
2272 MEDIAN((%%eax, %1), (%%eax, %1, 2), (%0, %1, 4))
2273 MEDIAN((%0, %1, 4), (%%ebx), (%%ebx, %1))
2274
2275 "movq (%%ebx, %1), %%mm0 \n\t"
2276 "movq %%mm0, (%%ebx, %1, 2) \n\t"
2277
2278 : : "r" (src), "r" (stride)
2279 : "%eax", "%ebx"
2280 );
2281
2282 #endif //MMX
2240 #else 2283 #else
2241 //FIXME 2284 //FIXME
2242 int x; 2285 int x;
2243 for(x=0; x<8; x++) 2286 for(x=0; x<8; x++)
2244 { 2287 {
2253 src++; 2296 src++;
2254 } 2297 }
2255 #endif 2298 #endif
2256 } 2299 }
2257 2300
2258
2259 #ifdef HAVE_ODIVX_POSTPROCESS 2301 #ifdef HAVE_ODIVX_POSTPROCESS
2260 #include "../opendivx/postprocess.h" 2302 #include "../opendivx/postprocess.h"
2261 int use_old_pp=0; 2303 int use_old_pp=0;
2262 #endif 2304 #endif
2263 2305
2264 static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, 2306 static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
2265 QP_STORE_T QPs[], int QPStride, int isColor, int mode); 2307 QP_STORE_T QPs[], int QPStride, int isColor, int mode);
2266 2308
2267 /** 2309 /**
2268 * ... 2310 * ...
2269 * the mode value is interpreted as a quality value if its negative, its range is then (-1 ... -63)
2270 * -63 is best quality -1 is worst
2271 */ 2311 */
2272 void postprocess(unsigned char * src[], int src_stride, 2312 void postprocess(unsigned char * src[], int src_stride,
2273 unsigned char * dst[], int dst_stride, 2313 unsigned char * dst[], int dst_stride,
2274 int horizontal_size, int vertical_size, 2314 int horizontal_size, int vertical_size,
2275 QP_STORE_T *QP_store, int QP_stride, 2315 QP_STORE_T *QP_store, int QP_stride,
2282 if(use_old_pp){ 2322 if(use_old_pp){
2283 odivx_postprocess(src,src_stride,dst,dst_stride,horizontal_size,vertical_size,QP_store,QP_stride,mode); 2323 odivx_postprocess(src,src_stride,dst,dst_stride,horizontal_size,vertical_size,QP_store,QP_stride,mode);
2284 return; 2324 return;
2285 } 2325 }
2286 #endif 2326 #endif
2287
2288 // I'm calling this from dec_video.c:video_set_postprocess()
2289 // if(mode<0) mode= getModeForQuality(-mode);
2290 2327
2291 /* 2328 /*
2292 long long T= rdtsc(); 2329 long long T= rdtsc();
2293 for(int y=vertical_size-1; y>=0 ; y--) 2330 for(int y=vertical_size-1; y>=0 ; y--)
2294 memcpy(dst[0] + y*src_stride, src[0] + y*src_stride,src_stride); 2331 memcpy(dst[0] + y*src_stride, src[0] + y*src_stride,src_stride);
2498 /* we need 64bit here otherwise we'll going to have a problem 2535 /* we need 64bit here otherwise we'll going to have a problem
2499 after watching a black picture for 5 hours*/ 2536 after watching a black picture for 5 hours*/
2500 static uint64_t *yHistogram= NULL; 2537 static uint64_t *yHistogram= NULL;
2501 int black=0, white=255; // blackest black and whitest white in the picture 2538 int black=0, white=255; // blackest black and whitest white in the picture
2502 2539
2503 #ifdef TIMEING 2540 #ifdef TIMING
2504 long long T0, T1, memcpyTime=0, vertTime=0, horizTime=0, sumTime, diffTime=0; 2541 long long T0, T1, memcpyTime=0, vertTime=0, horizTime=0, sumTime, diffTime=0;
2505 sumTime= rdtsc(); 2542 sumTime= rdtsc();
2506 #endif 2543 #endif
2507 2544
2508 if(!yHistogram) 2545 if(!yHistogram)
2599 #endif 2636 #endif
2600 2637
2601 2638
2602 if(y + 12 < height) 2639 if(y + 12 < height)
2603 { 2640 {
2604 #ifdef MORE_TIMEING 2641 #ifdef MORE_TIMING
2605 T0= rdtsc(); 2642 T0= rdtsc();
2606 #endif 2643 #endif
2607 2644
2608 #ifdef HAVE_MMX2 2645 #ifdef HAVE_MMX2
2609 prefetchnta(vertSrcBlock + (((x>>3)&3) + 2)*srcStride + 32); 2646 prefetchnta(vertSrcBlock + (((x>>3)&3) + 2)*srcStride + 32);
2633 deInterlaceInterpolateCubic(dstBlock, dstStride); 2670 deInterlaceInterpolateCubic(dstBlock, dstStride);
2634 else if(mode & CUBIC_BLEND_DEINT_FILTER) 2671 else if(mode & CUBIC_BLEND_DEINT_FILTER)
2635 deInterlaceBlendCubic(dstBlock, dstStride); 2672 deInterlaceBlendCubic(dstBlock, dstStride);
2636 */ 2673 */
2637 2674
2638 #ifdef MORE_TIMEING 2675 #ifdef MORE_TIMING
2639 T1= rdtsc(); 2676 T1= rdtsc();
2640 memcpyTime+= T1-T0; 2677 memcpyTime+= T1-T0;
2641 T0=T1; 2678 T0=T1;
2642 #endif 2679 #endif
2643 if(mode & V_DEBLOCK) 2680 if(mode & V_DEBLOCK)
2655 } 2692 }
2656 else 2693 else
2657 doVertDefFilter(vertBlock, stride, QP); 2694 doVertDefFilter(vertBlock, stride, QP);
2658 } 2695 }
2659 } 2696 }
2660 #ifdef MORE_TIMEING 2697 #ifdef MORE_TIMING
2661 T1= rdtsc(); 2698 T1= rdtsc();
2662 vertTime+= T1-T0; 2699 vertTime+= T1-T0;
2663 T0=T1; 2700 T0=T1;
2664 #endif 2701 #endif
2665 } 2702 }
2681 */ 2718 */
2682 } 2719 }
2683 2720
2684 if(x - 8 >= 0 && x<width) 2721 if(x - 8 >= 0 && x<width)
2685 { 2722 {
2686 #ifdef MORE_TIMEING 2723 #ifdef MORE_TIMING
2687 T0= rdtsc(); 2724 T0= rdtsc();
2688 #endif 2725 #endif
2689 if(mode & H_DEBLOCK) 2726 if(mode & H_DEBLOCK)
2690 { 2727 {
2691 if(mode & H_X1_FILTER) 2728 if(mode & H_X1_FILTER)
2699 } 2736 }
2700 else 2737 else
2701 doHorizDefFilterAndCopyBack(dstBlock-4, stride, QP); 2738 doHorizDefFilterAndCopyBack(dstBlock-4, stride, QP);
2702 } 2739 }
2703 } 2740 }
2704 #ifdef MORE_TIMEING 2741 #ifdef MORE_TIMING
2705 T1= rdtsc(); 2742 T1= rdtsc();
2706 horizTime+= T1-T0; 2743 horizTime+= T1-T0;
2707 T0=T1; 2744 T0=T1;
2708 #endif 2745 #endif
2709 dering(dstBlock - 9 - stride, stride, QP); 2746 dering(dstBlock - 9 - stride, stride, QP);
2723 asm volatile("femms"); 2760 asm volatile("femms");
2724 #elif defined (HAVE_MMX) 2761 #elif defined (HAVE_MMX)
2725 asm volatile("emms"); 2762 asm volatile("emms");
2726 #endif 2763 #endif
2727 2764
2728 #ifdef TIMEING 2765 #ifdef TIMING
2729 // FIXME diff is mostly the time spent for rdtsc (should subtract that but ...) 2766 // FIXME diff is mostly the time spent for rdtsc (should subtract that but ...)
2730 sumTime= rdtsc() - sumTime; 2767 sumTime= rdtsc() - sumTime;
2731 if(!isColor) 2768 if(!isColor)
2732 printf("cpy:%4dk, vert:%4dk, horiz:%4dk, sum:%4dk, diff:%4dk, color: %d/%d \r", 2769 printf("cpy:%4dk, vert:%4dk, horiz:%4dk, sum:%4dk, diff:%4dk, color: %d/%d \r",
2733 (int)(memcpyTime/1000), (int)(vertTime/1000), (int)(horizTime/1000), 2770 (int)(memcpyTime/1000), (int)(vertTime/1000), (int)(horizTime/1000),