Mercurial > libavcodec.hg
comparison libpostproc/postprocess.c @ 107:bd163e13a0fb libavcodec
minor cleanups
median deinterlace in MMX
fixed typos
author | michael |
---|---|
date | Tue, 16 Oct 2001 02:31:14 +0000 |
parents | 389391a6d0bf |
children | dfa9fde4b72d |
comparison
equal
deleted
inserted
replaced
106:389391a6d0bf | 107:bd163e13a0fb |
---|---|
30 Vertical RKAlgo1 E a a | 30 Vertical RKAlgo1 E a a |
31 Vertical X1 a E E | 31 Vertical X1 a E E |
32 Horizontal X1 a E E | 32 Horizontal X1 a E E |
33 LinIpolDeinterlace a E E* | 33 LinIpolDeinterlace a E E* |
34 LinBlendDeinterlace a E E* | 34 LinBlendDeinterlace a E E* |
35 MedianDeinterlace a E | 35 MedianDeinterlace Ec Ec |
36 | 36 |
37 | 37 |
38 * i dont have a 3dnow CPU -> its untested | 38 * i dont have a 3dnow CPU -> its untested |
39 E = Exact implementation | 39 E = Exact implementation |
40 e = allmost exact implementation | 40 e = allmost exact implementation |
54 write a faster and higher quality deblocking filter :) | 54 write a faster and higher quality deblocking filter :) |
55 do something about the speed of the horizontal filters | 55 do something about the speed of the horizontal filters |
56 make the mainloop more flexible (variable number of blocks at once | 56 make the mainloop more flexible (variable number of blocks at once |
57 (the if/else stuff per block is slowing things down) | 57 (the if/else stuff per block is slowing things down) |
58 compare the quality & speed of all filters | 58 compare the quality & speed of all filters |
59 implement a few simple deinterlacing filters | |
60 split this huge file | 59 split this huge file |
61 fix warnings (unused vars, ...) | 60 fix warnings (unused vars, ...) |
61 noise reduction filters | |
62 ... | 62 ... |
63 | 63 |
64 Notes: | 64 Notes: |
65 | 65 |
66 | |
66 */ | 67 */ |
67 | 68 |
68 /* | 69 //Changelog: use the CVS log |
69 Changelog: use the CVS log | |
70 rewrote the horizontal lowpass filter to fix a bug which caused a blocky look | |
71 added deinterlace filters (linear interpolate, linear blend, median) | |
72 minor cleanups (removed some outcommented stuff) | |
73 0.1.3 | |
74 bugfixes: last 3 lines not brightness/contrast corrected | |
75 brightness statistics messed up with initial black pic | |
76 changed initial values of the brightness statistics | |
77 C++ -> C conversation | |
78 QP range question solved (very likely 1<=QP<=32 according to arpi) | |
79 new experimental vertical deblocking filter | |
80 RK filter has 3dNow support now (untested) | |
81 0.1.2 | |
82 fixed a bug in the horizontal default filter | |
83 3dnow version of the Horizontal & Vertical Lowpass filters | |
84 mmx version of the Horizontal Default filter | |
85 mmx2 & C versions of a simple filter described in a paper from ramkishor & karandikar | |
86 added mode flags & quality2mode function | |
87 0.1.1 | |
88 */ | |
89 | |
90 | 70 |
91 #include <inttypes.h> | 71 #include <inttypes.h> |
92 #include <stdio.h> | 72 #include <stdio.h> |
93 #include <stdlib.h> | 73 #include <stdlib.h> |
94 #include "../config.h" | 74 #include "../config.h" |
152 | 132 |
153 int maxAllowedY=255; | 133 int maxAllowedY=255; |
154 //FIXME can never make a movie's black brighter (anyone needs that?) | 134 //FIXME can never make a movie's black brighter (anyone needs that?) |
155 int minAllowedY=0; | 135 int minAllowedY=0; |
156 | 136 |
157 #ifdef TIMEING | 137 #ifdef TIMING |
158 static inline long long rdtsc() | 138 static inline long long rdtsc() |
159 { | 139 { |
160 long long l; | 140 long long l; |
161 asm volatile( "rdtsc\n\t" | 141 asm volatile( "rdtsc\n\t" |
162 : "=A" (l) | 142 : "=A" (l) |
362 | 342 |
363 } | 343 } |
364 | 344 |
365 /** | 345 /** |
366 * Do a vertical low pass filter on the 8x10 block (only write to the 8x8 block in the middle) | 346 * Do a vertical low pass filter on the 8x10 block (only write to the 8x8 block in the middle) |
367 * useing the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 | 347 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 |
368 */ | 348 */ |
369 static inline void doVertLowPass(uint8_t *src, int stride, int QP) | 349 static inline void doVertLowPass(uint8_t *src, int stride, int QP) |
370 { | 350 { |
371 // QP= 64; | 351 // QP= 64; |
372 | 352 |
1581 #endif | 1561 #endif |
1582 } | 1562 } |
1583 | 1563 |
1584 /** | 1564 /** |
1585 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block) | 1565 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block) |
1586 * useing the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) | 1566 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) |
1587 * useing the 7-Tap Filter (2,2,2,4,2,2,2)/16 (MMX2/3DNOW version) | 1567 * using the 7-Tap Filter (2,2,2,4,2,2,2)/16 (MMX2/3DNOW version) |
1588 */ | 1568 */ |
1589 static inline void doHorizLowPassAndCopyBack(uint8_t dst[], int stride, int QP) | 1569 static inline void doHorizLowPassAndCopyBack(uint8_t dst[], int stride, int QP) |
1590 { | 1570 { |
1591 //return; | 1571 //return; |
1592 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1572 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
2122 * Deinterlaces the given block | 2102 * Deinterlaces the given block |
2123 * will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block | 2103 * will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block |
2124 */ | 2104 */ |
2125 static inline void deInterlaceMedian(uint8_t src[], int stride) | 2105 static inline void deInterlaceMedian(uint8_t src[], int stride) |
2126 { | 2106 { |
2127 #if defined (HAVE_MMX2) | 2107 #ifdef HAVE_MMX |
2108 #ifdef HAVE_MMX2 | |
2128 asm volatile( | 2109 asm volatile( |
2129 "leal (%0, %1), %%eax \n\t" | 2110 "leal (%0, %1), %%eax \n\t" |
2130 "leal (%%eax, %1, 4), %%ebx \n\t" | 2111 "leal (%%eax, %1, 4), %%ebx \n\t" |
2131 // 0 1 2 3 4 5 6 7 8 9 | 2112 // 0 1 2 3 4 5 6 7 8 9 |
2132 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 | 2113 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 |
2170 | 2151 |
2171 | 2152 |
2172 : : "r" (src), "r" (stride) | 2153 : : "r" (src), "r" (stride) |
2173 : "%eax", "%ebx" | 2154 : "%eax", "%ebx" |
2174 ); | 2155 ); |
2156 | |
2157 #else // MMX without MMX2 | |
2158 asm volatile( | |
2159 "leal (%0, %1), %%eax \n\t" | |
2160 "leal (%%eax, %1, 4), %%ebx \n\t" | |
2161 // 0 1 2 3 4 5 6 7 8 9 | |
2162 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 | |
2163 "pxor %%mm7, %%mm7 \n\t" | |
2164 | |
2165 #define MEDIAN(a,b,c)\ | |
2166 "movq " #a ", %%mm0 \n\t"\ | |
2167 "movq " #b ", %%mm2 \n\t"\ | |
2168 "movq " #c ", %%mm1 \n\t"\ | |
2169 "movq %%mm0, %%mm3 \n\t"\ | |
2170 "movq %%mm1, %%mm4 \n\t"\ | |
2171 "movq %%mm2, %%mm5 \n\t"\ | |
2172 "psubusb %%mm1, %%mm3 \n\t"\ | |
2173 "psubusb %%mm2, %%mm4 \n\t"\ | |
2174 "psubusb %%mm0, %%mm5 \n\t"\ | |
2175 "pcmpeqb %%mm7, %%mm3 \n\t"\ | |
2176 "pcmpeqb %%mm7, %%mm4 \n\t"\ | |
2177 "pcmpeqb %%mm7, %%mm5 \n\t"\ | |
2178 "movq %%mm3, %%mm6 \n\t"\ | |
2179 "pxor %%mm4, %%mm3 \n\t"\ | |
2180 "pxor %%mm5, %%mm4 \n\t"\ | |
2181 "pxor %%mm6, %%mm5 \n\t"\ | |
2182 "por %%mm3, %%mm1 \n\t"\ | |
2183 "por %%mm4, %%mm2 \n\t"\ | |
2184 "por %%mm5, %%mm0 \n\t"\ | |
2185 "pand %%mm2, %%mm0 \n\t"\ | |
2186 "pand %%mm1, %%mm0 \n\t"\ | |
2187 "movq %%mm0, " #b " \n\t" | |
2188 | |
2189 MEDIAN((%0), (%%eax), (%%eax, %1)) | |
2190 MEDIAN((%%eax, %1), (%%eax, %1, 2), (%0, %1, 4)) | |
2191 MEDIAN((%0, %1, 4), (%%ebx), (%%ebx, %1)) | |
2192 MEDIAN((%%ebx, %1), (%%ebx, %1, 2), (%0, %1, 8)) | |
2193 | |
2194 : : "r" (src), "r" (stride) | |
2195 : "%eax", "%ebx" | |
2196 ); | |
2197 #endif // MMX | |
2175 #else | 2198 #else |
2176 //FIXME | 2199 //FIXME |
2177 int x; | 2200 int x; |
2178 for(x=0; x<8; x++) | 2201 for(x=0; x<8; x++) |
2179 { | 2202 { |
2191 } | 2214 } |
2192 | 2215 |
2193 /** | 2216 /** |
2194 * Deinterlaces the given block | 2217 * Deinterlaces the given block |
2195 * will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block | 2218 * will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block |
2196 * will shift the image up by 1 line (FIXME if this is a problem) | |
2197 */ | 2219 */ |
2198 static inline void deInterlaceMedianLastRow(uint8_t src[], int stride) | 2220 static inline void deInterlaceMedianLastRow(uint8_t src[], int stride) |
2199 { | 2221 { |
2200 #if defined (HAVE_MMX2) | 2222 #ifdef HAVE_MMX |
2223 #ifdef HAVE_MMX2 | |
2201 asm volatile( | 2224 asm volatile( |
2202 "leal (%0, %1), %%eax \n\t" | 2225 "leal (%0, %1), %%eax \n\t" |
2203 "leal (%%eax, %1, 4), %%ebx \n\t" | 2226 "leal (%%eax, %1, 4), %%ebx \n\t" |
2204 // 0 1 2 3 4 5 6 7 8 9 | 2227 // 0 1 2 3 4 5 6 7 8 9 |
2205 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 | 2228 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 |
2235 "movq %%mm1, (%%ebx, %1, 2) \n\t" | 2258 "movq %%mm1, (%%ebx, %1, 2) \n\t" |
2236 | 2259 |
2237 : : "r" (src), "r" (stride) | 2260 : : "r" (src), "r" (stride) |
2238 : "%eax", "%ebx" | 2261 : "%eax", "%ebx" |
2239 ); | 2262 ); |
2263 #else //MMX & no MMX2 | |
2264 asm volatile( | |
2265 "leal (%0, %1), %%eax \n\t" | |
2266 "leal (%%eax, %1, 4), %%ebx \n\t" | |
2267 // 0 1 2 3 4 5 6 7 8 9 | |
2268 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 | |
2269 "pxor %%mm7, %%mm7 \n\t" | |
2270 | |
2271 MEDIAN((%0), (%%eax), (%%eax, %1)) | |
2272 MEDIAN((%%eax, %1), (%%eax, %1, 2), (%0, %1, 4)) | |
2273 MEDIAN((%0, %1, 4), (%%ebx), (%%ebx, %1)) | |
2274 | |
2275 "movq (%%ebx, %1), %%mm0 \n\t" | |
2276 "movq %%mm0, (%%ebx, %1, 2) \n\t" | |
2277 | |
2278 : : "r" (src), "r" (stride) | |
2279 : "%eax", "%ebx" | |
2280 ); | |
2281 | |
2282 #endif //MMX | |
2240 #else | 2283 #else |
2241 //FIXME | 2284 //FIXME |
2242 int x; | 2285 int x; |
2243 for(x=0; x<8; x++) | 2286 for(x=0; x<8; x++) |
2244 { | 2287 { |
2253 src++; | 2296 src++; |
2254 } | 2297 } |
2255 #endif | 2298 #endif |
2256 } | 2299 } |
2257 | 2300 |
2258 | |
2259 #ifdef HAVE_ODIVX_POSTPROCESS | 2301 #ifdef HAVE_ODIVX_POSTPROCESS |
2260 #include "../opendivx/postprocess.h" | 2302 #include "../opendivx/postprocess.h" |
2261 int use_old_pp=0; | 2303 int use_old_pp=0; |
2262 #endif | 2304 #endif |
2263 | 2305 |
2264 static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, | 2306 static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, |
2265 QP_STORE_T QPs[], int QPStride, int isColor, int mode); | 2307 QP_STORE_T QPs[], int QPStride, int isColor, int mode); |
2266 | 2308 |
2267 /** | 2309 /** |
2268 * ... | 2310 * ... |
2269 * the mode value is interpreted as a quality value if its negative, its range is then (-1 ... -63) | |
2270 * -63 is best quality -1 is worst | |
2271 */ | 2311 */ |
2272 void postprocess(unsigned char * src[], int src_stride, | 2312 void postprocess(unsigned char * src[], int src_stride, |
2273 unsigned char * dst[], int dst_stride, | 2313 unsigned char * dst[], int dst_stride, |
2274 int horizontal_size, int vertical_size, | 2314 int horizontal_size, int vertical_size, |
2275 QP_STORE_T *QP_store, int QP_stride, | 2315 QP_STORE_T *QP_store, int QP_stride, |
2282 if(use_old_pp){ | 2322 if(use_old_pp){ |
2283 odivx_postprocess(src,src_stride,dst,dst_stride,horizontal_size,vertical_size,QP_store,QP_stride,mode); | 2323 odivx_postprocess(src,src_stride,dst,dst_stride,horizontal_size,vertical_size,QP_store,QP_stride,mode); |
2284 return; | 2324 return; |
2285 } | 2325 } |
2286 #endif | 2326 #endif |
2287 | |
2288 // I'm calling this from dec_video.c:video_set_postprocess() | |
2289 // if(mode<0) mode= getModeForQuality(-mode); | |
2290 | 2327 |
2291 /* | 2328 /* |
2292 long long T= rdtsc(); | 2329 long long T= rdtsc(); |
2293 for(int y=vertical_size-1; y>=0 ; y--) | 2330 for(int y=vertical_size-1; y>=0 ; y--) |
2294 memcpy(dst[0] + y*src_stride, src[0] + y*src_stride,src_stride); | 2331 memcpy(dst[0] + y*src_stride, src[0] + y*src_stride,src_stride); |
2498 /* we need 64bit here otherwise we'll going to have a problem | 2535 /* we need 64bit here otherwise we'll going to have a problem |
2499 after watching a black picture for 5 hours*/ | 2536 after watching a black picture for 5 hours*/ |
2500 static uint64_t *yHistogram= NULL; | 2537 static uint64_t *yHistogram= NULL; |
2501 int black=0, white=255; // blackest black and whitest white in the picture | 2538 int black=0, white=255; // blackest black and whitest white in the picture |
2502 | 2539 |
2503 #ifdef TIMEING | 2540 #ifdef TIMING |
2504 long long T0, T1, memcpyTime=0, vertTime=0, horizTime=0, sumTime, diffTime=0; | 2541 long long T0, T1, memcpyTime=0, vertTime=0, horizTime=0, sumTime, diffTime=0; |
2505 sumTime= rdtsc(); | 2542 sumTime= rdtsc(); |
2506 #endif | 2543 #endif |
2507 | 2544 |
2508 if(!yHistogram) | 2545 if(!yHistogram) |
2599 #endif | 2636 #endif |
2600 | 2637 |
2601 | 2638 |
2602 if(y + 12 < height) | 2639 if(y + 12 < height) |
2603 { | 2640 { |
2604 #ifdef MORE_TIMEING | 2641 #ifdef MORE_TIMING |
2605 T0= rdtsc(); | 2642 T0= rdtsc(); |
2606 #endif | 2643 #endif |
2607 | 2644 |
2608 #ifdef HAVE_MMX2 | 2645 #ifdef HAVE_MMX2 |
2609 prefetchnta(vertSrcBlock + (((x>>3)&3) + 2)*srcStride + 32); | 2646 prefetchnta(vertSrcBlock + (((x>>3)&3) + 2)*srcStride + 32); |
2633 deInterlaceInterpolateCubic(dstBlock, dstStride); | 2670 deInterlaceInterpolateCubic(dstBlock, dstStride); |
2634 else if(mode & CUBIC_BLEND_DEINT_FILTER) | 2671 else if(mode & CUBIC_BLEND_DEINT_FILTER) |
2635 deInterlaceBlendCubic(dstBlock, dstStride); | 2672 deInterlaceBlendCubic(dstBlock, dstStride); |
2636 */ | 2673 */ |
2637 | 2674 |
2638 #ifdef MORE_TIMEING | 2675 #ifdef MORE_TIMING |
2639 T1= rdtsc(); | 2676 T1= rdtsc(); |
2640 memcpyTime+= T1-T0; | 2677 memcpyTime+= T1-T0; |
2641 T0=T1; | 2678 T0=T1; |
2642 #endif | 2679 #endif |
2643 if(mode & V_DEBLOCK) | 2680 if(mode & V_DEBLOCK) |
2655 } | 2692 } |
2656 else | 2693 else |
2657 doVertDefFilter(vertBlock, stride, QP); | 2694 doVertDefFilter(vertBlock, stride, QP); |
2658 } | 2695 } |
2659 } | 2696 } |
2660 #ifdef MORE_TIMEING | 2697 #ifdef MORE_TIMING |
2661 T1= rdtsc(); | 2698 T1= rdtsc(); |
2662 vertTime+= T1-T0; | 2699 vertTime+= T1-T0; |
2663 T0=T1; | 2700 T0=T1; |
2664 #endif | 2701 #endif |
2665 } | 2702 } |
2681 */ | 2718 */ |
2682 } | 2719 } |
2683 | 2720 |
2684 if(x - 8 >= 0 && x<width) | 2721 if(x - 8 >= 0 && x<width) |
2685 { | 2722 { |
2686 #ifdef MORE_TIMEING | 2723 #ifdef MORE_TIMING |
2687 T0= rdtsc(); | 2724 T0= rdtsc(); |
2688 #endif | 2725 #endif |
2689 if(mode & H_DEBLOCK) | 2726 if(mode & H_DEBLOCK) |
2690 { | 2727 { |
2691 if(mode & H_X1_FILTER) | 2728 if(mode & H_X1_FILTER) |
2699 } | 2736 } |
2700 else | 2737 else |
2701 doHorizDefFilterAndCopyBack(dstBlock-4, stride, QP); | 2738 doHorizDefFilterAndCopyBack(dstBlock-4, stride, QP); |
2702 } | 2739 } |
2703 } | 2740 } |
2704 #ifdef MORE_TIMEING | 2741 #ifdef MORE_TIMING |
2705 T1= rdtsc(); | 2742 T1= rdtsc(); |
2706 horizTime+= T1-T0; | 2743 horizTime+= T1-T0; |
2707 T0=T1; | 2744 T0=T1; |
2708 #endif | 2745 #endif |
2709 dering(dstBlock - 9 - stride, stride, QP); | 2746 dering(dstBlock - 9 - stride, stride, QP); |
2723 asm volatile("femms"); | 2760 asm volatile("femms"); |
2724 #elif defined (HAVE_MMX) | 2761 #elif defined (HAVE_MMX) |
2725 asm volatile("emms"); | 2762 asm volatile("emms"); |
2726 #endif | 2763 #endif |
2727 | 2764 |
2728 #ifdef TIMEING | 2765 #ifdef TIMING |
2729 // FIXME diff is mostly the time spent for rdtsc (should subtract that but ...) | 2766 // FIXME diff is mostly the time spent for rdtsc (should subtract that but ...) |
2730 sumTime= rdtsc() - sumTime; | 2767 sumTime= rdtsc() - sumTime; |
2731 if(!isColor) | 2768 if(!isColor) |
2732 printf("cpy:%4dk, vert:%4dk, horiz:%4dk, sum:%4dk, diff:%4dk, color: %d/%d \r", | 2769 printf("cpy:%4dk, vert:%4dk, horiz:%4dk, sum:%4dk, diff:%4dk, color: %d/%d \r", |
2733 (int)(memcpyTime/1000), (int)(vertTime/1000), (int)(horizTime/1000), | 2770 (int)(memcpyTime/1000), (int)(vertTime/1000), (int)(horizTime/1000), |