comparison postproc/postprocess.c @ 2461:60f16575bece

fixed difference with -vo md5 between doVertDefFilter() C and MMX / MMX2 versions; fixed some warnings; fixed the "can't compile on non-x86 systems" problem (I didn't apply the patch from Oliver Schoenbrunner <oliver.schoenbrunner@jku.at> because it used ARCH_X86 instead of HAVE_MMX)
author michael
date Thu, 25 Oct 2001 11:42:34 +0000
parents b74c2a08eac9
children 94a0265c408c
comparison
equal deleted inserted replaced
2460:7925f241765b 2461:60f16575bece
23 doVertLowPass E e e 23 doVertLowPass E e e
24 doVertDefFilter Ec Ec Ec 24 doVertDefFilter Ec Ec Ec
25 isHorizDC Ec Ec 25 isHorizDC Ec Ec
26 isHorizMinMaxOk a E 26 isHorizMinMaxOk a E
27 doHorizLowPass E e e 27 doHorizLowPass E e e
28 doHorizDefFilter E E E 28 doHorizDefFilter Ec Ec Ec
29 deRing 29 deRing
30 Vertical RKAlgo1 E a a 30 Vertical RKAlgo1 E a a
31 Horizontal RKAlgo1 a a
31 Vertical X1 a E E 32 Vertical X1 a E E
32 Horizontal X1 a E E 33 Horizontal X1 a E E
33 LinIpolDeinterlace e E E* 34 LinIpolDeinterlace e E E*
34 CubicIpolDeinterlace a e e* 35 CubicIpolDeinterlace a e e*
35 LinBlendDeinterlace e E E* 36 LinBlendDeinterlace e E E*
58 (the if/else stuff per block is slowing things down) 59 (the if/else stuff per block is slowing things down)
59 compare the quality & speed of all filters 60 compare the quality & speed of all filters
60 split this huge file 61 split this huge file
61 fix warnings (unused vars, ...) 62 fix warnings (unused vars, ...)
62 noise reduction filters 63 noise reduction filters
64 border remover
63 ... 65 ...
64 66
65 Notes: 67 Notes:
66 68 fixed difference with -vo md5 between doVertDefFilter() C and MMX / MMX2 versions
67 */ 69 */
68 70
69 //Changelog: use the CVS log 71 //Changelog: use the CVS log
70 72
71 #include <inttypes.h> 73 #include <inttypes.h>
161 "fast", "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels", 163 "fast", "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels",
162 "fa", "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels", 164 "fa", "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels",
163 NULL //End Marker 165 NULL //End Marker
164 }; 166 };
165 167
168 static inline void unusedVariableWarningFixer()
169 {
170 if(
171 packedYOffset + packedYScale + w05 + w20 + w1400 + bm00000001 + bm00010000
172 + bm00001000 + bm10000000 + bm10000001 + bm11000011 + bm00000011 + bm11111110
173 + bm11000000 + bm00011000 + bm00110011 + bm11001100 + b00 + b01 + b02 + b0F
174 + bFF + b20 + b80 + b7E + b7C + b3F + temp0 + temp1 + temp2 + temp3 + temp4
175 + temp5 + pQPb== 0) b00=0;
176 }
177
166 #ifdef TIMING 178 #ifdef TIMING
167 static inline long long rdtsc() 179 static inline long long rdtsc()
168 { 180 {
169 long long l; 181 long long l;
170 asm volatile( "rdtsc\n\t" 182 asm volatile( "rdtsc\n\t"
209 /** 221 /**
210 * Check if the middle 8x8 Block in the given 8x16 block is flat 222 * Check if the middle 8x8 Block in the given 8x16 block is flat
211 */ 223 */
212 static inline int isVertDC(uint8_t src[], int stride){ 224 static inline int isVertDC(uint8_t src[], int stride){
213 int numEq= 0; 225 int numEq= 0;
226 #ifndef HAVE_MMX
214 int y; 227 int y;
228 #endif
215 src+= stride*4; // src points to begin of the 8x8 Block 229 src+= stride*4; // src points to begin of the 8x8 Block
216 #ifdef HAVE_MMX 230 #ifdef HAVE_MMX
217 asm volatile( 231 asm volatile(
218 "leal (%1, %2), %%eax \n\t" 232 "leal (%1, %2), %%eax \n\t"
219 "leal (%%eax, %2, 4), %%ebx \n\t" 233 "leal (%%eax, %2, 4), %%ebx \n\t"
265 279
266 " \n\t" 280 " \n\t"
267 "movq %%mm0, %%mm1 \n\t" 281 "movq %%mm0, %%mm1 \n\t"
268 "psrlw $8, %%mm0 \n\t" 282 "psrlw $8, %%mm0 \n\t"
269 "paddb %%mm1, %%mm0 \n\t" 283 "paddb %%mm1, %%mm0 \n\t"
284 #ifdef HAVE_MMX2
285 "pshufw $0xF9, %%mm0, %%mm1 \n\t"
286 "paddb %%mm1, %%mm0 \n\t"
287 "pshufw $0xFE, %%mm0, %%mm1 \n\t"
288 #else
270 "movq %%mm0, %%mm1 \n\t" 289 "movq %%mm0, %%mm1 \n\t"
271 "psrlq $16, %%mm0 \n\t" 290 "psrlq $16, %%mm0 \n\t"
272 "paddb %%mm1, %%mm0 \n\t" 291 "paddb %%mm1, %%mm0 \n\t"
273 "movq %%mm0, %%mm1 \n\t" 292 "movq %%mm0, %%mm1 \n\t"
274 "psrlq $32, %%mm0 \n\t" 293 "psrlq $32, %%mm0 \n\t"
294 #endif
275 "paddb %%mm1, %%mm0 \n\t" 295 "paddb %%mm1, %%mm0 \n\t"
276 "movd %%mm0, %0 \n\t" 296 "movd %%mm0, %0 \n\t"
277 : "=r" (numEq) 297 : "=r" (numEq)
278 : "r" (src), "r" (stride) 298 : "r" (src), "r" (stride)
279 : "%eax", "%ebx" 299 : "%eax", "%ebx"
525 sums[6] = src[l6] + src[l7]; 545 sums[6] = src[l6] + src[l7];
526 sums[7] = src[l7] + src[l8]; 546 sums[7] = src[l7] + src[l8];
527 sums[8] = src[l8] + last; 547 sums[8] = src[l8] + last;
528 548
529 src[l1]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4; 549 src[l1]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4;
530 src[l2]= ((src[l2]<<2) + (first + sums[0] + sums[3]<<1) + sums[5] + 8)>>4; 550 src[l2]= ((src[l2]<<2) + ((first + sums[0] + sums[3])<<1) + sums[5] + 8)>>4;
531 src[l3]= ((src[l3]<<2) + (first + sums[1] + sums[4]<<1) + sums[6] + 8)>>4; 551 src[l3]= ((src[l3]<<2) + ((first + sums[1] + sums[4])<<1) + sums[6] + 8)>>4;
532 src[l4]= ((src[l4]<<2) + (sums[2] + sums[5]<<1) + sums[0] + sums[7] + 8)>>4; 552 src[l4]= ((src[l4]<<2) + ((sums[2] + sums[5])<<1) + sums[0] + sums[7] + 8)>>4;
533 src[l5]= ((src[l5]<<2) + (sums[3] + sums[6]<<1) + sums[1] + sums[8] + 8)>>4; 553 src[l5]= ((src[l5]<<2) + ((sums[3] + sums[6])<<1) + sums[1] + sums[8] + 8)>>4;
534 src[l6]= ((src[l6]<<2) + (last + sums[7] + sums[4]<<1) + sums[2] + 8)>>4; 554 src[l6]= ((src[l6]<<2) + ((last + sums[7] + sums[4])<<1) + sums[2] + 8)>>4;
535 src[l7]= ((last + src[l7]<<2) + (src[l8] + sums[5]<<1) + sums[3] + 8)>>4; 555 src[l7]= (((last + src[l7])<<2) + ((src[l8] + sums[5])<<1) + sums[3] + 8)>>4;
536 src[l8]= ((sums[8]<<2) + (last + sums[6]<<1) + sums[4] + 8)>>4; 556 src[l8]= ((sums[8]<<2) + ((last + sums[6])<<1) + sums[4] + 8)>>4;
537 557
538 src++; 558 src++;
539 } 559 }
540 560
541 #endif 561 #endif
621 const int l2= stride + l1; 641 const int l2= stride + l1;
622 const int l3= stride + l2; 642 const int l3= stride + l2;
623 const int l4= stride + l3; 643 const int l4= stride + l3;
624 const int l5= stride + l4; 644 const int l5= stride + l4;
625 const int l6= stride + l5; 645 const int l6= stride + l5;
626 const int l7= stride + l6; 646 // const int l7= stride + l6;
627 const int l8= stride + l7; 647 // const int l8= stride + l7;
628 const int l9= stride + l8; 648 // const int l9= stride + l8;
629 int x; 649 int x;
630 src+= stride*3; 650 src+= stride*3;
631 for(x=0; x<BLOCK_SIZE; x++) 651 for(x=0; x<BLOCK_SIZE; x++)
632 { 652 {
633 if(ABS(src[l4]-src[l5]) < QP + QP/4) 653 if(ABS(src[l4]-src[l5]) < QP + QP/4)
747 const int l3= stride + l2; 767 const int l3= stride + l2;
748 const int l4= stride + l3; 768 const int l4= stride + l3;
749 const int l5= stride + l4; 769 const int l5= stride + l4;
750 const int l6= stride + l5; 770 const int l6= stride + l5;
751 const int l7= stride + l6; 771 const int l7= stride + l6;
752 const int l8= stride + l7; 772 // const int l8= stride + l7;
753 const int l9= stride + l8; 773 // const int l9= stride + l8;
754 int x; 774 int x;
755 775
756 src+= stride*3; 776 src+= stride*3;
757 for(x=0; x<BLOCK_SIZE; x++) 777 for(x=0; x<BLOCK_SIZE; x++)
758 { 778 {
1201 "movq temp3, %%mm1 \n\t" // H3 - H4 1221 "movq temp3, %%mm1 \n\t" // H3 - H4
1202 1222
1203 "pxor %%mm2, %%mm2 \n\t" 1223 "pxor %%mm2, %%mm2 \n\t"
1204 "pxor %%mm3, %%mm3 \n\t" 1224 "pxor %%mm3, %%mm3 \n\t"
1205 1225
1206 // FIXME rounding error
1207 "psraw $1, %%mm0 \n\t" // (L3 - L4)/2
1208 "psraw $1, %%mm1 \n\t" // (H3 - H4)/2
1209 "pcmpgtw %%mm0, %%mm2 \n\t" // sign (L3-L4) 1226 "pcmpgtw %%mm0, %%mm2 \n\t" // sign (L3-L4)
1210 "pcmpgtw %%mm1, %%mm3 \n\t" // sign (H3-H4) 1227 "pcmpgtw %%mm1, %%mm3 \n\t" // sign (H3-H4)
1211 "pxor %%mm2, %%mm0 \n\t" 1228 "pxor %%mm2, %%mm0 \n\t"
1212 "pxor %%mm3, %%mm1 \n\t" 1229 "pxor %%mm3, %%mm1 \n\t"
1213 "psubw %%mm2, %%mm0 \n\t" // |L3-L4| 1230 "psubw %%mm2, %%mm0 \n\t" // |L3-L4|
1214 "psubw %%mm3, %%mm1 \n\t" // |H3-H4| 1231 "psubw %%mm3, %%mm1 \n\t" // |H3-H4|
1215 // "psrlw $1, %%mm0 \n\t" // |L3 - L4|/2 1232 "psrlw $1, %%mm0 \n\t" // |L3 - L4|/2
1216 // "psrlw $1, %%mm1 \n\t" // |H3 - H4|/2 1233 "psrlw $1, %%mm1 \n\t" // |H3 - H4|/2
1217 1234
1218 "pxor %%mm6, %%mm2 \n\t" 1235 "pxor %%mm6, %%mm2 \n\t"
1219 "pxor %%mm7, %%mm3 \n\t" 1236 "pxor %%mm7, %%mm3 \n\t"
1220 "pand %%mm2, %%mm4 \n\t" 1237 "pand %%mm2, %%mm4 \n\t"
1221 "pand %%mm3, %%mm5 \n\t" 1238 "pand %%mm3, %%mm5 \n\t"
1772 sums[6] = dst[5] + dst[6]; 1789 sums[6] = dst[5] + dst[6];
1773 sums[7] = dst[6] + dst[7]; 1790 sums[7] = dst[6] + dst[7];
1774 sums[8] = dst[7] + last; 1791 sums[8] = dst[7] + last;
1775 1792
1776 dst[0]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4; 1793 dst[0]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4;
1777 dst[1]= ((dst[1]<<2) + (first + sums[0] + sums[3]<<1) + sums[5] + 8)>>4; 1794 dst[1]= ((dst[1]<<2) + ((first + sums[0] + sums[3])<<1) + sums[5] + 8)>>4;
1778 dst[2]= ((dst[2]<<2) + (first + sums[1] + sums[4]<<1) + sums[6] + 8)>>4; 1795 dst[2]= ((dst[2]<<2) + ((first + sums[1] + sums[4])<<1) + sums[6] + 8)>>4;
1779 dst[3]= ((dst[3]<<2) + (sums[2] + sums[5]<<1) + sums[0] + sums[7] + 8)>>4; 1796 dst[3]= ((dst[3]<<2) + ((sums[2] + sums[5])<<1) + sums[0] + sums[7] + 8)>>4;
1780 dst[4]= ((dst[4]<<2) + (sums[3] + sums[6]<<1) + sums[1] + sums[8] + 8)>>4; 1797 dst[4]= ((dst[4]<<2) + ((sums[3] + sums[6])<<1) + sums[1] + sums[8] + 8)>>4;
1781 dst[5]= ((dst[5]<<2) + (last + sums[7] + sums[4]<<1) + sums[2] + 8)>>4; 1798 dst[5]= ((dst[5]<<2) + ((last + sums[7] + sums[4])<<1) + sums[2] + 8)>>4;
1782 dst[6]= ((last + dst[6]<<2) + (dst[7] + sums[5]<<1) + sums[3] + 8)>>4; 1799 dst[6]= (((last + dst[6])<<2) + ((dst[7] + sums[5])<<1) + sums[3] + 8)>>4;
1783 dst[7]= ((sums[8]<<2) + (last + sums[6]<<1) + sums[4] + 8)>>4; 1800 dst[7]= ((sums[8]<<2) + ((last + sums[6])<<1) + sums[4] + 8)>>4;
1784 1801
1785 dst+= stride; 1802 dst+= stride;
1786 } 1803 }
1787 #endif 1804 #endif
1788 } 1805 }
1816 FIND_MIN_MAX(%%ebx, %1, 2) 1833 FIND_MIN_MAX(%%ebx, %1, 2)
1817 FIND_MIN_MAX(%0, %1, 8) 1834 FIND_MIN_MAX(%0, %1, 8)
1818 FIND_MIN_MAX(%%ebx, %1, 2) 1835 FIND_MIN_MAX(%%ebx, %1, 2)
1819 1836
1820 "movq %%mm6, %%mm4 \n\t" 1837 "movq %%mm6, %%mm4 \n\t"
1821 "psrlq $32, %%mm6 \n\t" 1838 "psrlq $8, %%mm6 \n\t"
1822 "pminub %%mm4, %%mm6 \n\t" 1839 "pminub %%mm4, %%mm6 \n\t" // min of pixels
1840 #ifdef HAVE_MMX2
1841 "pshufw $0xF9, %%mm6, %%mm4 \n\t"
1842 "pminub %%mm4, %%mm6 \n\t" // min of pixels
1843 "pshufw $0xFE, %%mm6, %%mm4 \n\t"
1844 #else
1823 "movq %%mm6, %%mm4 \n\t" 1845 "movq %%mm6, %%mm4 \n\t"
1824 "psrlq $16, %%mm6 \n\t" 1846 "psrlq $16, %%mm6 \n\t"
1825 "pminub %%mm4, %%mm6 \n\t" 1847 "pminub %%mm4, %%mm6 \n\t"
1826 "movq %%mm6, %%mm4 \n\t" 1848 "movq %%mm6, %%mm4 \n\t"
1827 "psrlq $8, %%mm6 \n\t" 1849 "psrlq $32, %%mm6 \n\t"
1828 "pminub %%mm4, %%mm6 \n\t" // min of pixels 1850 #endif
1851 "pminub %%mm4, %%mm6 \n\t"
1852
1829 1853
1830 "movq %%mm7, %%mm4 \n\t" 1854 "movq %%mm7, %%mm4 \n\t"
1831 "psrlq $32, %%mm7 \n\t" 1855 "psrlq $8, %%mm7 \n\t"
1832 "pmaxub %%mm4, %%mm7 \n\t" 1856 "pmaxub %%mm4, %%mm7 \n\t" // max of pixels
1857 #ifdef HAVE_MMX2
1858 "pshufw $0xF9, %%mm7, %%mm4 \n\t"
1859 "pmaxub %%mm4, %%mm7 \n\t" // min of pixels
1860 "pshufw $0xFE, %%mm7, %%mm4 \n\t"
1861 #else
1833 "movq %%mm7, %%mm4 \n\t" 1862 "movq %%mm7, %%mm4 \n\t"
1834 "psrlq $16, %%mm7 \n\t" 1863 "psrlq $16, %%mm7 \n\t"
1835 "pmaxub %%mm4, %%mm7 \n\t" 1864 "pmaxub %%mm4, %%mm7 \n\t"
1836 "movq %%mm7, %%mm4 \n\t" 1865 "movq %%mm7, %%mm4 \n\t"
1837 "psrlq $8, %%mm7 \n\t" 1866 "psrlq $32, %%mm7 \n\t"
1838 "pmaxub %%mm4, %%mm7 \n\t" // max of pixels 1867 #endif
1868 "pmaxub %%mm4, %%mm7 \n\t"
1839 PAVGB(%%mm6, %%mm7) // (max + min)/2 1869 PAVGB(%%mm6, %%mm7) // (max + min)/2
1870 "punpcklbw %%mm7, %%mm7 \n\t"
1871 "punpcklbw %%mm7, %%mm7 \n\t"
1872 "punpcklbw %%mm7, %%mm7 \n\t"
1873
1874 "movq (%0), %%mm0 \n\t"
1875 "movq %%mm0, %%mm1 \n\t"
1876
1877
1840 1878
1841 1879
1842 : : "r" (src), "r" (stride), "r" (QP) 1880 : : "r" (src), "r" (stride), "r" (QP)
1843 : "%eax", "%ebx" 1881 : "%eax", "%ebx"
1844 ); 1882 );
2134 src++; 2172 src++;
2135 } 2173 }
2136 #endif 2174 #endif
2137 } 2175 }
2138 2176
2177 #ifdef HAVE_MMX
2139 /** 2178 /**
2140 * transposes and shift the given 8x8 Block into dst1 and dst2 2179 * transposes and shift the given 8x8 Block into dst1 and dst2
2141 */ 2180 */
2142 static inline void transpose1(uint8_t *dst1, uint8_t *dst2, uint8_t *src, int srcStride) 2181 static inline void transpose1(uint8_t *dst1, uint8_t *dst2, uint8_t *src, int srcStride)
2143 { 2182 {
2297 2336
2298 :: "r" (dst), "r" (dstStride), "r" (src) 2337 :: "r" (dst), "r" (dstStride), "r" (src)
2299 : "%eax", "%ebx" 2338 : "%eax", "%ebx"
2300 ); 2339 );
2301 } 2340 }
2302 2341 #endif
2303 2342
2304 #ifdef HAVE_ODIVX_POSTPROCESS 2343 #ifdef HAVE_ODIVX_POSTPROCESS
2305 #include "../opendivx/postprocess.h" 2344 #include "../opendivx/postprocess.h"
2306 int use_old_pp=0; 2345 int use_old_pp=0;
2307 #endif 2346 #endif
2355 char *filterToken; 2394 char *filterToken;
2356 2395
2357 strncpy(temp, name, GET_MODE_BUFFER_SIZE); 2396 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
2358 2397
2359 for(;;){ 2398 for(;;){
2360 char *p2;
2361 char *filterName; 2399 char *filterName;
2362 int q= GET_PP_QUALITY_MAX; 2400 int q= GET_PP_QUALITY_MAX;
2363 int chrom=-1; 2401 int chrom=-1;
2364 char *option; 2402 char *option;
2365 char *options[OPTIONS_ARRAY_SIZE]; 2403 char *options[OPTIONS_ARRAY_SIZE];
2601 * levelFix == 0 -> dont touch the brighness & contrast 2639 * levelFix == 0 -> dont touch the brighness & contrast
2602 */ 2640 */
2603 static inline void blockCopy(uint8_t dst[], int dstStride, uint8_t src[], int srcStride, 2641 static inline void blockCopy(uint8_t dst[], int dstStride, uint8_t src[], int srcStride,
2604 int numLines, int levelFix) 2642 int numLines, int levelFix)
2605 { 2643 {
2644 #ifndef HAVE_MMX
2606 int i; 2645 int i;
2646 #endif
2607 if(levelFix) 2647 if(levelFix)
2608 { 2648 {
2609 #ifdef HAVE_MMX 2649 #ifdef HAVE_MMX
2610 asm volatile( 2650 asm volatile(
2611 "leal (%2,%2), %%eax \n\t" 2651 "leal (%2,%2), %%eax \n\t"
2727 2767
2728 /* Temporary buffers for handling the last block */ 2768 /* Temporary buffers for handling the last block */
2729 static uint8_t *tempDstBlock= NULL; 2769 static uint8_t *tempDstBlock= NULL;
2730 static uint8_t *tempSrcBlock= NULL; 2770 static uint8_t *tempSrcBlock= NULL;
2731 2771
2772 #ifdef PP_FUNNY_STRIDE
2732 uint8_t *dstBlockPtrBackup; 2773 uint8_t *dstBlockPtrBackup;
2733 uint8_t *srcBlockPtrBackup; 2774 uint8_t *srcBlockPtrBackup;
2734 2775 #endif
2776
2777 #ifdef MORE_TIMING
2778 long long T0, T1, diffTime=0;
2779 #endif
2735 #ifdef TIMING 2780 #ifdef TIMING
2736 long long T0, T1, memcpyTime=0, vertTime=0, horizTime=0, sumTime, diffTime=0; 2781 long long memcpyTime=0, vertTime=0, horizTime=0, sumTime;
2737 sumTime= rdtsc(); 2782 sumTime= rdtsc();
2738 #endif 2783 #endif
2739 2784
2740 if(tempDst==NULL) 2785 if(tempDst==NULL)
2741 { 2786 {
3069 #endif 3114 #endif
3070 3115
3071 dstBlock+=8; 3116 dstBlock+=8;
3072 srcBlock+=8; 3117 srcBlock+=8;
3073 3118
3119 #ifdef HAVE_MMX
3074 tmpXchg= tempBlock1; 3120 tmpXchg= tempBlock1;
3075 tempBlock1= tempBlock2; 3121 tempBlock1= tempBlock2;
3076 tempBlock2 = tmpXchg; 3122 tempBlock2 = tmpXchg;
3123 #endif
3077 } 3124 }
3078 3125
3079 /* did we use a tmp buffer */ 3126 /* did we use a tmp buffer */
3080 if(y+15 >= height) 3127 if(y+15 >= height)
3081 { 3128 {