Mercurial > mplayer.hg
comparison postproc/postprocess.c @ 2860:08b833fb875a
temporal noise reducer in C (-pp 0x100000)
setting the thresholds from the command line requires postprocess2() / getPpModeByNameAndQuality() (someone could perhaps modify mplayer so that they are being used, I am too lazy for it ;)
author | michael |
---|---|
date | Tue, 13 Nov 2001 02:40:56 +0000 |
parents | a78ac799a300 |
children | dd3fabd01df0 |
comparison
equal
deleted
inserted
replaced
2859:9508f2a11335 | 2860:08b833fb875a |
---|---|
27 doHorizLowPass E e e | 27 doHorizLowPass E e e |
28 doHorizDefFilter Ec Ec Ec | 28 doHorizDefFilter Ec Ec Ec |
29 deRing E e e* | 29 deRing E e e* |
30 Vertical RKAlgo1 E a a | 30 Vertical RKAlgo1 E a a |
31 Horizontal RKAlgo1 a a | 31 Horizontal RKAlgo1 a a |
32 Vertical X1 a E E | 32 Vertical X1# a E E |
33 Horizontal X1 a E E | 33 Horizontal X1# a E E |
34 LinIpolDeinterlace e E E* | 34 LinIpolDeinterlace e E E* |
35 CubicIpolDeinterlace a e e* | 35 CubicIpolDeinterlace a e e* |
36 LinBlendDeinterlace e E E* | 36 LinBlendDeinterlace e E E* |
37 MedianDeinterlace Ec Ec | 37 MedianDeinterlace# Ec Ec |
38 | 38 TempDeNoiser# a |
39 | 39 |
40 * i dont have a 3dnow CPU -> its untested | 40 * i dont have a 3dnow CPU -> its untested, but noone said it doesnt work so it seems to work |
41 # more or less selfinvented filters so the exactness isnt too meaningfull | |
41 E = Exact implementation | 42 E = Exact implementation |
42 e = allmost exact implementation (slightly different rounding,...) | 43 e = allmost exact implementation (slightly different rounding,...) |
43 a = alternative / approximate impl | 44 a = alternative / approximate impl |
44 c = checked against the other implementations (-vo md5) | 45 c = checked against the other implementations (-vo md5) |
45 */ | 46 */ |
46 | 47 |
47 /* | 48 /* |
48 TODO: | 49 TODO: |
49 verify that everything workes as it should (how?) | 50 verify that everything workes as it should (how?) |
50 reduce the time wasted on the mem transfer | 51 reduce the time wasted on the mem transfer |
51 implement dering | |
52 implement everything in C at least (done at the moment but ...) | 52 implement everything in C at least (done at the moment but ...) |
53 unroll stuff if instructions depend too much on the prior one | 53 unroll stuff if instructions depend too much on the prior one |
54 we use 8x8 blocks for the horizontal filters, opendivx seems to use 8x4? | 54 we use 8x8 blocks for the horizontal filters, opendivx seems to use 8x4? |
55 move YScale thing to the end instead of fixing QP | 55 move YScale thing to the end instead of fixing QP |
56 write a faster and higher quality deblocking filter :) | 56 write a faster and higher quality deblocking filter :) |
57 do something about the speed of the horizontal filters | |
58 make the mainloop more flexible (variable number of blocks at once | 57 make the mainloop more flexible (variable number of blocks at once |
59 (the if/else stuff per block is slowing things down) | 58 (the if/else stuff per block is slowing things down) |
60 compare the quality & speed of all filters | 59 compare the quality & speed of all filters |
61 split this huge file | 60 split this huge file |
62 fix warnings (unused vars, ...) | |
63 noise reduction filters | |
64 border remover | 61 border remover |
65 optimize c versions | 62 optimize c versions |
63 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks | |
66 ... | 64 ... |
67 | 65 |
68 Notes: | 66 Notes: |
69 */ | 67 */ |
70 | 68 |
180 {"al", "autolevels", 0, 1, 2, LEVEL_FIX}, | 178 {"al", "autolevels", 0, 1, 2, LEVEL_FIX}, |
181 {"lb", "linblenddeint", 0, 1, 6, LINEAR_BLEND_DEINT_FILTER}, | 179 {"lb", "linblenddeint", 0, 1, 6, LINEAR_BLEND_DEINT_FILTER}, |
182 {"li", "linipoldeint", 0, 1, 6, LINEAR_IPOL_DEINT_FILTER}, | 180 {"li", "linipoldeint", 0, 1, 6, LINEAR_IPOL_DEINT_FILTER}, |
183 {"ci", "cubicipoldeint", 0, 1, 6, CUBIC_IPOL_DEINT_FILTER}, | 181 {"ci", "cubicipoldeint", 0, 1, 6, CUBIC_IPOL_DEINT_FILTER}, |
184 {"md", "mediandeint", 0, 1, 6, MEDIAN_DEINT_FILTER}, | 182 {"md", "mediandeint", 0, 1, 6, MEDIAN_DEINT_FILTER}, |
183 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER}, | |
185 {NULL, NULL,0,0,0,0} //End Marker | 184 {NULL, NULL,0,0,0,0} //End Marker |
186 }; | 185 }; |
187 | 186 |
188 static char *replaceTable[]= | 187 static char *replaceTable[]= |
189 { | 188 { |
190 "default", "hdeblock:a,vdeblock:a,dering:a,autolevels", | 189 "default", "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400", |
191 "de", "hdeblock:a,vdeblock:a,dering:a,autolevels", | 190 "de", "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400", |
192 "fast", "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels", | 191 "fast", "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400", |
193 "fa", "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels", | 192 "fa", "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400", |
194 NULL //End Marker | 193 NULL //End Marker |
195 }; | 194 }; |
196 | 195 |
197 #ifdef HAVE_MMX | 196 #ifdef HAVE_MMX |
198 static inline void unusedVariableWarningFixer() | 197 static inline void unusedVariableWarningFixer() |
2592 : "%eax", "%ebx" | 2591 : "%eax", "%ebx" |
2593 ); | 2592 ); |
2594 } | 2593 } |
2595 #endif | 2594 #endif |
2596 | 2595 |
2596 static void inline tempNoiseReducer(uint8_t *src, int stride, | |
2597 uint8_t *tempBlured, int *maxNoise) | |
2598 { | |
2599 int y; | |
2600 int d=0; | |
2601 int sysd=0; | |
2602 | |
2603 for(y=0; y<8; y++) | |
2604 { | |
2605 int x; | |
2606 for(x=0; x<8; x++) | |
2607 { | |
2608 int ref= tempBlured[ x + y*stride ]; | |
2609 int cur= src[ x + y*stride ]; | |
2610 int d1=ref - cur; | |
2611 d+= ABS(d1); //d1*d1; | |
2612 sysd+= d1; | |
2613 } | |
2614 } | |
2615 //printf("%d %d %d\n", maxNoise[0], maxNoise[1], maxNoise[2]); | |
2616 /* | |
2617 Switch between | |
2618 1 0 0 0 0 0 0 (0) | |
2619 64 32 16 8 4 2 1 (1) | |
2620 64 48 36 27 20 15 11 (33) (approx) | |
2621 64 56 49 43 37 33 29 (200) (approx) | |
2622 */ | |
2623 if(d > maxNoise[1]) | |
2624 { | |
2625 if(d < maxNoise[2]) | |
2626 { | |
2627 for(y=0; y<8; y++) | |
2628 { | |
2629 int x; | |
2630 for(x=0; x<8; x++) | |
2631 { | |
2632 int ref= tempBlured[ x + y*stride ]; | |
2633 int cur= src[ x + y*stride ]; | |
2634 tempBlured[ x + y*stride ]= | |
2635 src[ x + y*stride ]= | |
2636 (ref + cur + 1)>>1; | |
2637 } | |
2638 } | |
2639 } | |
2640 else | |
2641 { | |
2642 for(y=0; y<8; y++) | |
2643 { | |
2644 int x; | |
2645 for(x=0; x<8; x++) | |
2646 { | |
2647 tempBlured[ x + y*stride ]= src[ x + y*stride ]; | |
2648 } | |
2649 } | |
2650 } | |
2651 } | |
2652 else | |
2653 { | |
2654 if(d < maxNoise[0]) | |
2655 { | |
2656 for(y=0; y<8; y++) | |
2657 { | |
2658 int x; | |
2659 for(x=0; x<8; x++) | |
2660 { | |
2661 int ref= tempBlured[ x + y*stride ]; | |
2662 int cur= src[ x + y*stride ]; | |
2663 tempBlured[ x + y*stride ]= | |
2664 src[ x + y*stride ]= | |
2665 (ref*7 + cur + 4)>>3; | |
2666 } | |
2667 } | |
2668 } | |
2669 else | |
2670 { | |
2671 for(y=0; y<8; y++) | |
2672 { | |
2673 int x; | |
2674 for(x=0; x<8; x++) | |
2675 { | |
2676 int ref= tempBlured[ x + y*stride ]; | |
2677 int cur= src[ x + y*stride ]; | |
2678 tempBlured[ x + y*stride ]= | |
2679 src[ x + y*stride ]= | |
2680 (ref*3 + cur + 2)>>2; | |
2681 } | |
2682 } | |
2683 } | |
2684 } | |
2685 } | |
2686 | |
2597 #ifdef HAVE_ODIVX_POSTPROCESS | 2687 #ifdef HAVE_ODIVX_POSTPROCESS |
2598 #include "../opendivx/postprocess.h" | 2688 #include "../opendivx/postprocess.h" |
2599 int use_old_pp=0; | 2689 int use_old_pp=0; |
2600 #endif | 2690 #endif |
2601 | 2691 |
2602 static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, | 2692 static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, |
2603 QP_STORE_T QPs[], int QPStride, int isColor, int mode); | 2693 QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode); |
2604 | 2694 |
2605 /* -pp Command line Help | 2695 /* -pp Command line Help |
2606 NOTE/FIXME: put this at an appropriate place (--help, html docs, man mplayer)? | 2696 NOTE/FIXME: put this at an appropriate place (--help, html docs, man mplayer)? |
2607 | 2697 |
2608 -pp <filterName>[:<option>[:<option>...]][,[-]<filterName>[:<option>...]]... | 2698 -pp <filterName>[:<option>[:<option>...]][,[-]<filterName>[:<option>...]]... |
2609 | 2699 |
2610 long form example: | 2700 long form example: |
2611 -pp vdeblock:autoq,hdeblock:autoq,linblenddeint -pp default,-vdeblock | 2701 -pp vdeblock:autoq,hdeblock:autoq,linblenddeint -pp default,-vdeblock |
2612 short form example: | 2702 short form example: |
2613 -pp vb:a,hb:a,lb -pp de,-vb | 2703 -pp vb:a,hb:a,lb -pp de,-vb |
2704 more examples: | |
2705 -pp tn:64:128:256 | |
2614 | 2706 |
2615 Filters Options | 2707 Filters Options |
2616 short long name short long option Description | 2708 short long name short long option Description |
2617 * * a autoq cpu power dependant enabler | 2709 * * a autoq cpu power dependant enabler |
2618 c chrom chrominance filtring enabled | 2710 c chrom chrominance filtring enabled |
2629 li linipoldeint linear interpolating deinterlacer | 2721 li linipoldeint linear interpolating deinterlacer |
2630 ci cubicipoldeint cubic interpolating deinterlacer | 2722 ci cubicipoldeint cubic interpolating deinterlacer |
2631 md mediandeint median deinterlacer | 2723 md mediandeint median deinterlacer |
2632 de default hdeblock:a,vdeblock:a,dering:a,autolevels | 2724 de default hdeblock:a,vdeblock:a,dering:a,autolevels |
2633 fa fast x1hdeblock:a,x1vdeblock:a,dering:a,autolevels | 2725 fa fast x1hdeblock:a,x1vdeblock:a,dering:a,autolevels |
2726 tn tmpnoise (3 Thresholds) Temporal Noise Reducer | |
2634 */ | 2727 */ |
2635 | 2728 |
2636 /** | 2729 /** |
2637 * returns a PPMode struct which will have a non 0 error variable if an error occured | 2730 * returns a PPMode struct which will have a non 0 error variable if an error occured |
2638 * name is the string after "-pp" on the command line | 2731 * name is the string after "-pp" on the command line |
2642 { | 2735 { |
2643 char temp[GET_MODE_BUFFER_SIZE]; | 2736 char temp[GET_MODE_BUFFER_SIZE]; |
2644 char *p= temp; | 2737 char *p= temp; |
2645 char *filterDelimiters= ","; | 2738 char *filterDelimiters= ","; |
2646 char *optionDelimiters= ":"; | 2739 char *optionDelimiters= ":"; |
2647 struct PPMode ppMode= {0,0,0,0,0,0}; | 2740 struct PPMode ppMode= {0,0,0,0,0,0,{150,200,400}}; |
2648 char *filterToken; | 2741 char *filterToken; |
2649 | 2742 |
2650 strncpy(temp, name, GET_MODE_BUFFER_SIZE); | 2743 strncpy(temp, name, GET_MODE_BUFFER_SIZE); |
2744 | |
2745 printf("%s\n", name); | |
2651 | 2746 |
2652 for(;;){ | 2747 for(;;){ |
2653 char *filterName; | 2748 char *filterName; |
2654 int q= GET_PP_QUALITY_MAX; | 2749 int q= 1000000; //GET_PP_QUALITY_MAX; |
2655 int chrom=-1; | 2750 int chrom=-1; |
2656 char *option; | 2751 char *option; |
2657 char *options[OPTIONS_ARRAY_SIZE]; | 2752 char *options[OPTIONS_ARRAY_SIZE]; |
2658 int i; | 2753 int i; |
2659 int filterNameOk=0; | 2754 int filterNameOk=0; |
2660 int numOfUnknownOptions=0; | 2755 int numOfUnknownOptions=0; |
2661 int enable=1; //does the user want us to enabled or disabled the filter | 2756 int enable=1; //does the user want us to enabled or disabled the filter |
2662 | 2757 |
2663 filterToken= strtok(p, filterDelimiters); | 2758 filterToken= strtok(p, filterDelimiters); |
2664 if(filterToken == NULL) break; | 2759 if(filterToken == NULL) break; |
2665 p+= strlen(filterToken) + 1; | 2760 p+= strlen(filterToken) + 1; // p points to next filterToken |
2666 filterName= strtok(filterToken, optionDelimiters); | 2761 filterName= strtok(filterToken, optionDelimiters); |
2667 printf("%s::%s\n", filterToken, filterName); | 2762 printf("%s::%s\n", filterToken, filterName); |
2668 | 2763 |
2669 if(*filterName == '-') | 2764 if(*filterName == '-') |
2670 { | 2765 { |
2671 enable=0; | 2766 enable=0; |
2672 filterName++; | 2767 filterName++; |
2673 } | 2768 } |
2769 | |
2674 for(;;){ //for all options | 2770 for(;;){ //for all options |
2675 option= strtok(NULL, optionDelimiters); | 2771 option= strtok(NULL, optionDelimiters); |
2676 if(option == NULL) break; | 2772 if(option == NULL) break; |
2677 | 2773 |
2678 printf("%s\n", option); | 2774 printf("%s\n", option); |
2681 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1; | 2777 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1; |
2682 else | 2778 else |
2683 { | 2779 { |
2684 options[numOfUnknownOptions] = option; | 2780 options[numOfUnknownOptions] = option; |
2685 numOfUnknownOptions++; | 2781 numOfUnknownOptions++; |
2686 options[numOfUnknownOptions] = NULL; | |
2687 } | 2782 } |
2688 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break; | 2783 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break; |
2689 } | 2784 } |
2785 options[numOfUnknownOptions] = NULL; | |
2690 | 2786 |
2691 /* replace stuff from the replace Table */ | 2787 /* replace stuff from the replace Table */ |
2692 for(i=0; replaceTable[2*i]!=NULL; i++) | 2788 for(i=0; replaceTable[2*i]!=NULL; i++) |
2693 { | 2789 { |
2694 if(!strcmp(replaceTable[2*i], filterName)) | 2790 if(!strcmp(replaceTable[2*i], filterName)) |
2713 } | 2809 } |
2714 } | 2810 } |
2715 | 2811 |
2716 for(i=0; filters[i].shortName!=NULL; i++) | 2812 for(i=0; filters[i].shortName!=NULL; i++) |
2717 { | 2813 { |
2814 // printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName); | |
2718 if( !strcmp(filters[i].longName, filterName) | 2815 if( !strcmp(filters[i].longName, filterName) |
2719 || !strcmp(filters[i].shortName, filterName)) | 2816 || !strcmp(filters[i].shortName, filterName)) |
2720 { | 2817 { |
2721 ppMode.lumMode &= ~filters[i].mask; | 2818 ppMode.lumMode &= ~filters[i].mask; |
2722 ppMode.chromMode &= ~filters[i].mask; | 2819 ppMode.chromMode &= ~filters[i].mask; |
2742 ppMode.minAllowedY= 0; | 2839 ppMode.minAllowedY= 0; |
2743 ppMode.maxAllowedY= 255; | 2840 ppMode.maxAllowedY= 255; |
2744 numOfUnknownOptions--; | 2841 numOfUnknownOptions--; |
2745 } | 2842 } |
2746 } | 2843 } |
2844 else if(filters[i].mask == TEMP_NOISE_FILTER) | |
2845 { | |
2846 int o; | |
2847 int numOfNoises=0; | |
2848 ppMode.maxTmpNoise[0]= 150; | |
2849 ppMode.maxTmpNoise[1]= 200; | |
2850 ppMode.maxTmpNoise[2]= 400; | |
2851 | |
2852 for(o=0; options[o]!=NULL; o++) | |
2853 { | |
2854 char *tail; | |
2855 ppMode.maxTmpNoise[numOfNoises]= | |
2856 strtol(options[o], &tail, 0); | |
2857 if(tail!=options[o]) | |
2858 { | |
2859 numOfNoises++; | |
2860 numOfUnknownOptions--; | |
2861 if(numOfNoises >= 3) break; | |
2862 } | |
2863 } | |
2864 } | |
2747 } | 2865 } |
2748 } | 2866 } |
2749 if(!filterNameOk) ppMode.error++; | 2867 if(!filterNameOk) ppMode.error++; |
2750 ppMode.error += numOfUnknownOptions; | 2868 ppMode.error += numOfUnknownOptions; |
2751 } | 2869 } |
2761 | 2879 |
2762 return ppMode; | 2880 return ppMode; |
2763 } | 2881 } |
2764 | 2882 |
2765 /** | 2883 /** |
2766 * ... | 2884 * Obsolete, dont use it, use postprocess2() instead |
2767 */ | 2885 */ |
2768 void postprocess(unsigned char * src[], int src_stride, | 2886 void postprocess(unsigned char * src[], int src_stride, |
2769 unsigned char * dst[], int dst_stride, | 2887 unsigned char * dst[], int dst_stride, |
2770 int horizontal_size, int vertical_size, | 2888 int horizontal_size, int vertical_size, |
2771 QP_STORE_T *QP_store, int QP_stride, | 2889 QP_STORE_T *QP_store, int QP_stride, |
2772 int mode) | 2890 int mode) |
2773 { | 2891 { |
2892 struct PPMode ppMode; | |
2893 static QP_STORE_T zeroArray[2048/8]; | |
2774 /* | 2894 /* |
2775 static int qual=0; | 2895 static int qual=0; |
2776 | 2896 |
2777 struct PPMode ppMode= getPPModeByNameAndQuality("fast,default,-hdeblock,-vdeblock", qual); | 2897 ppMode= getPPModeByNameAndQuality("fast,default,-hdeblock,-vdeblock,tmpnoise:150:200:300", qual); |
2898 printf("OK\n"); | |
2778 qual++; | 2899 qual++; |
2779 qual%=7; | 2900 qual%=7; |
2780 printf("\n%d %d %d %d\n", ppMode.lumMode, ppMode.chromMode, ppMode.oldMode, ppMode.error); | 2901 printf("\n%X %X %X %X :%d: %d %d %d\n", ppMode.lumMode, ppMode.chromMode, ppMode.oldMode, ppMode.error, |
2902 qual, ppMode.maxTmpNoise[0], ppMode.maxTmpNoise[1], ppMode.maxTmpNoise[2]); | |
2781 postprocess2(src, src_stride, dst, dst_stride, | 2903 postprocess2(src, src_stride, dst, dst_stride, |
2782 horizontal_size, vertical_size, QP_store, QP_stride, &ppMode); | 2904 horizontal_size, vertical_size, QP_store, QP_stride, &ppMode); |
2783 | 2905 |
2784 return; | 2906 return; |
2785 */ | 2907 */ |
2786 static QP_STORE_T zeroArray[2048/8]; | |
2787 if(QP_store==NULL) | 2908 if(QP_store==NULL) |
2788 { | 2909 { |
2789 QP_store= zeroArray; | 2910 QP_store= zeroArray; |
2790 QP_stride= 0; | 2911 QP_stride= 0; |
2791 } | 2912 } |
2913 | |
2914 ppMode.lumMode= mode; | |
2915 mode= ((mode&0xFF)>>4) | (mode&0xFFFFFF00); | |
2916 ppMode.chromMode= mode; | |
2917 ppMode.maxTmpNoise[0]= 150; | |
2918 ppMode.maxTmpNoise[1]= 200; | |
2919 ppMode.maxTmpNoise[2]= 400; | |
2792 | 2920 |
2793 #ifdef HAVE_ODIVX_POSTPROCESS | 2921 #ifdef HAVE_ODIVX_POSTPROCESS |
2794 // Note: I could make this shit outside of this file, but it would mean one | 2922 // Note: I could make this shit outside of this file, but it would mean one |
2795 // more function call... | 2923 // more function call... |
2796 if(use_old_pp){ | 2924 if(use_old_pp){ |
2798 return; | 2926 return; |
2799 } | 2927 } |
2800 #endif | 2928 #endif |
2801 | 2929 |
2802 postProcess(src[0], src_stride, dst[0], dst_stride, | 2930 postProcess(src[0], src_stride, dst[0], dst_stride, |
2803 horizontal_size, vertical_size, QP_store, QP_stride, 0, mode); | 2931 horizontal_size, vertical_size, QP_store, QP_stride, 0, &ppMode); |
2804 | 2932 |
2805 horizontal_size >>= 1; | 2933 horizontal_size >>= 1; |
2806 vertical_size >>= 1; | 2934 vertical_size >>= 1; |
2807 src_stride >>= 1; | 2935 src_stride >>= 1; |
2808 dst_stride >>= 1; | 2936 dst_stride >>= 1; |
2809 mode= ((mode&0xFF)>>4) | (mode&0xFFFFFF00); | |
2810 // mode&= ~(LINEAR_IPOL_DEINT_FILTER | LINEAR_BLEND_DEINT_FILTER | | 2937 // mode&= ~(LINEAR_IPOL_DEINT_FILTER | LINEAR_BLEND_DEINT_FILTER | |
2811 // MEDIAN_DEINT_FILTER | CUBIC_IPOL_DEINT_FILTER); | 2938 // MEDIAN_DEINT_FILTER | CUBIC_IPOL_DEINT_FILTER); |
2812 | 2939 |
2813 if(1) | 2940 if(1) |
2814 { | 2941 { |
2815 postProcess(src[1], src_stride, dst[1], dst_stride, | 2942 postProcess(src[1], src_stride, dst[1], dst_stride, |
2816 horizontal_size, vertical_size, QP_store, QP_stride, 1, mode); | 2943 horizontal_size, vertical_size, QP_store, QP_stride, 1, &ppMode); |
2817 postProcess(src[2], src_stride, dst[2], dst_stride, | 2944 postProcess(src[2], src_stride, dst[2], dst_stride, |
2818 horizontal_size, vertical_size, QP_store, QP_stride, 2, mode); | 2945 horizontal_size, vertical_size, QP_store, QP_stride, 2, &ppMode); |
2819 } | 2946 } |
2820 else | 2947 else |
2821 { | 2948 { |
2822 memcpy(dst[1], src[1], src_stride*horizontal_size); | 2949 memset(dst[1], 128, dst_stride*vertical_size); |
2823 memcpy(dst[2], src[2], src_stride*horizontal_size); | 2950 memset(dst[2], 128, dst_stride*vertical_size); |
2951 // memcpy(dst[1], src[1], src_stride*horizontal_size); | |
2952 // memcpy(dst[2], src[2], src_stride*horizontal_size); | |
2824 } | 2953 } |
2825 } | 2954 } |
2826 | 2955 |
2827 void postprocess2(unsigned char * src[], int src_stride, | 2956 void postprocess2(unsigned char * src[], int src_stride, |
2828 unsigned char * dst[], int dst_stride, | 2957 unsigned char * dst[], int dst_stride, |
2847 return; | 2976 return; |
2848 } | 2977 } |
2849 #endif | 2978 #endif |
2850 | 2979 |
2851 postProcess(src[0], src_stride, dst[0], dst_stride, | 2980 postProcess(src[0], src_stride, dst[0], dst_stride, |
2852 horizontal_size, vertical_size, QP_store, QP_stride, 0, mode->lumMode); | 2981 horizontal_size, vertical_size, QP_store, QP_stride, 0, mode); |
2853 | 2982 |
2854 horizontal_size >>= 1; | 2983 horizontal_size >>= 1; |
2855 vertical_size >>= 1; | 2984 vertical_size >>= 1; |
2856 src_stride >>= 1; | 2985 src_stride >>= 1; |
2857 dst_stride >>= 1; | 2986 dst_stride >>= 1; |
2858 | 2987 |
2859 postProcess(src[1], src_stride, dst[1], dst_stride, | 2988 postProcess(src[1], src_stride, dst[1], dst_stride, |
2860 horizontal_size, vertical_size, QP_store, QP_stride, 1, mode->chromMode); | 2989 horizontal_size, vertical_size, QP_store, QP_stride, 1, mode); |
2861 postProcess(src[2], src_stride, dst[2], dst_stride, | 2990 postProcess(src[2], src_stride, dst[2], dst_stride, |
2862 horizontal_size, vertical_size, QP_store, QP_stride, 2, mode->chromMode); | 2991 horizontal_size, vertical_size, QP_store, QP_stride, 2, mode); |
2863 } | 2992 } |
2864 | 2993 |
2865 | 2994 |
2866 /** | 2995 /** |
2867 * gets the mode flags for a given quality (larger values mean slower but better postprocessing) | 2996 * gets the mode flags for a given quality (larger values mean slower but better postprocessing) |
3021 | 3150 |
3022 /** | 3151 /** |
3023 * Filters array of bytes (Y or U or V values) | 3152 * Filters array of bytes (Y or U or V values) |
3024 */ | 3153 */ |
3025 static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, | 3154 static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, |
3026 QP_STORE_T QPs[], int QPStride, int isColor, int mode) | 3155 QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode) |
3027 { | 3156 { |
3028 int x,y; | 3157 int x,y; |
3158 const int mode= isColor ? ppMode->chromMode : ppMode->lumMode; | |
3159 | |
3029 /* we need 64bit here otherwise we'll going to have a problem | 3160 /* we need 64bit here otherwise we'll going to have a problem |
3030 after watching a black picture for 5 hours*/ | 3161 after watching a black picture for 5 hours*/ |
3031 static uint64_t *yHistogram= NULL; | 3162 static uint64_t *yHistogram= NULL; |
3032 int black=0, white=255; // blackest black and whitest white in the picture | 3163 int black=0, white=255; // blackest black and whitest white in the picture |
3033 int QPCorrecture= 256; | 3164 int QPCorrecture= 256; |
3038 | 3169 |
3039 /* Temporary buffers for handling the last block */ | 3170 /* Temporary buffers for handling the last block */ |
3040 static uint8_t *tempDstBlock= NULL; | 3171 static uint8_t *tempDstBlock= NULL; |
3041 static uint8_t *tempSrcBlock= NULL; | 3172 static uint8_t *tempSrcBlock= NULL; |
3042 | 3173 |
3174 /* Temporal noise reducing buffers */ | |
3175 static uint8_t *tempBlured[3]= {NULL,NULL,NULL}; | |
3176 | |
3043 #ifdef PP_FUNNY_STRIDE | 3177 #ifdef PP_FUNNY_STRIDE |
3044 uint8_t *dstBlockPtrBackup; | 3178 uint8_t *dstBlockPtrBackup; |
3045 uint8_t *srcBlockPtrBackup; | 3179 uint8_t *srcBlockPtrBackup; |
3046 #endif | 3180 #endif |
3047 | 3181 |
3058 { | 3192 { |
3059 tempDst= (uint8_t*)memalign(8, 1024*24); | 3193 tempDst= (uint8_t*)memalign(8, 1024*24); |
3060 tempSrc= (uint8_t*)memalign(8, 1024*24); | 3194 tempSrc= (uint8_t*)memalign(8, 1024*24); |
3061 tempDstBlock= (uint8_t*)memalign(8, 1024*24); | 3195 tempDstBlock= (uint8_t*)memalign(8, 1024*24); |
3062 tempSrcBlock= (uint8_t*)memalign(8, 1024*24); | 3196 tempSrcBlock= (uint8_t*)memalign(8, 1024*24); |
3197 } | |
3198 | |
3199 if(tempBlured[isColor]==NULL && (mode & TEMP_NOISE_FILTER)) | |
3200 { | |
3201 // printf("%d %d %d\n", isColor, dstStride, height); | |
3202 //FIXME works only as long as the size doesnt increase | |
3203 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end | |
3204 tempBlured[isColor]= (uint8_t*)memalign(8, dstStride*((height+7)&(~7)) + 17*1024); | |
3205 | |
3206 memset(tempBlured[isColor], 0, dstStride*((height+7)&(~7)) + 17*1024); | |
3063 } | 3207 } |
3064 | 3208 |
3065 if(!yHistogram) | 3209 if(!yHistogram) |
3066 { | 3210 { |
3067 int i; | 3211 int i; |
3217 int QPDelta= isColor ? 1<<(32-3) : 1<<(32-4); | 3361 int QPDelta= isColor ? 1<<(32-3) : 1<<(32-4); |
3218 int QPFrac= QPDelta; | 3362 int QPFrac= QPDelta; |
3219 uint8_t *tempBlock1= tempBlocks; | 3363 uint8_t *tempBlock1= tempBlocks; |
3220 uint8_t *tempBlock2= tempBlocks + 8; | 3364 uint8_t *tempBlock2= tempBlocks + 8; |
3221 #endif | 3365 #endif |
3366 int QP=0; | |
3222 /* can we mess with a 8x16 block from srcBlock/dstBlock downwards and 1 line upwards | 3367 /* can we mess with a 8x16 block from srcBlock/dstBlock downwards and 1 line upwards |
3223 if not than use a temporary buffer */ | 3368 if not than use a temporary buffer */ |
3224 if(y+15 >= height) | 3369 if(y+15 >= height) |
3225 { | 3370 { |
3371 int i; | |
3226 /* copy from line 8 to 15 of src, these will be copied with | 3372 /* copy from line 8 to 15 of src, these will be copied with |
3227 blockcopy to dst later */ | 3373 blockcopy to dst later */ |
3228 memcpy(tempSrc + srcStride*8, srcBlock + srcStride*8, | 3374 memcpy(tempSrc + srcStride*8, srcBlock + srcStride*8, |
3229 srcStride*MAX(height-y-8, 0) ); | 3375 srcStride*MAX(height-y-8, 0) ); |
3230 | 3376 |
3231 /* duplicate last line to fill the void upto line 15 */ | 3377 /* duplicate last line of src to fill the void upto line 15 */ |
3232 if(y+15 >= height) | 3378 for(i=MAX(height-y, 8); i<=15; i++) |
3233 { | 3379 memcpy(tempSrc + srcStride*i, src + srcStride*(height-1), srcStride); |
3234 int i; | 3380 |
3235 for(i=height-y; i<=15; i++) | 3381 /* copy up to 9 lines of dst (line -1 to 7)*/ |
3236 memcpy(tempSrc + srcStride*i, | |
3237 src + srcStride*(height-1), srcStride); | |
3238 } | |
3239 | |
3240 /* copy up to 9 lines of dst */ | |
3241 memcpy(tempDst, dstBlock - dstStride, dstStride*MIN(height-y+1, 9) ); | 3382 memcpy(tempDst, dstBlock - dstStride, dstStride*MIN(height-y+1, 9) ); |
3383 | |
3384 /* duplicate last line of dst to fill the void upto line 8 */ | |
3385 for(i=height-y+1; i<=8; i++) | |
3386 memcpy(tempDst + dstStride*i, dst + dstStride*(height-1), dstStride); | |
3387 | |
3242 dstBlock= tempDst + dstStride; | 3388 dstBlock= tempDst + dstStride; |
3243 srcBlock= tempSrc; | 3389 srcBlock= tempSrc; |
3244 } | 3390 } |
3245 | 3391 |
3246 // From this point on it is guranteed that we can read and write 16 lines downward | 3392 // From this point on it is guranteed that we can read and write 16 lines downward |
3249 for(x=0; x<width; x+=BLOCK_SIZE) | 3395 for(x=0; x<width; x+=BLOCK_SIZE) |
3250 { | 3396 { |
3251 const int stride= dstStride; | 3397 const int stride= dstStride; |
3252 uint8_t *tmpXchg; | 3398 uint8_t *tmpXchg; |
3253 #ifdef ARCH_X86 | 3399 #ifdef ARCH_X86 |
3254 int QP= *QPptr; | 3400 QP= *QPptr; |
3255 asm volatile( | 3401 asm volatile( |
3256 "addl %2, %1 \n\t" | 3402 "addl %2, %1 \n\t" |
3257 "sbbl %%eax, %%eax \n\t" | 3403 "sbbl %%eax, %%eax \n\t" |
3258 "shll $2, %%eax \n\t" | 3404 "shll $2, %%eax \n\t" |
3259 "subl %%eax, %0 \n\t" | 3405 "subl %%eax, %0 \n\t" |
3260 : "+r" (QPptr), "+m" (QPFrac) | 3406 : "+r" (QPptr), "+m" (QPFrac) |
3261 : "r" (QPDelta) | 3407 : "r" (QPDelta) |
3262 : "%eax" | 3408 : "%eax" |
3263 ); | 3409 ); |
3264 #else | 3410 #else |
3265 int QP= isColor ? | 3411 QP= isColor ? |
3266 QPs[(y>>3)*QPStride + (x>>3)]: | 3412 QPs[(y>>3)*QPStride + (x>>3)]: |
3267 QPs[(y>>4)*QPStride + (x>>4)]; | 3413 QPs[(y>>4)*QPStride + (x>>4)]; |
3268 #endif | 3414 #endif |
3269 if(!isColor) | 3415 if(!isColor) |
3270 { | 3416 { |
3440 if(mode & DERING) | 3586 if(mode & DERING) |
3441 { | 3587 { |
3442 //FIXME filter first line | 3588 //FIXME filter first line |
3443 if(y>0) dering(dstBlock - stride - 8, stride, QP); | 3589 if(y>0) dering(dstBlock - stride - 8, stride, QP); |
3444 } | 3590 } |
3591 | |
3592 if(mode & TEMP_NOISE_FILTER) | |
3593 { | |
3594 tempNoiseReducer(dstBlock-8, stride, | |
3595 tempBlured[isColor] + y*dstStride + x, | |
3596 ppMode->maxTmpNoise); | |
3597 } | |
3445 } | 3598 } |
3446 else if(mode & DERING) | |
3447 { | |
3448 //FIXME y+15 is required cuz of the tempBuffer thing -> bottom right block isnt filtered | |
3449 if(y > 8 && y+15 < height) dering(dstBlock - stride*9 + width - 8, stride, QP); | |
3450 } | |
3451 | |
3452 | 3599 |
3453 #ifdef PP_FUNNY_STRIDE | 3600 #ifdef PP_FUNNY_STRIDE |
3454 /* did we use a tmp-block buffer */ | 3601 /* did we use a tmp-block buffer */ |
3455 if(x+7 >= width) | 3602 if(x+7 >= width) |
3456 { | 3603 { |
3471 #ifdef HAVE_MMX | 3618 #ifdef HAVE_MMX |
3472 tmpXchg= tempBlock1; | 3619 tmpXchg= tempBlock1; |
3473 tempBlock1= tempBlock2; | 3620 tempBlock1= tempBlock2; |
3474 tempBlock2 = tmpXchg; | 3621 tempBlock2 = tmpXchg; |
3475 #endif | 3622 #endif |
3623 } | |
3624 | |
3625 if(mode & DERING) | |
3626 { | |
3627 if(y > 0) dering(dstBlock - dstStride - 8, dstStride, QP); | |
3628 } | |
3629 | |
3630 if((mode & TEMP_NOISE_FILTER)) | |
3631 { | |
3632 tempNoiseReducer(dstBlock-8, dstStride, | |
3633 tempBlured[isColor] + y*dstStride + x, | |
3634 ppMode->maxTmpNoise); | |
3476 } | 3635 } |
3477 | 3636 |
3478 /* did we use a tmp buffer for the last lines*/ | 3637 /* did we use a tmp buffer for the last lines*/ |
3479 if(y+15 >= height) | 3638 if(y+15 >= height) |
3480 { | 3639 { |