comparison libpostproc/postprocess_template.c @ 157:bc12fd7e6153 libavcodec

temp denoiser changes: (a-b)^2 instead of |a-b| and MMX2/3DNOW version
author michael
date Wed, 14 Nov 2001 02:46:58 +0000
parents c09459686be3
children d1a4f4ca7178
comparison
equal deleted inserted replaced
156:c09459686be3 157:bc12fd7e6153
33 Horizontal X1# a E E 33 Horizontal X1# a E E
34 LinIpolDeinterlace e E E* 34 LinIpolDeinterlace e E E*
35 CubicIpolDeinterlace a e e* 35 CubicIpolDeinterlace a e e*
36 LinBlendDeinterlace e E E* 36 LinBlendDeinterlace e E E*
37 MedianDeinterlace# Ec Ec 37 MedianDeinterlace# Ec Ec
38 TempDeNoiser# a 38 TempDeNoiser# E e e
39 39
40 * I don't have a 3DNow CPU -> it's untested, but no one said it doesn't work, so it seems to work 40 * I don't have a 3DNow CPU -> it's untested, but no one said it doesn't work, so it seems to work
41 # more or less self-invented filters, so the exactness isn't too meaningful 41 # more or less self-invented filters, so the exactness isn't too meaningful
42 E = Exact implementation 42 E = Exact implementation
43 e = almost exact implementation (slightly different rounding, ...) 43 e = almost exact implementation (slightly different rounding, ...)
59 compare the quality & speed of all filters 59 compare the quality & speed of all filters
60 split this huge file 60 split this huge file
61 border remover 61 border remover
62 optimize c versions 62 optimize c versions
63 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks 63 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
64 smart blur
64 ... 65 ...
65 66
66 Notes: 67 Notes:
67 */ 68 */
68 69
2590 :: "r" (dst), "r" (dstStride), "r" (src) 2591 :: "r" (dst), "r" (dstStride), "r" (src)
2591 : "%eax", "%ebx" 2592 : "%eax", "%ebx"
2592 ); 2593 );
2593 } 2594 }
2594 #endif 2595 #endif
2596 //static int test=0;
2595 2597
2596 static void inline tempNoiseReducer(uint8_t *src, int stride, 2598 static void inline tempNoiseReducer(uint8_t *src, int stride,
2597 uint8_t *tempBlured, int *maxNoise) 2599 uint8_t *tempBlured, int *maxNoise)
2598 { 2600 {
2601 #define FAST_L2_DIFF
2602 //#define L1_DIFF //u should change the thresholds too if u try that one
2603 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
2604 asm volatile(
2605 "leal (%2, %2, 2), %%eax \n\t" // 3*stride
2606 "leal (%2, %2, 4), %%ebx \n\t" // 5*stride
2607 "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride
2608 // 0 1 2 3 4 5 6 7 8 9
2609 // %x %x+%2 %x+2%2 %x+eax %x+4%2 %x+ebx %x+2eax %x+ecx %x+8%2
2610 //FIXME reorder?
2611 #ifdef L1_DIFF //needs mmx2
2612 "movq (%0), %%mm0 \n\t" // L0
2613 "psadbw (%1), %%mm0 \n\t" // |L0-R0|
2614 "movq (%0, %2), %%mm1 \n\t" // L1
2615 "psadbw (%1, %2), %%mm1 \n\t" // |L1-R1|
2616 "movq (%0, %2, 2), %%mm2 \n\t" // L2
2617 "psadbw (%1, %2, 2), %%mm2 \n\t" // |L2-R2|
2618 "movq (%0, %%eax), %%mm3 \n\t" // L3
2619 "psadbw (%1, %%eax), %%mm3 \n\t" // |L3-R3|
2620
2621 "movq (%0, %2, 4), %%mm4 \n\t" // L4
2622 "paddw %%mm1, %%mm0 \n\t"
2623 "psadbw (%1, %2, 4), %%mm4 \n\t" // |L4-R4|
2624 "movq (%0, %%ebx), %%mm5 \n\t" // L5
2625 "paddw %%mm2, %%mm0 \n\t"
2626 "psadbw (%1, %%ebx), %%mm5 \n\t" // |L5-R5|
2627 "movq (%0, %%eax, 2), %%mm6 \n\t" // L6
2628 "paddw %%mm3, %%mm0 \n\t"
2629 "psadbw (%1, %%eax, 2), %%mm6 \n\t" // |L6-R6|
2630 "movq (%0, %%ecx), %%mm7 \n\t" // L7
2631 "paddw %%mm4, %%mm0 \n\t"
2632 "psadbw (%1, %%ecx), %%mm7 \n\t" // |L7-R7|
2633 "paddw %%mm5, %%mm6 \n\t"
2634 "paddw %%mm7, %%mm6 \n\t"
2635 "paddw %%mm6, %%mm0 \n\t"
2636 #elif defined (FAST_L2_DIFF)
2637 "pcmpeqb %%mm7, %%mm7 \n\t"
2638 "movq b80, %%mm6 \n\t"
2639 "pxor %%mm0, %%mm0 \n\t"
2640 #define L2_DIFF_CORE(a, b)\
2641 "movq " #a ", %%mm5 \n\t"\
2642 "movq " #b ", %%mm2 \n\t"\
2643 "pxor %%mm7, %%mm2 \n\t"\
2644 PAVGB(%%mm2, %%mm5)\
2645 "paddb %%mm6, %%mm5 \n\t"\
2646 "movq %%mm5, %%mm2 \n\t"\
2647 "psllw $8, %%mm5 \n\t"\
2648 "pmaddwd %%mm5, %%mm5 \n\t"\
2649 "pmaddwd %%mm2, %%mm2 \n\t"\
2650 "paddd %%mm2, %%mm5 \n\t"\
2651 "psrld $14, %%mm5 \n\t"\
2652 "paddd %%mm5, %%mm0 \n\t"
2653
2654 L2_DIFF_CORE((%0), (%1))
2655 L2_DIFF_CORE((%0, %2), (%1, %2))
2656 L2_DIFF_CORE((%0, %2, 2), (%1, %2, 2))
2657 L2_DIFF_CORE((%0, %%eax), (%1, %%eax))
2658 L2_DIFF_CORE((%0, %2, 4), (%1, %2, 4))
2659 L2_DIFF_CORE((%0, %%ebx), (%1, %%ebx))
2660 L2_DIFF_CORE((%0, %%eax,2), (%1, %%eax,2))
2661 L2_DIFF_CORE((%0, %%ecx), (%1, %%ecx))
2662
2663 #else
2664 "pxor %%mm7, %%mm7 \n\t"
2665 "pxor %%mm0, %%mm0 \n\t"
2666 #define L2_DIFF_CORE(a, b)\
2667 "movq " #a ", %%mm5 \n\t"\
2668 "movq " #b ", %%mm2 \n\t"\
2669 "movq %%mm5, %%mm1 \n\t"\
2670 "movq %%mm2, %%mm3 \n\t"\
2671 "punpcklbw %%mm7, %%mm5 \n\t"\
2672 "punpckhbw %%mm7, %%mm1 \n\t"\
2673 "punpcklbw %%mm7, %%mm2 \n\t"\
2674 "punpckhbw %%mm7, %%mm3 \n\t"\
2675 "psubw %%mm2, %%mm5 \n\t"\
2676 "psubw %%mm3, %%mm1 \n\t"\
2677 "pmaddwd %%mm5, %%mm5 \n\t"\
2678 "pmaddwd %%mm1, %%mm1 \n\t"\
2679 "paddd %%mm1, %%mm5 \n\t"\
2680 "paddd %%mm5, %%mm0 \n\t"
2681
2682 L2_DIFF_CORE((%0), (%1))
2683 L2_DIFF_CORE((%0, %2), (%1, %2))
2684 L2_DIFF_CORE((%0, %2, 2), (%1, %2, 2))
2685 L2_DIFF_CORE((%0, %%eax), (%1, %%eax))
2686 L2_DIFF_CORE((%0, %2, 4), (%1, %2, 4))
2687 L2_DIFF_CORE((%0, %%ebx), (%1, %%ebx))
2688 L2_DIFF_CORE((%0, %%eax,2), (%1, %%eax,2))
2689 L2_DIFF_CORE((%0, %%ecx), (%1, %%ecx))
2690
2691 #endif
2692
2693 "movq %%mm0, %%mm4 \n\t"
2694 "psrlq $32, %%mm0 \n\t"
2695 "paddd %%mm0, %%mm4 \n\t"
2696 "movd %%mm4, %%ecx \n\t"
2697 // "movl %3, %%ecx \n\t"
2698 // "movl %%ecx, test \n\t"
2699 // "jmp 4f \n\t"
2700 "cmpl %4, %%ecx \n\t"
2701 " jb 2f \n\t"
2702 "cmpl %5, %%ecx \n\t"
2703 " jb 1f \n\t"
2704
2705 "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride
2706 "movq (%0), %%mm0 \n\t" // L0
2707 "movq (%0, %2), %%mm1 \n\t" // L1
2708 "movq (%0, %2, 2), %%mm2 \n\t" // L2
2709 "movq (%0, %%eax), %%mm3 \n\t" // L3
2710 "movq (%0, %2, 4), %%mm4 \n\t" // L4
2711 "movq (%0, %%ebx), %%mm5 \n\t" // L5
2712 "movq (%0, %%eax, 2), %%mm6 \n\t" // L6
2713 "movq (%0, %%ecx), %%mm7 \n\t" // L7
2714 "movq %%mm0, (%1) \n\t" // L0
2715 "movq %%mm1, (%1, %2) \n\t" // L1
2716 "movq %%mm2, (%1, %2, 2) \n\t" // L2
2717 "movq %%mm3, (%1, %%eax) \n\t" // L3
2718 "movq %%mm4, (%1, %2, 4) \n\t" // L4
2719 "movq %%mm5, (%1, %%ebx) \n\t" // L5
2720 "movq %%mm6, (%1, %%eax, 2) \n\t" // L6
2721 "movq %%mm7, (%1, %%ecx) \n\t" // L7
2722 "jmp 4f \n\t"
2723
2724 "1: \n\t"
2725 "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride
2726 "movq (%0), %%mm0 \n\t" // L0
2727 "pavgb (%1), %%mm0 \n\t" // L0
2728 "movq (%0, %2), %%mm1 \n\t" // L1
2729 "pavgb (%1, %2), %%mm1 \n\t" // L1
2730 "movq (%0, %2, 2), %%mm2 \n\t" // L2
2731 "pavgb (%1, %2, 2), %%mm2 \n\t" // L2
2732 "movq (%0, %%eax), %%mm3 \n\t" // L3
2733 "pavgb (%1, %%eax), %%mm3 \n\t" // L3
2734 "movq (%0, %2, 4), %%mm4 \n\t" // L4
2735 "pavgb (%1, %2, 4), %%mm4 \n\t" // L4
2736 "movq (%0, %%ebx), %%mm5 \n\t" // L5
2737 "pavgb (%1, %%ebx), %%mm5 \n\t" // L5
2738 "movq (%0, %%eax, 2), %%mm6 \n\t" // L6
2739 "pavgb (%1, %%eax, 2), %%mm6 \n\t" // L6
2740 "movq (%0, %%ecx), %%mm7 \n\t" // L7
2741 "pavgb (%1, %%ecx), %%mm7 \n\t" // L7
2742 "movq %%mm0, (%1) \n\t" // R0
2743 "movq %%mm1, (%1, %2) \n\t" // R1
2744 "movq %%mm2, (%1, %2, 2) \n\t" // R2
2745 "movq %%mm3, (%1, %%eax) \n\t" // R3
2746 "movq %%mm4, (%1, %2, 4) \n\t" // R4
2747 "movq %%mm5, (%1, %%ebx) \n\t" // R5
2748 "movq %%mm6, (%1, %%eax, 2) \n\t" // R6
2749 "movq %%mm7, (%1, %%ecx) \n\t" // R7
2750 "movq %%mm0, (%0) \n\t" // L0
2751 "movq %%mm1, (%0, %2) \n\t" // L1
2752 "movq %%mm2, (%0, %2, 2) \n\t" // L2
2753 "movq %%mm3, (%0, %%eax) \n\t" // L3
2754 "movq %%mm4, (%0, %2, 4) \n\t" // L4
2755 "movq %%mm5, (%0, %%ebx) \n\t" // L5
2756 "movq %%mm6, (%0, %%eax, 2) \n\t" // L6
2757 "movq %%mm7, (%0, %%ecx) \n\t" // L7
2758 "jmp 4f \n\t"
2759
2760 "2: \n\t"
2761 "cmpl %3, %%ecx \n\t"
2762 " jb 3f \n\t"
2763
2764 "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride
2765 "movq (%0), %%mm0 \n\t" // L0
2766 "movq (%0, %2), %%mm1 \n\t" // L1
2767 "movq (%0, %2, 2), %%mm2 \n\t" // L2
2768 "movq (%0, %%eax), %%mm3 \n\t" // L3
2769 "movq (%1), %%mm4 \n\t" // R0
2770 "movq (%1, %2), %%mm5 \n\t" // R1
2771 "movq (%1, %2, 2), %%mm6 \n\t" // R2
2772 "movq (%1, %%eax), %%mm7 \n\t" // R3
2773 PAVGB(%%mm4, %%mm0)
2774 PAVGB(%%mm5, %%mm1)
2775 PAVGB(%%mm6, %%mm2)
2776 PAVGB(%%mm7, %%mm3)
2777 PAVGB(%%mm4, %%mm0)
2778 PAVGB(%%mm5, %%mm1)
2779 PAVGB(%%mm6, %%mm2)
2780 PAVGB(%%mm7, %%mm3)
2781 "movq %%mm0, (%1) \n\t" // R0
2782 "movq %%mm1, (%1, %2) \n\t" // R1
2783 "movq %%mm2, (%1, %2, 2) \n\t" // R2
2784 "movq %%mm3, (%1, %%eax) \n\t" // R3
2785 "movq %%mm0, (%0) \n\t" // L0
2786 "movq %%mm1, (%0, %2) \n\t" // L1
2787 "movq %%mm2, (%0, %2, 2) \n\t" // L2
2788 "movq %%mm3, (%0, %%eax) \n\t" // L3
2789
2790 "movq (%0, %2, 4), %%mm0 \n\t" // L4
2791 "movq (%0, %%ebx), %%mm1 \n\t" // L5
2792 "movq (%0, %%eax, 2), %%mm2 \n\t" // L6
2793 "movq (%0, %%ecx), %%mm3 \n\t" // L7
2794 "movq (%1, %2, 4), %%mm4 \n\t" // R4
2795 "movq (%1, %%ebx), %%mm5 \n\t" // R5
2796 "movq (%1, %%eax, 2), %%mm6 \n\t" // R6
2797 "movq (%1, %%ecx), %%mm7 \n\t" // R7
2798 PAVGB(%%mm4, %%mm0)
2799 PAVGB(%%mm5, %%mm1)
2800 PAVGB(%%mm6, %%mm2)
2801 PAVGB(%%mm7, %%mm3)
2802 PAVGB(%%mm4, %%mm0)
2803 PAVGB(%%mm5, %%mm1)
2804 PAVGB(%%mm6, %%mm2)
2805 PAVGB(%%mm7, %%mm3)
2806 "movq %%mm0, (%1, %2, 4) \n\t" // R4
2807 "movq %%mm1, (%1, %%ebx) \n\t" // R5
2808 "movq %%mm2, (%1, %%eax, 2) \n\t" // R6
2809 "movq %%mm3, (%1, %%ecx) \n\t" // R7
2810 "movq %%mm0, (%0, %2, 4) \n\t" // L4
2811 "movq %%mm1, (%0, %%ebx) \n\t" // L5
2812 "movq %%mm2, (%0, %%eax, 2) \n\t" // L6
2813 "movq %%mm3, (%0, %%ecx) \n\t" // L7
2814 "jmp 4f \n\t"
2815
2816 "3: \n\t"
2817 "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride
2818 "movq (%0), %%mm0 \n\t" // L0
2819 "movq (%0, %2), %%mm1 \n\t" // L1
2820 "movq (%0, %2, 2), %%mm2 \n\t" // L2
2821 "movq (%0, %%eax), %%mm3 \n\t" // L3
2822 "movq (%1), %%mm4 \n\t" // R0
2823 "movq (%1, %2), %%mm5 \n\t" // R1
2824 "movq (%1, %2, 2), %%mm6 \n\t" // R2
2825 "movq (%1, %%eax), %%mm7 \n\t" // R3
2826 PAVGB(%%mm4, %%mm0)
2827 PAVGB(%%mm5, %%mm1)
2828 PAVGB(%%mm6, %%mm2)
2829 PAVGB(%%mm7, %%mm3)
2830 PAVGB(%%mm4, %%mm0)
2831 PAVGB(%%mm5, %%mm1)
2832 PAVGB(%%mm6, %%mm2)
2833 PAVGB(%%mm7, %%mm3)
2834 PAVGB(%%mm4, %%mm0)
2835 PAVGB(%%mm5, %%mm1)
2836 PAVGB(%%mm6, %%mm2)
2837 PAVGB(%%mm7, %%mm3)
2838 "movq %%mm0, (%1) \n\t" // R0
2839 "movq %%mm1, (%1, %2) \n\t" // R1
2840 "movq %%mm2, (%1, %2, 2) \n\t" // R2
2841 "movq %%mm3, (%1, %%eax) \n\t" // R3
2842 "movq %%mm0, (%0) \n\t" // L0
2843 "movq %%mm1, (%0, %2) \n\t" // L1
2844 "movq %%mm2, (%0, %2, 2) \n\t" // L2
2845 "movq %%mm3, (%0, %%eax) \n\t" // L3
2846
2847 "movq (%0, %2, 4), %%mm0 \n\t" // L4
2848 "movq (%0, %%ebx), %%mm1 \n\t" // L5
2849 "movq (%0, %%eax, 2), %%mm2 \n\t" // L6
2850 "movq (%0, %%ecx), %%mm3 \n\t" // L7
2851 "movq (%1, %2, 4), %%mm4 \n\t" // R4
2852 "movq (%1, %%ebx), %%mm5 \n\t" // R5
2853 "movq (%1, %%eax, 2), %%mm6 \n\t" // R6
2854 "movq (%1, %%ecx), %%mm7 \n\t" // R7
2855 PAVGB(%%mm4, %%mm0)
2856 PAVGB(%%mm5, %%mm1)
2857 PAVGB(%%mm6, %%mm2)
2858 PAVGB(%%mm7, %%mm3)
2859 PAVGB(%%mm4, %%mm0)
2860 PAVGB(%%mm5, %%mm1)
2861 PAVGB(%%mm6, %%mm2)
2862 PAVGB(%%mm7, %%mm3)
2863 PAVGB(%%mm4, %%mm0)
2864 PAVGB(%%mm5, %%mm1)
2865 PAVGB(%%mm6, %%mm2)
2866 PAVGB(%%mm7, %%mm3)
2867 "movq %%mm0, (%1, %2, 4) \n\t" // R4
2868 "movq %%mm1, (%1, %%ebx) \n\t" // R5
2869 "movq %%mm2, (%1, %%eax, 2) \n\t" // R6
2870 "movq %%mm3, (%1, %%ecx) \n\t" // R7
2871 "movq %%mm0, (%0, %2, 4) \n\t" // L4
2872 "movq %%mm1, (%0, %%ebx) \n\t" // L5
2873 "movq %%mm2, (%0, %%eax, 2) \n\t" // L6
2874 "movq %%mm3, (%0, %%ecx) \n\t" // L7
2875
2876 "4: \n\t"
2877
2878 :: "r" (src), "r" (tempBlured), "r"(stride),
2879 "m"(maxNoise[0]), "m"(maxNoise[1]), "m"(maxNoise[2])
2880 : "%eax", "%ebx", "%ecx", "memory"
2881 );
2882 //printf("%d\n", test);
2883 #else
2599 int y; 2884 int y;
2600 int d=0; 2885 int d=0;
2601 int sysd=0; 2886 int sysd=0;
2602 2887
2603 for(y=0; y<8; y++) 2888 for(y=0; y<8; y++)
2606 for(x=0; x<8; x++) 2891 for(x=0; x<8; x++)
2607 { 2892 {
2608 int ref= tempBlured[ x + y*stride ]; 2893 int ref= tempBlured[ x + y*stride ];
2609 int cur= src[ x + y*stride ]; 2894 int cur= src[ x + y*stride ];
2610 int d1=ref - cur; 2895 int d1=ref - cur;
2611 d+= ABS(d1); //d1*d1; 2896 // if(x==0 || x==7) d1+= d1>>1;
2897 // if(y==0 || y==7) d1+= d1>>1;
2898 // d+= ABS(d1);
2899 d+= d1*d1;
2612 sysd+= d1; 2900 sysd+= d1;
2613 } 2901 }
2614 } 2902 }
2615 //printf("%d %d %d\n", maxNoise[0], maxNoise[1], maxNoise[2]); 2903 //printf("%d %d %d\n", maxNoise[0], maxNoise[1], maxNoise[2]);
2616 /* 2904 /*
2680 (ref*3 + cur + 2)>>2; 2968 (ref*3 + cur + 2)>>2;
2681 } 2969 }
2682 } 2970 }
2683 } 2971 }
2684 } 2972 }
2973 #endif
2685 } 2974 }
2686 2975
2687 #ifdef HAVE_ODIVX_POSTPROCESS 2976 #ifdef HAVE_ODIVX_POSTPROCESS
2688 #include "../opendivx/postprocess.h" 2977 #include "../opendivx/postprocess.h"
2689 int use_old_pp=0; 2978 int use_old_pp=0;
2912 } 3201 }
2913 3202
2914 ppMode.lumMode= mode; 3203 ppMode.lumMode= mode;
2915 mode= ((mode&0xFF)>>4) | (mode&0xFFFFFF00); 3204 mode= ((mode&0xFF)>>4) | (mode&0xFFFFFF00);
2916 ppMode.chromMode= mode; 3205 ppMode.chromMode= mode;
2917 ppMode.maxTmpNoise[0]= 150; 3206 ppMode.maxTmpNoise[0]= 700;
2918 ppMode.maxTmpNoise[1]= 200; 3207 ppMode.maxTmpNoise[1]= 1500;
2919 ppMode.maxTmpNoise[2]= 400; 3208 ppMode.maxTmpNoise[2]= 3000;
2920 3209
2921 #ifdef HAVE_ODIVX_POSTPROCESS 3210 #ifdef HAVE_ODIVX_POSTPROCESS
2922 // Note: I could make this shit outside of this file, but it would mean one 3211 // Note: I could make this shit outside of this file, but it would mean one
2923 // more function call... 3212 // more function call...
2924 if(use_old_pp){ 3213 if(use_old_pp){