comparison postproc/postprocess_template.c @ 2899:6885bf566271

temp denoiser: bugfix? averaging noise over the surrounding blocks
author michael
date Wed, 14 Nov 2001 11:51:36 +0000
parents dd3fabd01df0
children 71384f064a3e
comparison
equal deleted inserted replaced
2898:4c2db939183e 2899:6885bf566271
151 static uint64_t __attribute__((aligned(8))) temp4=0; 151 static uint64_t __attribute__((aligned(8))) temp4=0;
152 static uint64_t __attribute__((aligned(8))) temp5=0; 152 static uint64_t __attribute__((aligned(8))) temp5=0;
153 static uint64_t __attribute__((aligned(8))) pQPb=0; 153 static uint64_t __attribute__((aligned(8))) pQPb=0;
154 static uint64_t __attribute__((aligned(8))) pQPb2=0; 154 static uint64_t __attribute__((aligned(8))) pQPb2=0;
155 static uint8_t __attribute__((aligned(8))) tempBlocks[8*16*2]; //used for the horizontal code 155 static uint8_t __attribute__((aligned(8))) tempBlocks[8*16*2]; //used for the horizontal code
156 static uint32_t __attribute__((aligned(4))) maxTmpNoise[4];
156 #else 157 #else
157 static uint64_t packedYOffset= 0x0000000000000000LL; 158 static uint64_t packedYOffset= 0x0000000000000000LL;
158 static uint64_t packedYScale= 0x0100010001000100LL; 159 static uint64_t packedYScale= 0x0100010001000100LL;
159 static uint8_t tempBlocks[8*16*2]; //used for the horizontal code 160 static uint8_t tempBlocks[8*16*2]; //used for the horizontal code
160 #endif 161 #endif
2594 } 2595 }
2595 #endif 2596 #endif
2596 //static int test=0; 2597 //static int test=0;
2597 2598
2598 static void inline tempNoiseReducer(uint8_t *src, int stride, 2599 static void inline tempNoiseReducer(uint8_t *src, int stride,
2599 uint8_t *tempBlured, int *maxNoise) 2600 uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise)
2600 { 2601 {
2601 #define FAST_L2_DIFF 2602 #define FAST_L2_DIFF
2602 //#define L1_DIFF //u should change the thresholds too if u try that one 2603 //#define L1_DIFF //u should change the thresholds too if u try that one
2603 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) 2604 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
2604 asm volatile( 2605 asm volatile(
2692 2693
2693 "movq %%mm0, %%mm4 \n\t" 2694 "movq %%mm0, %%mm4 \n\t"
2694 "psrlq $32, %%mm0 \n\t" 2695 "psrlq $32, %%mm0 \n\t"
2695 "paddd %%mm0, %%mm4 \n\t" 2696 "paddd %%mm0, %%mm4 \n\t"
2696 "movd %%mm4, %%ecx \n\t" 2697 "movd %%mm4, %%ecx \n\t"
2698 "shll $2, %%ecx \n\t"
2699 "movl %3, %%ebx \n\t"
2700 "addl -4(%%ebx), %%ecx \n\t"
2701 "addl 4(%%ebx), %%ecx \n\t"
2702 "addl -1024(%%ebx), %%ecx \n\t"
2703 "addl $4, %%ecx \n\t"
2704 "addl 1024(%%ebx), %%ecx \n\t"
2705 "shrl $3, %%ecx \n\t"
2706 "movl %%ecx, (%%ebx) \n\t"
2707 "leal (%%eax, %2, 2), %%ebx \n\t" // 5*stride
2708
2697 // "movl %3, %%ecx \n\t" 2709 // "movl %3, %%ecx \n\t"
2698 // "movl %%ecx, test \n\t" 2710 // "movl %%ecx, test \n\t"
2699 // "jmp 4f \n\t" 2711 // "jmp 4f \n\t"
2700 "cmpl %4, %%ecx \n\t" 2712 "cmpl 4+maxTmpNoise, %%ecx \n\t"
2701 " jb 2f \n\t" 2713 " jb 2f \n\t"
2702 "cmpl %5, %%ecx \n\t" 2714 "cmpl 8+maxTmpNoise, %%ecx \n\t"
2703 " jb 1f \n\t" 2715 " jb 1f \n\t"
2704 2716
2705 "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride 2717 "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride
2706 "movq (%0), %%mm0 \n\t" // L0 2718 "movq (%0), %%mm0 \n\t" // L0
2707 "movq (%0, %2), %%mm1 \n\t" // L1 2719 "movq (%0, %2), %%mm1 \n\t" // L1
2756 "movq %%mm6, (%0, %%eax, 2) \n\t" // L6 2768 "movq %%mm6, (%0, %%eax, 2) \n\t" // L6
2757 "movq %%mm7, (%0, %%ecx) \n\t" // L7 2769 "movq %%mm7, (%0, %%ecx) \n\t" // L7
2758 "jmp 4f \n\t" 2770 "jmp 4f \n\t"
2759 2771
2760 "2: \n\t" 2772 "2: \n\t"
2761 "cmpl %3, %%ecx \n\t" 2773 "cmpl maxTmpNoise, %%ecx \n\t"
2762 " jb 3f \n\t" 2774 " jb 3f \n\t"
2763 2775
2764 "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride 2776 "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride
2765 "movq (%0), %%mm0 \n\t" // L0 2777 "movq (%0), %%mm0 \n\t" // L0
2766 "movq (%0, %2), %%mm1 \n\t" // L1 2778 "movq (%0, %2), %%mm1 \n\t" // L1
2873 "movq %%mm2, (%0, %%eax, 2) \n\t" // L6 2885 "movq %%mm2, (%0, %%eax, 2) \n\t" // L6
2874 "movq %%mm3, (%0, %%ecx) \n\t" // L7 2886 "movq %%mm3, (%0, %%ecx) \n\t" // L7
2875 2887
2876 "4: \n\t" 2888 "4: \n\t"
2877 2889
2878 :: "r" (src), "r" (tempBlured), "r"(stride), 2890 :: "r" (src), "r" (tempBlured), "r"(stride), "m" (tempBluredPast)
2879 "m"(maxNoise[0]), "m"(maxNoise[1]), "m"(maxNoise[2])
2880 : "%eax", "%ebx", "%ecx", "memory" 2891 : "%eax", "%ebx", "%ecx", "memory"
2881 ); 2892 );
2882 //printf("%d\n", test); 2893 //printf("%d\n", test);
2883 #else 2894 #else
2884 int y; 2895 int y;
2885 int d=0; 2896 int d=0;
2886 int sysd=0; 2897 int sysd=0;
2898 int i;
2887 2899
2888 for(y=0; y<8; y++) 2900 for(y=0; y<8; y++)
2889 { 2901 {
2890 int x; 2902 int x;
2891 for(x=0; x<8; x++) 2903 for(x=0; x<8; x++)
2898 // d+= ABS(d1); 2910 // d+= ABS(d1);
2899 d+= d1*d1; 2911 d+= d1*d1;
2900 sysd+= d1; 2912 sysd+= d1;
2901 } 2913 }
2902 } 2914 }
2915 i=d;
2916 d= (
2917 4*d
2918 +(*(tempBluredPast-256))
2919 +(*(tempBluredPast-1))+ (*(tempBluredPast+1))
2920 +(*(tempBluredPast+256))
2921 +4)>>3;
2922 *tempBluredPast=i;
2923 // ((*tempBluredPast)*3 + d + 2)>>2;
2924
2903 //printf("%d %d %d\n", maxNoise[0], maxNoise[1], maxNoise[2]); 2925 //printf("%d %d %d\n", maxNoise[0], maxNoise[1], maxNoise[2]);
2904 /* 2926 /*
2905 Switch between 2927 Switch between
2906 1 0 0 0 0 0 0 (0) 2928 1 0 0 0 0 0 0 (0)
2907 64 32 16 8 4 2 1 (1) 2929 64 32 16 8 4 2 1 (1)
3460 static uint8_t *tempDstBlock= NULL; 3482 static uint8_t *tempDstBlock= NULL;
3461 static uint8_t *tempSrcBlock= NULL; 3483 static uint8_t *tempSrcBlock= NULL;
3462 3484
3463 /* Temporal noise reducing buffers */ 3485 /* Temporal noise reducing buffers */
3464 static uint8_t *tempBlured[3]= {NULL,NULL,NULL}; 3486 static uint8_t *tempBlured[3]= {NULL,NULL,NULL};
3487 static uint32_t *tempBluredPast[3]= {NULL,NULL,NULL};
3465 3488
3466 #ifdef PP_FUNNY_STRIDE 3489 #ifdef PP_FUNNY_STRIDE
3467 uint8_t *dstBlockPtrBackup; 3490 uint8_t *dstBlockPtrBackup;
3468 uint8_t *srcBlockPtrBackup; 3491 uint8_t *srcBlockPtrBackup;
3469 #endif 3492 #endif
3474 #ifdef TIMING 3497 #ifdef TIMING
3475 long long memcpyTime=0, vertTime=0, horizTime=0, sumTime; 3498 long long memcpyTime=0, vertTime=0, horizTime=0, sumTime;
3476 sumTime= rdtsc(); 3499 sumTime= rdtsc();
3477 #endif 3500 #endif
3478 //mode= 0x7F; 3501 //mode= 0x7F;
3502 #ifdef HAVE_MMX
3503 maxTmpNoise[0]= ppMode->maxTmpNoise[0];
3504 maxTmpNoise[1]= ppMode->maxTmpNoise[1];
3505 maxTmpNoise[2]= ppMode->maxTmpNoise[2];
3506 #endif
3479 3507
3480 if(tempDst==NULL) 3508 if(tempDst==NULL)
3481 { 3509 {
3482 tempDst= (uint8_t*)memalign(8, 1024*24); 3510 tempDst= (uint8_t*)memalign(8, 1024*24);
3483 tempSrc= (uint8_t*)memalign(8, 1024*24); 3511 tempSrc= (uint8_t*)memalign(8, 1024*24);
3489 { 3517 {
3490 // printf("%d %d %d\n", isColor, dstStride, height); 3518 // printf("%d %d %d\n", isColor, dstStride, height);
3491 //FIXME works only as long as the size doesn't increase 3519 //FIXME works only as long as the size doesn't increase
3492 //Note: the +17*1024 is just there so I don't have to worry about r/w over the end 3520 //Note: the +17*1024 is just there so I don't have to worry about r/w over the end
3493 tempBlured[isColor]= (uint8_t*)memalign(8, dstStride*((height+7)&(~7)) + 17*1024); 3521 tempBlured[isColor]= (uint8_t*)memalign(8, dstStride*((height+7)&(~7)) + 17*1024);
3522 tempBluredPast[isColor]= (uint32_t*)memalign(8, 256*((height+7)&(~7))/2 + 17*1024);
3494 3523
3495 memset(tempBlured[isColor], 0, dstStride*((height+7)&(~7)) + 17*1024); 3524 memset(tempBlured[isColor], 0, dstStride*((height+7)&(~7)) + 17*1024);
3525 memset(tempBluredPast[isColor], 0, 256*((height+7)&(~7))/2 + 17*1024);
3496 } 3526 }
3497 3527
3498 if(!yHistogram) 3528 if(!yHistogram)
3499 { 3529 {
3500 int i; 3530 int i;
3880 3910
3881 if(mode & TEMP_NOISE_FILTER) 3911 if(mode & TEMP_NOISE_FILTER)
3882 { 3912 {
3883 tempNoiseReducer(dstBlock-8, stride, 3913 tempNoiseReducer(dstBlock-8, stride,
3884 tempBlured[isColor] + y*dstStride + x, 3914 tempBlured[isColor] + y*dstStride + x,
3915 tempBluredPast[isColor] + (y>>3)*256 + (x>>3),
3885 ppMode->maxTmpNoise); 3916 ppMode->maxTmpNoise);
3886 } 3917 }
3887 } 3918 }
3888 3919
3889 #ifdef PP_FUNNY_STRIDE 3920 #ifdef PP_FUNNY_STRIDE
3918 3949
3919 if((mode & TEMP_NOISE_FILTER)) 3950 if((mode & TEMP_NOISE_FILTER))
3920 { 3951 {
3921 tempNoiseReducer(dstBlock-8, dstStride, 3952 tempNoiseReducer(dstBlock-8, dstStride,
3922 tempBlured[isColor] + y*dstStride + x, 3953 tempBlured[isColor] + y*dstStride + x,
3954 tempBluredPast[isColor] + (y>>3)*256 + (x>>3),
3923 ppMode->maxTmpNoise); 3955 ppMode->maxTmpNoise);
3924 } 3956 }
3925 3957
3926 /* did we use a tmp buffer for the last lines*/ 3958 /* did we use a tmp buffer for the last lines*/
3927 if(y+15 >= height) 3959 if(y+15 >= height)