Mercurial > mplayer.hg
comparison postproc/postprocess_template.c @ 2899:6885bf566271
temp denoiser:
bugfix?
averaging noise over the surrounding blocks
author | michael |
---|---|
date | Wed, 14 Nov 2001 11:51:36 +0000 |
parents | dd3fabd01df0 |
children | 71384f064a3e |
comparison
equal
deleted
inserted
replaced
2898:4c2db939183e | 2899:6885bf566271 |
---|---|
151 static uint64_t __attribute__((aligned(8))) temp4=0; | 151 static uint64_t __attribute__((aligned(8))) temp4=0; |
152 static uint64_t __attribute__((aligned(8))) temp5=0; | 152 static uint64_t __attribute__((aligned(8))) temp5=0; |
153 static uint64_t __attribute__((aligned(8))) pQPb=0; | 153 static uint64_t __attribute__((aligned(8))) pQPb=0; |
154 static uint64_t __attribute__((aligned(8))) pQPb2=0; | 154 static uint64_t __attribute__((aligned(8))) pQPb2=0; |
155 static uint8_t __attribute__((aligned(8))) tempBlocks[8*16*2]; //used for the horizontal code | 155 static uint8_t __attribute__((aligned(8))) tempBlocks[8*16*2]; //used for the horizontal code |
156 static uint32_t __attribute__((aligned(4))) maxTmpNoise[4]; | |
156 #else | 157 #else |
157 static uint64_t packedYOffset= 0x0000000000000000LL; | 158 static uint64_t packedYOffset= 0x0000000000000000LL; |
158 static uint64_t packedYScale= 0x0100010001000100LL; | 159 static uint64_t packedYScale= 0x0100010001000100LL; |
159 static uint8_t tempBlocks[8*16*2]; //used for the horizontal code | 160 static uint8_t tempBlocks[8*16*2]; //used for the horizontal code |
160 #endif | 161 #endif |
2594 } | 2595 } |
2595 #endif | 2596 #endif |
2596 //static int test=0; | 2597 //static int test=0; |
2597 | 2598 |
2598 static void inline tempNoiseReducer(uint8_t *src, int stride, | 2599 static void inline tempNoiseReducer(uint8_t *src, int stride, |
2599 uint8_t *tempBlured, int *maxNoise) | 2600 uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise) |
2600 { | 2601 { |
2601 #define FAST_L2_DIFF | 2602 #define FAST_L2_DIFF |
2602 //#define L1_DIFF //u should change the thresholds too if u try that one | 2603 //#define L1_DIFF //u should change the thresholds too if u try that one |
2603 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 2604 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
2604 asm volatile( | 2605 asm volatile( |
2692 | 2693 |
2693 "movq %%mm0, %%mm4 \n\t" | 2694 "movq %%mm0, %%mm4 \n\t" |
2694 "psrlq $32, %%mm0 \n\t" | 2695 "psrlq $32, %%mm0 \n\t" |
2695 "paddd %%mm0, %%mm4 \n\t" | 2696 "paddd %%mm0, %%mm4 \n\t" |
2696 "movd %%mm4, %%ecx \n\t" | 2697 "movd %%mm4, %%ecx \n\t" |
2698 "shll $2, %%ecx \n\t" | |
2699 "movl %3, %%ebx \n\t" | |
2700 "addl -4(%%ebx), %%ecx \n\t" | |
2701 "addl 4(%%ebx), %%ecx \n\t" | |
2702 "addl -1024(%%ebx), %%ecx \n\t" | |
2703 "addl $4, %%ecx \n\t" | |
2704 "addl 1024(%%ebx), %%ecx \n\t" | |
2705 "shrl $3, %%ecx \n\t" | |
2706 "movl %%ecx, (%%ebx) \n\t" | |
2707 "leal (%%eax, %2, 2), %%ebx \n\t" // 5*stride | |
2708 | |
2697 // "movl %3, %%ecx \n\t" | 2709 // "movl %3, %%ecx \n\t" |
2698 // "movl %%ecx, test \n\t" | 2710 // "movl %%ecx, test \n\t" |
2699 // "jmp 4f \n\t" | 2711 // "jmp 4f \n\t" |
2700 "cmpl %4, %%ecx \n\t" | 2712 "cmpl 4+maxTmpNoise, %%ecx \n\t" |
2701 " jb 2f \n\t" | 2713 " jb 2f \n\t" |
2702 "cmpl %5, %%ecx \n\t" | 2714 "cmpl 8+maxTmpNoise, %%ecx \n\t" |
2703 " jb 1f \n\t" | 2715 " jb 1f \n\t" |
2704 | 2716 |
2705 "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride | 2717 "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride |
2706 "movq (%0), %%mm0 \n\t" // L0 | 2718 "movq (%0), %%mm0 \n\t" // L0 |
2707 "movq (%0, %2), %%mm1 \n\t" // L1 | 2719 "movq (%0, %2), %%mm1 \n\t" // L1 |
2756 "movq %%mm6, (%0, %%eax, 2) \n\t" // L6 | 2768 "movq %%mm6, (%0, %%eax, 2) \n\t" // L6 |
2757 "movq %%mm7, (%0, %%ecx) \n\t" // L7 | 2769 "movq %%mm7, (%0, %%ecx) \n\t" // L7 |
2758 "jmp 4f \n\t" | 2770 "jmp 4f \n\t" |
2759 | 2771 |
2760 "2: \n\t" | 2772 "2: \n\t" |
2761 "cmpl %3, %%ecx \n\t" | 2773 "cmpl maxTmpNoise, %%ecx \n\t" |
2762 " jb 3f \n\t" | 2774 " jb 3f \n\t" |
2763 | 2775 |
2764 "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride | 2776 "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride |
2765 "movq (%0), %%mm0 \n\t" // L0 | 2777 "movq (%0), %%mm0 \n\t" // L0 |
2766 "movq (%0, %2), %%mm1 \n\t" // L1 | 2778 "movq (%0, %2), %%mm1 \n\t" // L1 |
2873 "movq %%mm2, (%0, %%eax, 2) \n\t" // L6 | 2885 "movq %%mm2, (%0, %%eax, 2) \n\t" // L6 |
2874 "movq %%mm3, (%0, %%ecx) \n\t" // L7 | 2886 "movq %%mm3, (%0, %%ecx) \n\t" // L7 |
2875 | 2887 |
2876 "4: \n\t" | 2888 "4: \n\t" |
2877 | 2889 |
2878 :: "r" (src), "r" (tempBlured), "r"(stride), | 2890 :: "r" (src), "r" (tempBlured), "r"(stride), "m" (tempBluredPast) |
2879 "m"(maxNoise[0]), "m"(maxNoise[1]), "m"(maxNoise[2]) | |
2880 : "%eax", "%ebx", "%ecx", "memory" | 2891 : "%eax", "%ebx", "%ecx", "memory" |
2881 ); | 2892 ); |
2882 //printf("%d\n", test); | 2893 //printf("%d\n", test); |
2883 #else | 2894 #else |
2884 int y; | 2895 int y; |
2885 int d=0; | 2896 int d=0; |
2886 int sysd=0; | 2897 int sysd=0; |
2898 int i; | |
2887 | 2899 |
2888 for(y=0; y<8; y++) | 2900 for(y=0; y<8; y++) |
2889 { | 2901 { |
2890 int x; | 2902 int x; |
2891 for(x=0; x<8; x++) | 2903 for(x=0; x<8; x++) |
2898 // d+= ABS(d1); | 2910 // d+= ABS(d1); |
2899 d+= d1*d1; | 2911 d+= d1*d1; |
2900 sysd+= d1; | 2912 sysd+= d1; |
2901 } | 2913 } |
2902 } | 2914 } |
2915 i=d; | |
2916 d= ( | |
2917 4*d | |
2918 +(*(tempBluredPast-256)) | |
2919 +(*(tempBluredPast-1))+ (*(tempBluredPast+1)) | |
2920 +(*(tempBluredPast+256)) | |
2921 +4)>>3; | |
2922 *tempBluredPast=i; | |
2923 // ((*tempBluredPast)*3 + d + 2)>>2; | |
2924 | |
2903 //printf("%d %d %d\n", maxNoise[0], maxNoise[1], maxNoise[2]); | 2925 //printf("%d %d %d\n", maxNoise[0], maxNoise[1], maxNoise[2]); |
2904 /* | 2926 /* |
2905 Switch between | 2927 Switch between |
2906 1 0 0 0 0 0 0 (0) | 2928 1 0 0 0 0 0 0 (0) |
2907 64 32 16 8 4 2 1 (1) | 2929 64 32 16 8 4 2 1 (1) |
3460 static uint8_t *tempDstBlock= NULL; | 3482 static uint8_t *tempDstBlock= NULL; |
3461 static uint8_t *tempSrcBlock= NULL; | 3483 static uint8_t *tempSrcBlock= NULL; |
3462 | 3484 |
3463 /* Temporal noise reducing buffers */ | 3485 /* Temporal noise reducing buffers */ |
3464 static uint8_t *tempBlured[3]= {NULL,NULL,NULL}; | 3486 static uint8_t *tempBlured[3]= {NULL,NULL,NULL}; |
3487 static uint32_t *tempBluredPast[3]= {NULL,NULL,NULL}; | |
3465 | 3488 |
3466 #ifdef PP_FUNNY_STRIDE | 3489 #ifdef PP_FUNNY_STRIDE |
3467 uint8_t *dstBlockPtrBackup; | 3490 uint8_t *dstBlockPtrBackup; |
3468 uint8_t *srcBlockPtrBackup; | 3491 uint8_t *srcBlockPtrBackup; |
3469 #endif | 3492 #endif |
3474 #ifdef TIMING | 3497 #ifdef TIMING |
3475 long long memcpyTime=0, vertTime=0, horizTime=0, sumTime; | 3498 long long memcpyTime=0, vertTime=0, horizTime=0, sumTime; |
3476 sumTime= rdtsc(); | 3499 sumTime= rdtsc(); |
3477 #endif | 3500 #endif |
3478 //mode= 0x7F; | 3501 //mode= 0x7F; |
3502 #ifdef HAVE_MMX | |
3503 maxTmpNoise[0]= ppMode->maxTmpNoise[0]; | |
3504 maxTmpNoise[1]= ppMode->maxTmpNoise[1]; | |
3505 maxTmpNoise[2]= ppMode->maxTmpNoise[2]; | |
3506 #endif | |
3479 | 3507 |
3480 if(tempDst==NULL) | 3508 if(tempDst==NULL) |
3481 { | 3509 { |
3482 tempDst= (uint8_t*)memalign(8, 1024*24); | 3510 tempDst= (uint8_t*)memalign(8, 1024*24); |
3483 tempSrc= (uint8_t*)memalign(8, 1024*24); | 3511 tempSrc= (uint8_t*)memalign(8, 1024*24); |
3489 { | 3517 { |
3490 // printf("%d %d %d\n", isColor, dstStride, height); | 3518 // printf("%d %d %d\n", isColor, dstStride, height); |
3491 //FIXME works only as long as the size doesnt increase | 3519 //FIXME works only as long as the size doesnt increase |
3492 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end | 3520 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end |
3493 tempBlured[isColor]= (uint8_t*)memalign(8, dstStride*((height+7)&(~7)) + 17*1024); | 3521 tempBlured[isColor]= (uint8_t*)memalign(8, dstStride*((height+7)&(~7)) + 17*1024); |
3522 tempBluredPast[isColor]= (uint32_t*)memalign(8, 256*((height+7)&(~7))/2 + 17*1024); | |
3494 | 3523 |
3495 memset(tempBlured[isColor], 0, dstStride*((height+7)&(~7)) + 17*1024); | 3524 memset(tempBlured[isColor], 0, dstStride*((height+7)&(~7)) + 17*1024); |
3525 memset(tempBluredPast[isColor], 0, 256*((height+7)&(~7))/2 + 17*1024); | |
3496 } | 3526 } |
3497 | 3527 |
3498 if(!yHistogram) | 3528 if(!yHistogram) |
3499 { | 3529 { |
3500 int i; | 3530 int i; |
3880 | 3910 |
3881 if(mode & TEMP_NOISE_FILTER) | 3911 if(mode & TEMP_NOISE_FILTER) |
3882 { | 3912 { |
3883 tempNoiseReducer(dstBlock-8, stride, | 3913 tempNoiseReducer(dstBlock-8, stride, |
3884 tempBlured[isColor] + y*dstStride + x, | 3914 tempBlured[isColor] + y*dstStride + x, |
3915 tempBluredPast[isColor] + (y>>3)*256 + (x>>3), | |
3885 ppMode->maxTmpNoise); | 3916 ppMode->maxTmpNoise); |
3886 } | 3917 } |
3887 } | 3918 } |
3888 | 3919 |
3889 #ifdef PP_FUNNY_STRIDE | 3920 #ifdef PP_FUNNY_STRIDE |
3918 | 3949 |
3919 if((mode & TEMP_NOISE_FILTER)) | 3950 if((mode & TEMP_NOISE_FILTER)) |
3920 { | 3951 { |
3921 tempNoiseReducer(dstBlock-8, dstStride, | 3952 tempNoiseReducer(dstBlock-8, dstStride, |
3922 tempBlured[isColor] + y*dstStride + x, | 3953 tempBlured[isColor] + y*dstStride + x, |
3954 tempBluredPast[isColor] + (y>>3)*256 + (x>>3), | |
3923 ppMode->maxTmpNoise); | 3955 ppMode->maxTmpNoise); |
3924 } | 3956 } |
3925 | 3957 |
3926 /* did we use a tmp buffer for the last lines*/ | 3958 /* did we use a tmp buffer for the last lines*/ |
3927 if(y+15 >= height) | 3959 if(y+15 >= height) |