Mercurial > mplayer.hg
comparison postproc/postprocess_template.c @ 7960:0a4ab841ae29
better deblocking filter
author | michael |
---|---|
date | Tue, 29 Oct 2002 18:35:15 +0000 |
parents | be43106d6329 |
children | 0a5d69e6f2a2 |
comparison
equal
deleted
inserted
replaced
7959:9b8dd240314d | 7960:0a4ab841ae29 |
---|---|
54 src+= stride*4; // src points to begin of the 8x8 Block | 54 src+= stride*4; // src points to begin of the 8x8 Block |
55 asm volatile( | 55 asm volatile( |
56 "leal (%1, %2), %%eax \n\t" | 56 "leal (%1, %2), %%eax \n\t" |
57 // 0 1 2 3 4 5 6 7 8 9 | 57 // 0 1 2 3 4 5 6 7 8 9 |
58 // %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2 | 58 // %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2 |
59 "movq %3, %%mm7 \n\t" // mm7 = 0x7F | 59 "movq %3, %%mm7 \n\t" |
60 "movq %4, %%mm6 \n\t" // mm6 = 0x7D | 60 "movq %4, %%mm6 \n\t" |
61 | |
61 "movq (%1), %%mm0 \n\t" | 62 "movq (%1), %%mm0 \n\t" |
62 "movq (%%eax), %%mm1 \n\t" | 63 "movq (%%eax), %%mm1 \n\t" |
63 "psubb %%mm1, %%mm0 \n\t" // mm0 = difference | 64 "psubb %%mm1, %%mm0 \n\t" // mm0 = difference |
64 "paddb %%mm7, %%mm0 \n\t" | 65 "paddb %%mm7, %%mm0 \n\t" |
65 "pcmpgtb %%mm6, %%mm0 \n\t" | 66 "pcmpgtb %%mm6, %%mm0 \n\t" |
117 "psrlq $32, %%mm0 \n\t" | 118 "psrlq $32, %%mm0 \n\t" |
118 "paddb %%mm1, %%mm0 \n\t" | 119 "paddb %%mm1, %%mm0 \n\t" |
119 #endif | 120 #endif |
120 "movd %%mm0, %0 \n\t" | 121 "movd %%mm0, %0 \n\t" |
121 : "=r" (numEq) | 122 : "=r" (numEq) |
122 : "r" (src), "r" (stride), "m" (c->mmxDcOffset), "m" (c->mmxDcThreshold) | 123 : "r" (src), "r" (stride), "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP]) |
123 : "%eax" | 124 : "%eax" |
124 ); | 125 ); |
125 numEq= (-numEq) &0xFF; | 126 numEq= (-numEq) &0xFF; |
126 return numEq > c->ppMode.flatnessThreshold; | 127 return numEq > c->ppMode.flatnessThreshold; |
127 } | 128 } |
148 : "=r" (isOk) | 149 : "=r" (isOk) |
149 : "r" (src), "r" (stride), "m" (c->pQPb) | 150 : "r" (src), "r" (stride), "m" (c->pQPb) |
150 ); | 151 ); |
151 return isOk==0; | 152 return isOk==0; |
152 #else | 153 #else |
154 #if 1 | |
153 int x; | 155 int x; |
154 const int QP= c->QP; | 156 const int QP= c->QP; |
155 src+= stride*3; | 157 src+= stride*3; |
156 for(x=0; x<BLOCK_SIZE; x++) | 158 for(x=0; x<BLOCK_SIZE; x++) |
157 { | 159 { |
158 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0; | 160 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0; |
159 } | 161 } |
160 | 162 |
161 return 1; | 163 return 1; |
164 #else | |
165 int x; | |
166 const int QP= c->QP; | |
167 src+= stride*4; | |
168 for(x=0; x<BLOCK_SIZE; x++) | |
169 { | |
170 int min=255; | |
171 int max=0; | |
172 int y; | |
173 for(y=0; y<8; y++){ | |
174 int v= src[x + y*stride]; | |
175 if(v>max) max=v; | |
176 if(v<min) min=v; | |
177 } | |
178 if(max-min > 2*QP) return 0; | |
179 } | |
180 return 1; | |
181 #endif | |
162 #endif | 182 #endif |
163 } | 183 } |
164 | 184 |
165 /** | 185 /** |
166 * Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle) | 186 * Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle) |
2637 const int mode= isColor ? c.ppMode.chromMode : c.ppMode.lumMode; | 2657 const int mode= isColor ? c.ppMode.chromMode : c.ppMode.lumMode; |
2638 #endif | 2658 #endif |
2639 int black=0, white=255; // blackest black and whitest white in the picture | 2659 int black=0, white=255; // blackest black and whitest white in the picture |
2640 int QPCorrecture= 256*256; | 2660 int QPCorrecture= 256*256; |
2641 | 2661 |
2642 int copyAhead; | 2662 int copyAhead, i; |
2643 | 2663 |
2644 //FIXME remove | 2664 //FIXME remove |
2645 uint64_t * const yHistogram= c.yHistogram; | 2665 uint64_t * const yHistogram= c.yHistogram; |
2646 uint8_t * const tempSrc= c.tempSrc; | 2666 uint8_t * const tempSrc= c.tempSrc; |
2647 uint8_t * const tempDst= c.tempDst; | 2667 uint8_t * const tempDst= c.tempDst; |
2648 | 2668 const int mbWidth= isColor ? (width+7)>>3 : (width+15)>>4; |
2649 c.dcOffset= c.ppMode.maxDcDiff; | |
2650 c.dcThreshold= c.ppMode.maxDcDiff*2 + 1; | |
2651 | 2669 |
2652 #ifdef HAVE_MMX | 2670 #ifdef HAVE_MMX |
2653 c.mmxDcOffset= 0x7F - c.dcOffset; | 2671 for(i=0; i<32; i++){ |
2654 c.mmxDcThreshold= 0x7F - c.dcThreshold; | 2672 int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1; |
2655 | 2673 int threshold= offset*2 + 1; |
2656 c.mmxDcOffset*= 0x0101010101010101LL; | 2674 c.mmxDcOffset[i]= 0x7F - offset; |
2657 c.mmxDcThreshold*= 0x0101010101010101LL; | 2675 c.mmxDcThreshold[i]= 0x7F - threshold; |
2676 c.mmxDcOffset[i]*= 0x0101010101010101LL; | |
2677 c.mmxDcThreshold[i]*= 0x0101010101010101LL; | |
2678 } | |
2658 #endif | 2679 #endif |
2659 | 2680 |
2660 if(mode & CUBIC_IPOL_DEINT_FILTER) copyAhead=16; | 2681 if(mode & CUBIC_IPOL_DEINT_FILTER) copyAhead=16; |
2661 else if( (mode & LINEAR_BLEND_DEINT_FILTER) | 2682 else if( (mode & LINEAR_BLEND_DEINT_FILTER) |
2662 || (mode & FFMPEG_DEINT_FILTER)) copyAhead=14; | 2683 || (mode & FFMPEG_DEINT_FILTER)) copyAhead=14; |
2812 uint8_t *dstBlock= &(dst[y*dstStride]); | 2833 uint8_t *dstBlock= &(dst[y*dstStride]); |
2813 #ifdef HAVE_MMX | 2834 #ifdef HAVE_MMX |
2814 uint8_t *tempBlock1= c.tempBlocks; | 2835 uint8_t *tempBlock1= c.tempBlocks; |
2815 uint8_t *tempBlock2= c.tempBlocks + 8; | 2836 uint8_t *tempBlock2= c.tempBlocks + 8; |
2816 #endif | 2837 #endif |
2817 #ifdef ARCH_X86 | |
2818 int *QPptr= isColor ? &QPs[(y>>3)*QPStride] :&QPs[(y>>4)*QPStride]; | 2838 int *QPptr= isColor ? &QPs[(y>>3)*QPStride] :&QPs[(y>>4)*QPStride]; |
2819 int QPDelta= isColor ? (-1) : 1<<31; | 2839 int *nonBQPptr= isColor ? &c.nonBQPTable[(y>>3)*mbWidth] :&c.nonBQPTable[(y>>4)*mbWidth]; |
2820 int QPFrac= 1<<30; | |
2821 #endif | |
2822 int QP=0; | 2840 int QP=0; |
2823 /* can we mess with a 8x16 block from srcBlock/dstBlock downwards and 1 line upwards | 2841 /* can we mess with a 8x16 block from srcBlock/dstBlock downwards and 1 line upwards |
2824 if not then use a temporary buffer */ | 2842 if not then use a temporary buffer */ |
2825 if(y+15 >= height) | 2843 if(y+15 >= height) |
2826 { | 2844 { |
2853 { | 2871 { |
2854 const int stride= dstStride; | 2872 const int stride= dstStride; |
2855 #ifdef HAVE_MMX | 2873 #ifdef HAVE_MMX |
2856 uint8_t *tmpXchg; | 2874 uint8_t *tmpXchg; |
2857 #endif | 2875 #endif |
2858 #ifdef ARCH_X86 | 2876 if(isColor) |
2859 QP= *QPptr; | |
2860 asm volatile( | |
2861 "addl %2, %1 \n\t" | |
2862 "sbbl %%eax, %%eax \n\t" | |
2863 "shll $2, %%eax \n\t" | |
2864 "subl %%eax, %0 \n\t" | |
2865 : "+r" (QPptr), "+m" (QPFrac) | |
2866 : "r" (QPDelta) | |
2867 : "%eax" | |
2868 ); | |
2869 #else | |
2870 QP= isColor ? | |
2871 QPs[(y>>3)*QPStride + (x>>3)]: | |
2872 QPs[(y>>4)*QPStride + (x>>4)]; | |
2873 #endif | |
2874 if(!isColor) | |
2875 { | 2877 { |
2878 QP= QPptr[x>>3]; | |
2879 c.nonBQP= nonBQPptr[x>>3]; | |
2880 } | |
2881 else | |
2882 { | |
2883 QP= QPptr[x>>4]; | |
2876 QP= (QP* QPCorrecture + 256*128)>>16; | 2884 QP= (QP* QPCorrecture + 256*128)>>16; |
2885 c.nonBQP= nonBQPptr[x>>4]; | |
2886 c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16; | |
2877 yHistogram[ srcBlock[srcStride*12 + 4] ]++; | 2887 yHistogram[ srcBlock[srcStride*12 + 4] ]++; |
2878 } | 2888 } |
2879 //printf("%d ", QP); | |
2880 c.QP= QP; | 2889 c.QP= QP; |
2881 #ifdef HAVE_MMX | 2890 #ifdef HAVE_MMX |
2882 asm volatile( | 2891 asm volatile( |
2883 "movd %1, %%mm7 \n\t" | 2892 "movd %1, %%mm7 \n\t" |
2884 "packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP | 2893 "packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP |