comparison postproc/postprocess_template.c @ 7960:0a4ab841ae29

better deblocking filter
author michael
date Tue, 29 Oct 2002 18:35:15 +0000
parents be43106d6329
children 0a5d69e6f2a2
comparison
equal deleted inserted replaced
7959:9b8dd240314d 7960:0a4ab841ae29
54 src+= stride*4; // src points to begin of the 8x8 Block 54 src+= stride*4; // src points to begin of the 8x8 Block
55 asm volatile( 55 asm volatile(
56 "leal (%1, %2), %%eax \n\t" 56 "leal (%1, %2), %%eax \n\t"
57 // 0 1 2 3 4 5 6 7 8 9 57 // 0 1 2 3 4 5 6 7 8 9
58 // %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2 58 // %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2
59 "movq %3, %%mm7 \n\t" // mm7 = 0x7F 59 "movq %3, %%mm7 \n\t"
60 "movq %4, %%mm6 \n\t" // mm6 = 0x7D 60 "movq %4, %%mm6 \n\t"
61
61 "movq (%1), %%mm0 \n\t" 62 "movq (%1), %%mm0 \n\t"
62 "movq (%%eax), %%mm1 \n\t" 63 "movq (%%eax), %%mm1 \n\t"
63 "psubb %%mm1, %%mm0 \n\t" // mm0 = difference 64 "psubb %%mm1, %%mm0 \n\t" // mm0 = difference
64 "paddb %%mm7, %%mm0 \n\t" 65 "paddb %%mm7, %%mm0 \n\t"
65 "pcmpgtb %%mm6, %%mm0 \n\t" 66 "pcmpgtb %%mm6, %%mm0 \n\t"
117 "psrlq $32, %%mm0 \n\t" 118 "psrlq $32, %%mm0 \n\t"
118 "paddb %%mm1, %%mm0 \n\t" 119 "paddb %%mm1, %%mm0 \n\t"
119 #endif 120 #endif
120 "movd %%mm0, %0 \n\t" 121 "movd %%mm0, %0 \n\t"
121 : "=r" (numEq) 122 : "=r" (numEq)
122 : "r" (src), "r" (stride), "m" (c->mmxDcOffset), "m" (c->mmxDcThreshold) 123 : "r" (src), "r" (stride), "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP])
123 : "%eax" 124 : "%eax"
124 ); 125 );
125 numEq= (-numEq) &0xFF; 126 numEq= (-numEq) &0xFF;
126 return numEq > c->ppMode.flatnessThreshold; 127 return numEq > c->ppMode.flatnessThreshold;
127 } 128 }
148 : "=r" (isOk) 149 : "=r" (isOk)
149 : "r" (src), "r" (stride), "m" (c->pQPb) 150 : "r" (src), "r" (stride), "m" (c->pQPb)
150 ); 151 );
151 return isOk==0; 152 return isOk==0;
152 #else 153 #else
154 #if 1
153 int x; 155 int x;
154 const int QP= c->QP; 156 const int QP= c->QP;
155 src+= stride*3; 157 src+= stride*3;
156 for(x=0; x<BLOCK_SIZE; x++) 158 for(x=0; x<BLOCK_SIZE; x++)
157 { 159 {
158 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0; 160 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
159 } 161 }
160 162
161 return 1; 163 return 1;
164 #else
165 int x;
166 const int QP= c->QP;
167 src+= stride*4;
168 for(x=0; x<BLOCK_SIZE; x++)
169 {
170 int min=255;
171 int max=0;
172 int y;
173 for(y=0; y<8; y++){
174 int v= src[x + y*stride];
175 if(v>max) max=v;
176 if(v<min) min=v;
177 }
178 if(max-min > 2*QP) return 0;
179 }
180 return 1;
181 #endif
162 #endif 182 #endif
163 } 183 }
164 184
165 /** 185 /**
166 * Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle) 186 * Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle)
2637 const int mode= isColor ? c.ppMode.chromMode : c.ppMode.lumMode; 2657 const int mode= isColor ? c.ppMode.chromMode : c.ppMode.lumMode;
2638 #endif 2658 #endif
2639 int black=0, white=255; // blackest black and whitest white in the picture 2659 int black=0, white=255; // blackest black and whitest white in the picture
2640 int QPCorrecture= 256*256; 2660 int QPCorrecture= 256*256;
2641 2661
2642 int copyAhead; 2662 int copyAhead, i;
2643 2663
2644 //FIXME remove 2664 //FIXME remove
2645 uint64_t * const yHistogram= c.yHistogram; 2665 uint64_t * const yHistogram= c.yHistogram;
2646 uint8_t * const tempSrc= c.tempSrc; 2666 uint8_t * const tempSrc= c.tempSrc;
2647 uint8_t * const tempDst= c.tempDst; 2667 uint8_t * const tempDst= c.tempDst;
2648 2668 const int mbWidth= isColor ? (width+7)>>3 : (width+15)>>4;
2649 c.dcOffset= c.ppMode.maxDcDiff;
2650 c.dcThreshold= c.ppMode.maxDcDiff*2 + 1;
2651 2669
2652 #ifdef HAVE_MMX 2670 #ifdef HAVE_MMX
2653 c.mmxDcOffset= 0x7F - c.dcOffset; 2671 for(i=0; i<32; i++){
2654 c.mmxDcThreshold= 0x7F - c.dcThreshold; 2672 int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1;
2655 2673 int threshold= offset*2 + 1;
2656 c.mmxDcOffset*= 0x0101010101010101LL; 2674 c.mmxDcOffset[i]= 0x7F - offset;
2657 c.mmxDcThreshold*= 0x0101010101010101LL; 2675 c.mmxDcThreshold[i]= 0x7F - threshold;
2676 c.mmxDcOffset[i]*= 0x0101010101010101LL;
2677 c.mmxDcThreshold[i]*= 0x0101010101010101LL;
2678 }
2658 #endif 2679 #endif
2659 2680
2660 if(mode & CUBIC_IPOL_DEINT_FILTER) copyAhead=16; 2681 if(mode & CUBIC_IPOL_DEINT_FILTER) copyAhead=16;
2661 else if( (mode & LINEAR_BLEND_DEINT_FILTER) 2682 else if( (mode & LINEAR_BLEND_DEINT_FILTER)
2662 || (mode & FFMPEG_DEINT_FILTER)) copyAhead=14; 2683 || (mode & FFMPEG_DEINT_FILTER)) copyAhead=14;
2812 uint8_t *dstBlock= &(dst[y*dstStride]); 2833 uint8_t *dstBlock= &(dst[y*dstStride]);
2813 #ifdef HAVE_MMX 2834 #ifdef HAVE_MMX
2814 uint8_t *tempBlock1= c.tempBlocks; 2835 uint8_t *tempBlock1= c.tempBlocks;
2815 uint8_t *tempBlock2= c.tempBlocks + 8; 2836 uint8_t *tempBlock2= c.tempBlocks + 8;
2816 #endif 2837 #endif
2817 #ifdef ARCH_X86
2818 int *QPptr= isColor ? &QPs[(y>>3)*QPStride] :&QPs[(y>>4)*QPStride]; 2838 int *QPptr= isColor ? &QPs[(y>>3)*QPStride] :&QPs[(y>>4)*QPStride];
2819 int QPDelta= isColor ? (-1) : 1<<31; 2839 int *nonBQPptr= isColor ? &c.nonBQPTable[(y>>3)*mbWidth] :&c.nonBQPTable[(y>>4)*mbWidth];
2820 int QPFrac= 1<<30;
2821 #endif
2822 int QP=0; 2840 int QP=0;
2823 /* can we mess with an 8x16 block from srcBlock/dstBlock downwards and 1 line upwards 2841 /* can we mess with an 8x16 block from srcBlock/dstBlock downwards and 1 line upwards
2824 if not, then use a temporary buffer */ 2842 if not, then use a temporary buffer */
2825 if(y+15 >= height) 2843 if(y+15 >= height)
2826 { 2844 {
2853 { 2871 {
2854 const int stride= dstStride; 2872 const int stride= dstStride;
2855 #ifdef HAVE_MMX 2873 #ifdef HAVE_MMX
2856 uint8_t *tmpXchg; 2874 uint8_t *tmpXchg;
2857 #endif 2875 #endif
2858 #ifdef ARCH_X86 2876 if(isColor)
2859 QP= *QPptr;
2860 asm volatile(
2861 "addl %2, %1 \n\t"
2862 "sbbl %%eax, %%eax \n\t"
2863 "shll $2, %%eax \n\t"
2864 "subl %%eax, %0 \n\t"
2865 : "+r" (QPptr), "+m" (QPFrac)
2866 : "r" (QPDelta)
2867 : "%eax"
2868 );
2869 #else
2870 QP= isColor ?
2871 QPs[(y>>3)*QPStride + (x>>3)]:
2872 QPs[(y>>4)*QPStride + (x>>4)];
2873 #endif
2874 if(!isColor)
2875 { 2877 {
2878 QP= QPptr[x>>3];
2879 c.nonBQP= nonBQPptr[x>>3];
2880 }
2881 else
2882 {
2883 QP= QPptr[x>>4];
2876 QP= (QP* QPCorrecture + 256*128)>>16; 2884 QP= (QP* QPCorrecture + 256*128)>>16;
2885 c.nonBQP= nonBQPptr[x>>4];
2886 c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16;
2877 yHistogram[ srcBlock[srcStride*12 + 4] ]++; 2887 yHistogram[ srcBlock[srcStride*12 + 4] ]++;
2878 } 2888 }
2879 //printf("%d ", QP);
2880 c.QP= QP; 2889 c.QP= QP;
2881 #ifdef HAVE_MMX 2890 #ifdef HAVE_MMX
2882 asm volatile( 2891 asm volatile(
2883 "movd %1, %%mm7 \n\t" 2892 "movd %1, %%mm7 \n\t"
2884 "packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP 2893 "packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP