# HG changeset patch # User michael # Date 1035916515 0 # Node ID 0a4ab841ae29b25f19eae63cf673a25f7685128c # Parent 9b8dd240314d5705456ef0006c96fb16833e8f93 better deblocking filter diff -r 9b8dd240314d -r 0a4ab841ae29 libmpcodecs/vf_pp.c --- a/libmpcodecs/vf_pp.c Tue Oct 29 16:50:34 2002 +0000 +++ b/libmpcodecs/vf_pp.c Tue Oct 29 18:35:15 2002 +0000 @@ -93,7 +93,8 @@ vf->priv->dmpi->planes,vf->priv->dmpi->stride, (mpi->w+7)&(~7),mpi->h, mpi->qscale, mpi->qstride, - &vf->priv->ppMode[ vf->priv->pp ], vf->priv->context); + &vf->priv->ppMode[ vf->priv->pp ], vf->priv->context, + mpi->pict_type); } return vf_next_put_image(vf,vf->priv->dmpi); @@ -161,7 +162,7 @@ ppMode.maxTmpNoise[2]= 3000; ppMode.maxAllowedY= 234; ppMode.minAllowedY= 16; - ppMode.maxDcDiff= 1; + ppMode.baseDcDiff= 256/4; ppMode.flatnessThreshold=40; vf->priv->ppMode[i]= ppMode; diff -r 9b8dd240314d -r 0a4ab841ae29 postproc/postprocess.c --- a/postproc/postprocess.c Tue Oct 29 16:50:34 2002 +0000 +++ b/postproc/postprocess.c Tue Oct 29 18:35:15 2002 +0000 @@ -59,7 +59,6 @@ split this huge file optimize c versions try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks -put fastmemcpy back ... */ @@ -149,13 +148,14 @@ uint64_t __attribute__((aligned(8))) pQPb; uint64_t __attribute__((aligned(8))) pQPb2; + + uint64_t __attribute__((aligned(8))) mmxDcOffset[32]; + uint64_t __attribute__((aligned(8))) mmxDcThreshold[32]; - uint64_t __attribute__((aligned(8))) mmxDcOffset; - uint64_t __attribute__((aligned(8))) mmxDcThreshold; - + QP_STORE_T *nonBQPTable; + int QP; - int dcOffset; - int dcThreshold; + int nonBQP; int frameNum; @@ -247,8 +247,8 @@ { int numEq= 0; int y; - const int dcOffset= c->dcOffset; - const int dcThreshold= c->dcThreshold; + const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1; + const int dcThreshold= dcOffset*2 + 1; for(y=0; ydcOffset; - const int dcThreshold= c->dcThreshold; + const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1; + const int dcThreshold= dcOffset*2 + 1; src+= stride*4; // src points to begin of the 8x8 Block for(y=0; y>4; + int mbHeight= (height+15)>>4; c->tempBlocks= (uint8_t*)memalign(8, 2*16*8); c->yHistogram= (uint64_t*)memalign(8, 256*sizeof(uint64_t)); @@ -789,6 +791,8 @@ c->tempDstBlock= (uint8_t*)memalign(8, 1024*24); c->tempSrcBlock= (uint8_t*)memalign(8, 1024*24); c->deintTemp= (uint8_t*)memalign(8, width+16); + c->nonBQPTable= (QP_STORE_T*)memalign(8, mbWidth*mbHeight*sizeof(QP_STORE_T)); + memset(c->nonBQPTable, 0, mbWidth*mbHeight*sizeof(QP_STORE_T)); c->frameNum=-1; @@ -809,6 +813,7 @@ free(c->tempDstBlock); free(c->tempSrcBlock); free(c->deintTemp); + free(c->nonBQPTable); free(c); } @@ -841,12 +846,14 @@ void postprocess(uint8_t * src[3], int srcStride[3], uint8_t * dst[3], int dstStride[3], - int horizontalSize, int verticalSize, + int width, int height, QP_STORE_T *QP_store, int QPStride, - PPMode *mode, void *c) + PPMode *mode, void *vc, int pict_type) { - + int mbWidth = (width+15)>>4; + int mbHeight= (height+15)>>4; QP_STORE_T quantArray[2048/8]; + PPContext *c = (PPContext*)vc; if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) { @@ -858,6 +865,29 @@ else for(i=0; i<2048/8; i++) quantArray[i]= 1; } +if(0){ +int x,y; +for(y=0; ynonBQPTable[y*mbWidth + x]= qscale; + } + } + } if(firstTime2 && verbose) { @@ -866,30 +896,30 @@ } postProcess(src[0], srcStride[0], dst[0], dstStride[0], - horizontalSize, verticalSize, QP_store, QPStride, 0, mode, c); + width, height, QP_store, QPStride, 0, mode, c); - horizontalSize = (horizontalSize+1)>> 1; - verticalSize = (verticalSize+1)>>1; + width = (width +1)>>1; + height = (height+1)>>1; if(mode->chromMode) { postProcess(src[1], srcStride[1], dst[1], dstStride[1], - horizontalSize, verticalSize, QP_store, QPStride, 1, mode, c); + width, height, QP_store, QPStride, 1, mode, c); postProcess(src[2], srcStride[2], dst[2], dstStride[2], - horizontalSize, verticalSize, QP_store, QPStride, 2, mode, c); + width, height, QP_store, QPStride, 2, mode, c); } else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]) { - memcpy(dst[1], src[1], srcStride[1]*verticalSize); - memcpy(dst[2], src[2], srcStride[2]*verticalSize); + memcpy(dst[1], src[1], srcStride[1]*height); + memcpy(dst[2], src[2], srcStride[2]*height); } else { int y; - for(y=0; ymmxDcOffset), "m" (c->mmxDcThreshold) + : "r" (src), "r" (stride), "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP]) : "%eax" ); numEq= (-numEq) &0xFF; @@ -150,6 +151,7 @@ ); return isOk==0; #else +#if 1 int x; const int QP= c->QP; src+= stride*3; @@ -159,6 +161,24 @@ } return 1; +#else + int x; + const int QP= c->QP; + src+= stride*4; + for(x=0; xmax) max=v; + if(v 2*QP) return 0; + } + return 1; +#endif #endif } @@ -2639,22 +2659,23 @@ int black=0, white=255; // blackest black and whitest white in the picture int QPCorrecture= 256*256; - int copyAhead; + int copyAhead, i; //FIXME remove uint64_t * const yHistogram= c.yHistogram; uint8_t * const tempSrc= c.tempSrc; uint8_t * const tempDst= c.tempDst; - - c.dcOffset= c.ppMode.maxDcDiff; - c.dcThreshold= c.ppMode.maxDcDiff*2 + 1; + const int mbWidth= isColor ? (width+7)>>3 : (width+15)>>4; #ifdef HAVE_MMX - c.mmxDcOffset= 0x7F - c.dcOffset; - c.mmxDcThreshold= 0x7F - c.dcThreshold; - - c.mmxDcOffset*= 0x0101010101010101LL; - c.mmxDcThreshold*= 0x0101010101010101LL; + for(i=0; i<32; i++){ + int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1; + int threshold= offset*2 + 1; + c.mmxDcOffset[i]= 0x7F - offset; + c.mmxDcThreshold[i]= 0x7F - threshold; + c.mmxDcOffset[i]*= 0x0101010101010101LL; + c.mmxDcThreshold[i]*= 0x0101010101010101LL; + } #endif if(mode & CUBIC_IPOL_DEINT_FILTER) copyAhead=16; @@ -2814,11 +2835,8 @@ uint8_t *tempBlock1= c.tempBlocks; uint8_t *tempBlock2= c.tempBlocks + 8; #endif -#ifdef ARCH_X86 int *QPptr= isColor ? &QPs[(y>>3)*QPStride] :&QPs[(y>>4)*QPStride]; - int QPDelta= isColor ? (-1) : 1<<31; - int QPFrac= 1<<30; -#endif + int *nonBQPptr= isColor ? &c.nonBQPTable[(y>>3)*mbWidth] :&c.nonBQPTable[(y>>4)*mbWidth]; int QP=0; /* can we mess with a 8x16 block from srcBlock/dstBlock downwards and 1 line upwards if not than use a temporary buffer */ @@ -2855,28 +2873,19 @@ #ifdef HAVE_MMX uint8_t *tmpXchg; #endif -#ifdef ARCH_X86 - QP= *QPptr; - asm volatile( - "addl %2, %1 \n\t" - "sbbl %%eax, %%eax \n\t" - "shll $2, %%eax \n\t" - "subl %%eax, %0 \n\t" - : "+r" (QPptr), "+m" (QPFrac) - : "r" (QPDelta) - : "%eax" - ); -#else - QP= isColor ? - QPs[(y>>3)*QPStride + (x>>3)]: - QPs[(y>>4)*QPStride + (x>>4)]; -#endif - if(!isColor) + if(isColor) { + QP= QPptr[x>>3]; + c.nonBQP= nonBQPptr[x>>3]; + } + else + { + QP= QPptr[x>>4]; QP= (QP* QPCorrecture + 256*128)>>16; + c.nonBQP= nonBQPptr[x>>4]; + c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16; yHistogram[ srcBlock[srcStride*12 + 4] ]++; } -//printf("%d ", QP); c.QP= QP; #ifdef HAVE_MMX asm volatile(