# HG changeset patch
# User michaelni
# Date 1048944672 0
# Node ID 57fe9c4e0c6e911239554fe3f965ce58fddc59ff
# Parent 48efa413ac812d9ac7347688cb27e098656a5dd8
fixing cliping of c deinterlacers
5 tap lowpass deinterlacer

diff -r 48efa413ac81 -r 57fe9c4e0c6e libpostproc/postprocess.c
--- a/libpostproc/postprocess.c	Fri Mar 28 14:05:55 2003 +0000
+++ b/libpostproc/postprocess.c	Sat Mar 29 13:31:12 2003 +0000
@@ -113,6 +113,10 @@
 static uint64_t __attribute__((aligned(8))) b80= 	0x8080808080808080LL;
 #endif
 
+
+static uint8_t clip_table[3*256];
+static uint8_t * const clip_tab= clip_table + 256;
+
 static int verbose= 0;
 
 static const int deringThreshold= 20;
@@ -133,6 +137,7 @@
 	{"ci", "cubicipoldeint", 	1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
 	{"md", "mediandeint", 		1, 1, 4, MEDIAN_DEINT_FILTER},
 	{"fd", "ffmpegdeint", 		1, 1, 4, FFMPEG_DEINT_FILTER},
+	{"l5", "lowpass5", 		1, 1, 4, LOWPASS5_DEINT_FILTER},
 	{"tn", "tmpnoise", 		1, 7, 8, TEMP_NOISE_FILTER},
 	{"fq", "forcequant", 		1, 0, 0, FORCE_QUANT},
 	{NULL, NULL,0,0,0,0} //End Marker
@@ -751,15 +756,28 @@
 		reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
 	}
 
-	reallocAlign((void **)&c->deintTemp, 8, width+16);
+	reallocAlign((void **)&c->deintTemp, 8, 2*width+32); //2 line buffers of width+16 bytes each
 	reallocAlign((void **)&c->nonBQPTable, 8, mbWidth*mbHeight*sizeof(QP_STORE_T));
 	reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
 }
 
+/**
+ * Fills clip_table so that clip_tab[v] saturates v from -256..511 to 0..255.
+ */
+static void global_init(void){
+	int i;
+	memset(clip_table, 0, 256); //v < 0 saturates to 0
+	for(i=256; i<512; i++)
+		clip_table[i]= i; //truncated to 8 bit: entry i holds i-256
+	memset(clip_table+512, 255, 256); //v > 255 saturates to 255
+}
+
 pp_context_t *pp_get_context(int width, int height, int cpuCaps){
 	PPContext *c= memalign(32, sizeof(PPContext));
 	int stride= (width+15)&(~15); //assumed / will realloc if needed
 
+	global_init();
+
 	memset(c, 0, sizeof(PPContext));
 	c->cpuCaps= cpuCaps;
 	if(cpuCaps&PP_FORMAT){
diff -r 48efa413ac81 -r 57fe9c4e0c6e libpostproc/postprocess_internal.h
--- a/libpostproc/postprocess_internal.h	Fri Mar 28 14:05:55 2003 +0000
+++ b/libpostproc/postprocess_internal.h	Sat Mar 29 13:31:12 2003 +0000
@@ -51,6 +51,7 @@
 #define	CUBIC_IPOL_DEINT_FILTER		0x40000	// 262144
 #define	MEDIAN_DEINT_FILTER		0x80000	// 524288
 #define	FFMPEG_DEINT_FILTER		0x400000
+#define	LOWPASS5_DEINT_FILTER		0x800000
 
 #define TEMP_NOISE_FILTER		0x100000
 #define FORCE_QUANT			0x200000
@@ -61,6 +62,21 @@
 //filters on
 //#define COMPILE_TIME_MODE 0x77
 
+#if 1
+/**
+ * Saturates a for storing into a uint8_t; only valid for a in -256..511
+ * (assumes 32 bit int and an arithmetic right shift of negative values).
+ * Returns 0 for a < 0 and -1 (255 once truncated to 8 bit) for a > 255.
+ */
+static inline int CLIP(int a){
+	if(a&256) return ((a)>>31)^(-1);
+	else      return a;
+}
+//#define CLIP(a) (((a)&256) ? ((a)>>31)^(-1) : (a))
+#elif 0
+#define CLIP(a) clip_tab[a]
+#else
+#define CLIP(a) (a)
+#endif
 /**
  * Postprocessng filter.
  */
diff -r 48efa413ac81 -r 57fe9c4e0c6e libpostproc/postprocess_template.c
--- a/libpostproc/postprocess_template.c	Fri Mar 28 14:05:55 2003 +0000
+++ b/libpostproc/postprocess_template.c	Sat Mar 29 13:31:12 2003 +0000
@@ -1581,7 +1581,6 @@
  * lines 0-3 have been passed through the deblock / dering filters allready, but can be read too
  * lines 4-12 will be read into the deblocking filter and should be deinterlaced
  * this filter will read lines 3-15 and write 7-13
- * no cliping in C version
  */
 static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride)
 {
@@ -1631,10 +1630,10 @@
 	src+= stride*3;
 	for(x=0; x<8; x++)
 	{
-		src[stride*3] = (-src[0]        + 9*src[stride*2] + 9*src[stride*4] - src[stride*6])>>4;
-		src[stride*5] = (-src[stride*2] + 9*src[stride*4] + 9*src[stride*6] - src[stride*8])>>4;
-		src[stride*7] = (-src[stride*4] + 9*src[stride*6] + 9*src[stride*8] - src[stride*10])>>4;
-		src[stride*9] = (-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4;
+		src[stride*3] = CLIP((-src[0]        + 9*src[stride*2] + 9*src[stride*4] - src[stride*6])>>4);
+		src[stride*5] = CLIP((-src[stride*2] + 9*src[stride*4] + 9*src[stride*6] - src[stride*8])>>4);
+		src[stride*7] = CLIP((-src[stride*4] + 9*src[stride*6] + 9*src[stride*8] - src[stride*10])>>4);
+		src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4);
 		src++;
 	}
 #endif
@@ -1646,7 +1645,6 @@
  * lines 0-3 have been passed through the deblock / dering filters allready, but can be read too
  * lines 4-12 will be read into the deblocking filter and should be deinterlaced
  * this filter will read lines 4-13 and write 5-11
- * no cliping in C version
  */
 static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp)
 {
@@ -1705,13 +1703,13 @@
 		int t1= tmp[x];
 		int t2= src[stride*1];
 
-		src[stride*1]= (-t1 + 4*src[stride*0] + 2*t2 + 4*src[stride*2] - src[stride*3] + 4)>>3;
+		src[stride*1]= CLIP((-t1 + 4*src[stride*0] + 2*t2 + 4*src[stride*2] - src[stride*3] + 4)>>3);
 		t1= src[stride*4];
-		src[stride*3]= (-t2 + 4*src[stride*2] + 2*t1 + 4*src[stride*4] - src[stride*5] + 4)>>3;
+		src[stride*3]= CLIP((-t2 + 4*src[stride*2] + 2*t1 + 4*src[stride*4] - src[stride*5] + 4)>>3);
 		t2= src[stride*6];
-		src[stride*5]= (-t1 + 4*src[stride*4] + 2*t2 + 4*src[stride*6] - src[stride*7] + 4)>>3;
+		src[stride*5]= CLIP((-t1 + 4*src[stride*4] + 2*t2 + 4*src[stride*6] - src[stride*7] + 4)>>3);
 		t1= src[stride*8];
-		src[stride*7]= (-t2 + 4*src[stride*6] + 2*t1 + 4*src[stride*8] - src[stride*9] + 4)>>3;
+		src[stride*7]= CLIP((-t2 + 4*src[stride*6] + 2*t1 + 4*src[stride*8] - src[stride*9] + 4)>>3);
 		tmp[x]= t1;
 
 		src++;
@@ -1720,6 +1718,106 @@
 }
 
 /**
+ * Deinterlaces the given block by filtering every line with a (-1 2 6 2 -1) filter.
+ * will be called for every 8x8 block and can read & write from line 4-15
+ * lines 0-3 have been passed through the deblock / dering filters allready, but can be read too
+ * lines 4-12 will be read into the deblocking filter and should be deinterlaced
+ * this filter will read lines 4-13 and write 4-11
+ */
+static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2)
+{
+#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+	src+= stride*4;
+	asm volatile(
+		"leal (%0, %1), %%eax				\n\t"
+		"leal (%%eax, %1, 4), %%edx			\n\t"
+		"pxor %%mm7, %%mm7				\n\t"
+		"movq (%2), %%mm0				\n\t"
+		"movq (%3), %%mm1				\n\t"
+//	0	1	2	3	4	5	6	7	8	9	10
+//	%0	eax	eax+%1	eax+2%1	%0+4%1	edx	edx+%1	edx+2%1	%0+8%1	edx+4%1 ecx
+
+#define DEINT_L5(t1,t2,a,b,c)\
+		"movq " #a ", %%mm2				\n\t"\
+		"movq " #b ", %%mm3				\n\t"\
+		"movq " #c ", %%mm4				\n\t"\
+		PAVGB(t2, %%mm3)					\
+		PAVGB(t1, %%mm4)					\
+		"movq %%mm2, %%mm5				\n\t"\
+		"movq %%mm2, " #t1 "				\n\t"\
+		"punpcklbw %%mm7, %%mm2				\n\t"\
+		"punpckhbw %%mm7, %%mm5				\n\t"\
+		"movq %%mm2, %%mm6				\n\t"\
+		"paddw %%mm2, %%mm2				\n\t"\
+		"paddw %%mm6, %%mm2				\n\t"\
+		"movq %%mm5, %%mm6				\n\t"\
+		"paddw %%mm5, %%mm5				\n\t"\
+		"paddw %%mm6, %%mm5				\n\t"\
+		"movq %%mm3, %%mm6				\n\t"\
+		"punpcklbw %%mm7, %%mm3				\n\t"\
+		"punpckhbw %%mm7, %%mm6				\n\t"\
+		"paddw %%mm3, %%mm3				\n\t"\
+		"paddw %%mm6, %%mm6				\n\t"\
+		"paddw %%mm3, %%mm2				\n\t"\
+		"paddw %%mm6, %%mm5				\n\t"\
+		"movq %%mm4, %%mm6				\n\t"\
+		"punpcklbw %%mm7, %%mm4				\n\t"\
+		"punpckhbw %%mm7, %%mm6				\n\t"\
+		"psubw %%mm4, %%mm2				\n\t"\
+		"psubw %%mm6, %%mm5				\n\t"\
+		"psraw $2, %%mm2				\n\t"\
+		"psraw $2, %%mm5				\n\t"\
+		"packuswb %%mm5, %%mm2				\n\t"\
+		"movq %%mm2, " #a "				\n\t"\
+
+DEINT_L5(%%mm0, %%mm1, (%0)          , (%%eax)       , (%%eax, %1)   )
+DEINT_L5(%%mm1, %%mm0, (%%eax)       , (%%eax, %1)   , (%%eax, %1, 2))
+DEINT_L5(%%mm0, %%mm1, (%%eax, %1)   , (%%eax, %1, 2), (%0, %1, 4)   )
+DEINT_L5(%%mm1, %%mm0, (%%eax, %1, 2), (%0, %1, 4)   , (%%edx)       )
+DEINT_L5(%%mm0, %%mm1, (%0, %1, 4)   , (%%edx)       , (%%edx, %1)   )
+DEINT_L5(%%mm1, %%mm0, (%%edx)       , (%%edx, %1)   , (%%edx, %1, 2))
+DEINT_L5(%%mm0, %%mm1, (%%edx, %1)   , (%%edx, %1, 2), (%0, %1, 8)   )
+DEINT_L5(%%mm1, %%mm0, (%%edx, %1, 2), (%0, %1, 8)   , (%%edx, %1, 4))
+
+		"movq %%mm0, (%2)				\n\t"
+		"movq %%mm1, (%3)				\n\t"
+		: : "r" (src), "r" (stride), "r"(tmp), "r"(tmp2)
+		: "%eax", "%edx"
+	);
+#else
+	int x;
+	src+= stride*4;
+	for(x=0; x<8; x++)
+	{
+		int t1= tmp[x];
+		int t2= tmp2[x];
+		int t3= src[0];
+
+		src[stride*0]= CLIP((-(t1 + src[stride*2]) + 2*(t2 + src[stride*1]) + 6*t3 + 4)>>3);
+		t1= src[stride*1];
+		src[stride*1]= CLIP((-(t2 + src[stride*3]) + 2*(t3 + src[stride*2]) + 6*t1 + 4)>>3);
+		t2= src[stride*2];
+		src[stride*2]= CLIP((-(t3 + src[stride*4]) + 2*(t1 + src[stride*3]) + 6*t2 + 4)>>3);
+		t3= src[stride*3];
+		src[stride*3]= CLIP((-(t1 + src[stride*5]) + 2*(t2 + src[stride*4]) + 6*t3 + 4)>>3);
+		t1= src[stride*4];
+		src[stride*4]= CLIP((-(t2 + src[stride*6]) + 2*(t3 + src[stride*5]) + 6*t1 + 4)>>3);
+		t2= src[stride*5];
+		src[stride*5]= CLIP((-(t3 + src[stride*7]) + 2*(t1 + src[stride*6]) + 6*t2 + 4)>>3);
+		t3= src[stride*6];
+		src[stride*6]= CLIP((-(t1 + src[stride*8]) + 2*(t2 + src[stride*7]) + 6*t3 + 4)>>3);
+		t1= src[stride*7];
+		src[stride*7]= CLIP((-(t2 + src[stride*9]) + 2*(t3 + src[stride*8]) + 6*t1 + 4)>>3);
+
+		tmp[x]= t3;
+		tmp2[x]= t1;
+
+		src++;
+	}
+#endif
+}
+
+/**
  * Deinterlaces the given block by filtering all lines with a (1 2 1) filter.
  * will be called for every 8x8 block and can read & write from line 4-15
  * lines 0-3 have been passed through the deblock / dering filters allready, but can be read too
@@ -2696,7 +2794,8 @@
 
 	if(mode & CUBIC_IPOL_DEINT_FILTER) copyAhead=16;
 	else if(   (mode & LINEAR_BLEND_DEINT_FILTER)
-		|| (mode & FFMPEG_DEINT_FILTER)) copyAhead=14;
+		|| (mode & FFMPEG_DEINT_FILTER)
+		|| (mode & LOWPASS5_DEINT_FILTER)) copyAhead=14;
 	else if(   (mode & V_DEBLOCK)
 		|| (mode & LINEAR_IPOL_DEINT_FILTER)
 		|| (mode & MEDIAN_DEINT_FILTER)) copyAhead=13;
@@ -2832,6 +2931,8 @@
 				RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
 			else if(mode & FFMPEG_DEINT_FILTER)
 				RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
+			else if(mode & LOWPASS5_DEINT_FILTER)
+				RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + 16 + x);
 /*			else if(mode & CUBIC_BLEND_DEINT_FILTER)
 				RENAME(deInterlaceBlendCubic)(dstBlock, dstStride);
 */
@@ -2974,6 +3075,8 @@
 				RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
 			else if(mode & FFMPEG_DEINT_FILTER)
 				RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
+			else if(mode & LOWPASS5_DEINT_FILTER)
+				RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + 16 + x);
 /*			else if(mode & CUBIC_BLEND_DEINT_FILTER)
 				RENAME(deInterlaceBlendCubic)(dstBlock, dstStride);
 */