# HG changeset patch
# User arpi
# Date 1003172384 0
# Node ID 9da2a0515184625e1bf13fbe897f2e70cd5e798b
# Parent 1c08969075255795ed6e775e65bf230322d11a95
software yv12->rgb scaler - separated from fsdga

diff -r 1c0896907525 -r 9da2a0515184 postproc/swscale.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/postproc/swscale.c Mon Oct 15 18:59:44 2001 +0000
@@ -0,0 +1,124 @@
+
+// Software scaling and colorspace conversion routines for MPlayer
+
+// temporary storage for horizontally scaled lines: 4 Y rows; 2 chroma rows with U at [0..2047] and V at [2048..4095]:
+static unsigned int pix_buf_y[4][2048];
+static unsigned int pix_buf_uv[2][2048*2];
+
+// clipping helper table for C implementations (filled by SwScale_Init): clip_table[v+256] clamps v in -256..511 to 0..255:
+static unsigned char clip_table[768];
+
+// yuv->rgb conversion tables (named after their hex coefficient, filled by SwScale_Init; sums are shifted right by 13 after lookup):
+static int yuvtab_2568[256];
+static int yuvtab_3343[256];
+static int yuvtab_0c92[256];
+static int yuvtab_1a1e[256];
+static int yuvtab_40cf[256];
+
+// *** bilinear scaling and yuv->rgb conversion of yv12 slices:
+// *** Note: it's called multiple times while decoding a frame, first time y==0
+// *** Designed to upscale, but may work for downscale too.
+// s_xinc = (src_width << 8) / dst_width    (horizontal step, 8 fractional bits; chroma uses s_xinc>>1)
+// s_yinc = (src_height << 16) / dst_height (vertical step, 16 fractional bits)
+void SwScale_YV12slice_brg24(unsigned char* srcptr[],int stride[], int y, int h,
+                             unsigned char* dstptr, int dststride, int dstw, int dstbpp,
+                             unsigned int s_xinc,unsigned int s_yinc){
+
+// scaling factors:
+//static int s_yinc=(vo_dga_src_height<<16)/vo_dga_vp_height;
+//static int s_xinc=(vo_dga_src_width<<8)/vo_dga_vp_width;
+
+unsigned int s_xinc2=s_xinc>>1; // chroma lines are stepped at half the luma rate
+
+static int s_srcypos;   // vertical source position (16 fractional bits), kept between slice calls
+static int s_ypos;      // next output row to write
+static int s_last_ypos; // last luma source line that was scaled into pix_buf_y
+
+  if(y==0){
+      s_srcypos=-2*s_yinc;
+      s_ypos=-2;
+      s_last_ypos=-2;
+  } // reset counters
+
+  while(1){
+    unsigned char *dest=dstptr+dststride*s_ypos;
+    int y0=2+(s_srcypos>>16); // luma source line, in frame coordinates
+    int y1=1+(s_srcypos>>17); // chroma source line
+    int yalpha=(s_srcypos&0xFFFF)>>8; // vertical blend weight 0..255 for buf1
+    int yalpha1=yalpha^255;
+    int uvalpha=((s_srcypos>>1)&0xFFFF)>>8;
+    int uvalpha1=uvalpha^255;
+    unsigned int *buf0=pix_buf_y[y0&3];
+    unsigned int *buf1=pix_buf_y[((y0+1)&3)]; // slot that receives luma line y0 below
+    unsigned int *uvbuf0=pix_buf_uv[y1&1];
+    unsigned int *uvbuf1=pix_buf_uv[(y1&1)^1];
+    int i;
+
+    if(y0>=y+h) break; // line y0 is past the end of this slice
+
+    s_ypos++; s_srcypos+=s_yinc;
+
+    if(s_last_ypos!=y0){
+      unsigned char *src=srcptr[0]+(y0-y)*stride[0];
+      unsigned int xpos=0;
+      s_last_ypos=y0;
+      // *** horizontal scale Y line to temp buffer
+      // this loop should be rewritten in MMX assembly!!!!
+      for(i=0;i<dstw;i++){
+        register unsigned int xx=xpos>>8;
+        register unsigned int xalpha=xpos&0xFF;
+        buf1[i]=(src[xx]*(xalpha^255)+src[xx+1]*xalpha);
+        xpos+=s_xinc;
+      }
+      // *** horizontal scale U and V lines to temp buffer
+      if(!(y0&1)){
+        unsigned char *src1=srcptr[1]+(y1-y/2)*stride[1];
+        unsigned char *src2=srcptr[2]+(y1-y/2)*stride[2];
+        xpos=0;
+        // this loop should be rewritten in MMX assembly!!!!
+        for(i=0;i<dstw;i++){
+          register unsigned int xx=xpos>>8;
+          register unsigned int xalpha=xpos&0xFF;
+          uvbuf1[i]=(src1[xx]*(xalpha^255)+src1[xx+1]*xalpha);
+          uvbuf1[i+2048]=(src2[xx]*(xalpha^255)+src2[xx+1]*xalpha);
+          xpos+=s_xinc2;
+        }
+      }
+      if(!y0) continue; // source line 0 only fills the buffers, no row is output for it
+    }
+
+    // this loop should be rewritten in MMX assembly!!!!
+    // Note1: this code can be restricted to n*8 (or n*16) width lines to simplify optimization...
+    // Note2: instead of using lookup tabs, mmx version could do the multiply...
+    // Note3: maybe we should make separated 15/16, 24 and 32bpp version of this:
+    for(i=0;i<dstw;i++){
+        // vertical linear interpolation && yuv2rgb in a single step:
+        int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>16)];
+        int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>16);
+        int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>16);
+        dest[0]=clip_table[((Y + yuvtab_3343[U]) >>13)];
+        dest[1]=clip_table[((Y + yuvtab_0c92[V] + yuvtab_1a1e[U]) >>13)];
+        dest[2]=clip_table[((Y + yuvtab_40cf[V]) >>13)];
+        dest+=dstbpp;
+    }
+
+  }
+
+}
+
+
+void SwScale_Init(){
+    // generating tables (must have run before SwScale_YV12slice_brg24 reads them):
+    int i;
+    for(i=0;i<256;i++){
+        clip_table[i]=0;
+        clip_table[i+256]=i;
+        clip_table[i+512]=255;
+        yuvtab_2568[i]=(0x2568*(i-16))+(256<<13); // luma term; the 256<<13 becomes the +256 clip_table offset after >>13
+        yuvtab_3343[i]=0x3343*(i-128);
+        yuvtab_0c92[i]=-0x0c92*(i-128);
+        yuvtab_1a1e[i]=-0x1a1e*(i-128);
+        yuvtab_40cf[i]=0x40cf*(i-128);
+    }
+
+}
diff -r 1c0896907525 -r 9da2a0515184 postproc/swscale_template.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/postproc/swscale_template.c Mon Oct 15 18:59:44 2001 +0000
@@ -0,0 +1,124 @@
+
+// Software scaling and colorspace conversion routines for MPlayer
+
+// temporary storage for horizontally scaled lines: 4 Y rows; 2 chroma rows with U at [0..2047] and V at [2048..4095]:
+static unsigned int pix_buf_y[4][2048];
+static unsigned int pix_buf_uv[2][2048*2];
+
+// clipping helper table for C implementations (filled by SwScale_Init): clip_table[v+256] clamps v in -256..511 to 0..255:
+static unsigned char clip_table[768];
+
+// yuv->rgb conversion tables (named after their hex coefficient, filled by SwScale_Init; sums are shifted right by 13 after lookup):
+static int yuvtab_2568[256];
+static int yuvtab_3343[256];
+static int yuvtab_0c92[256];
+static int yuvtab_1a1e[256];
+static int yuvtab_40cf[256];
+
+// *** bilinear scaling and yuv->rgb conversion of yv12 slices:
+// *** Note: it's called multiple times while decoding a frame, first time y==0
+// *** Designed to upscale, but may work for downscale too.
+// s_xinc = (src_width << 8) / dst_width    (horizontal step, 8 fractional bits; chroma uses s_xinc>>1)
+// s_yinc = (src_height << 16) / dst_height (vertical step, 16 fractional bits)
+void SwScale_YV12slice_brg24(unsigned char* srcptr[],int stride[], int y, int h,
+                             unsigned char* dstptr, int dststride, int dstw, int dstbpp,
+                             unsigned int s_xinc,unsigned int s_yinc){
+
+// scaling factors:
+//static int s_yinc=(vo_dga_src_height<<16)/vo_dga_vp_height;
+//static int s_xinc=(vo_dga_src_width<<8)/vo_dga_vp_width;
+
+unsigned int s_xinc2=s_xinc>>1; // chroma lines are stepped at half the luma rate
+
+static int s_srcypos;   // vertical source position (16 fractional bits), kept between slice calls
+static int s_ypos;      // next output row to write
+static int s_last_ypos; // last luma source line that was scaled into pix_buf_y
+
+  if(y==0){
+      s_srcypos=-2*s_yinc;
+      s_ypos=-2;
+      s_last_ypos=-2;
+  } // reset counters
+
+  while(1){
+    unsigned char *dest=dstptr+dststride*s_ypos;
+    int y0=2+(s_srcypos>>16); // luma source line, in frame coordinates
+    int y1=1+(s_srcypos>>17); // chroma source line
+    int yalpha=(s_srcypos&0xFFFF)>>8; // vertical blend weight 0..255 for buf1
+    int yalpha1=yalpha^255;
+    int uvalpha=((s_srcypos>>1)&0xFFFF)>>8;
+    int uvalpha1=uvalpha^255;
+    unsigned int *buf0=pix_buf_y[y0&3];
+    unsigned int *buf1=pix_buf_y[((y0+1)&3)]; // slot that receives luma line y0 below
+    unsigned int *uvbuf0=pix_buf_uv[y1&1];
+    unsigned int *uvbuf1=pix_buf_uv[(y1&1)^1];
+    int i;
+
+    if(y0>=y+h) break; // line y0 is past the end of this slice
+
+    s_ypos++; s_srcypos+=s_yinc;
+
+    if(s_last_ypos!=y0){
+      unsigned char *src=srcptr[0]+(y0-y)*stride[0];
+      unsigned int xpos=0;
+      s_last_ypos=y0;
+      // *** horizontal scale Y line to temp buffer
+      // this loop should be rewritten in MMX assembly!!!!
+      for(i=0;i<dstw;i++){
+        register unsigned int xx=xpos>>8;
+        register unsigned int xalpha=xpos&0xFF;
+        buf1[i]=(src[xx]*(xalpha^255)+src[xx+1]*xalpha);
+        xpos+=s_xinc;
+      }
+      // *** horizontal scale U and V lines to temp buffer
+      if(!(y0&1)){
+        unsigned char *src1=srcptr[1]+(y1-y/2)*stride[1];
+        unsigned char *src2=srcptr[2]+(y1-y/2)*stride[2];
+        xpos=0;
+        // this loop should be rewritten in MMX assembly!!!!
+        for(i=0;i<dstw;i++){
+          register unsigned int xx=xpos>>8;
+          register unsigned int xalpha=xpos&0xFF;
+          uvbuf1[i]=(src1[xx]*(xalpha^255)+src1[xx+1]*xalpha);
+          uvbuf1[i+2048]=(src2[xx]*(xalpha^255)+src2[xx+1]*xalpha);
+          xpos+=s_xinc2;
+        }
+      }
+      if(!y0) continue; // source line 0 only fills the buffers, no row is output for it
+    }
+
+    // this loop should be rewritten in MMX assembly!!!!
+    // Note1: this code can be restricted to n*8 (or n*16) width lines to simplify optimization...
+    // Note2: instead of using lookup tabs, mmx version could do the multiply...
+    // Note3: maybe we should make separated 15/16, 24 and 32bpp version of this:
+    for(i=0;i<dstw;i++){
+        // vertical linear interpolation && yuv2rgb in a single step:
+        int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>16)];
+        int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>16);
+        int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>16);
+        dest[0]=clip_table[((Y + yuvtab_3343[U]) >>13)];
+        dest[1]=clip_table[((Y + yuvtab_0c92[V] + yuvtab_1a1e[U]) >>13)];
+        dest[2]=clip_table[((Y + yuvtab_40cf[V]) >>13)];
+        dest+=dstbpp;
+    }
+
+  }
+
+}
+
+
+void SwScale_Init(){
+    // generating tables (must have run before SwScale_YV12slice_brg24 reads them):
+    int i;
+    for(i=0;i<256;i++){
+        clip_table[i]=0;
+        clip_table[i+256]=i;
+        clip_table[i+512]=255;
+        yuvtab_2568[i]=(0x2568*(i-16))+(256<<13); // luma term; the 256<<13 becomes the +256 clip_table offset after >>13
+        yuvtab_3343[i]=0x3343*(i-128);
+        yuvtab_0c92[i]=-0x0c92*(i-128);
+        yuvtab_1a1e[i]=-0x1a1e*(i-128);
+        yuvtab_40cf[i]=0x40cf*(i-128);
+    }
+
+}