Mercurial > mplayer.hg
changeset 13564:992960f68af0
postproc/yuv2rgb_altivec.c compile fix
yuv2rgb_altivec_init_tables does initialize the SwsContext vectors.
Add the missing vec_splat calls.
patch by (Luca Barbato <lu_zero at gentoo dot org>) and (Romain Dolbeau <dolbeau at irisa dot fr>)
author | michael |
---|---|
date | Tue, 05 Oct 2004 19:11:00 +0000 |
parents | 9b55ea3879ef |
children | d8808729d3fd |
files | postproc/swscale.c postproc/swscale_internal.h postproc/yuv2rgb_altivec.c |
diffstat | 3 files changed, 235 insertions(+), 93 deletions(-) [+] |
line wrap: on
line diff
--- a/postproc/swscale.c Tue Oct 05 14:05:17 2004 +0000 +++ b/postproc/swscale.c Tue Oct 05 19:11:00 2004 +0000 @@ -1724,7 +1724,7 @@ //FIXME factorize #ifdef HAVE_ALTIVEC - yuv2rgb_altivec_init_tables (c, inv_table); + yuv2rgb_altivec_init_tables (c, inv_table, brightness, contrast, saturation); #endif return 0; }
--- a/postproc/swscale_internal.h Tue Oct 05 14:05:17 2004 +0000 +++ b/postproc/swscale_internal.h Tue Oct 05 19:11:00 2004 +0000 @@ -23,6 +23,12 @@ #include <altivec.h> #endif +#ifdef CONFIG_DARWIN +#define AVV(x...) (x) +#else +#define AVV(x...) {x} +#endif + #include "../mp_msg.h" #define MSG_WARN(args...) mp_msg(MSGT_SWS,MSGL_WARN, ##args )
--- a/postproc/yuv2rgb_altivec.c Tue Oct 05 14:05:17 2004 +0000 +++ b/postproc/yuv2rgb_altivec.c Tue Oct 05 19:11:00 2004 +0000 @@ -119,13 +119,13 @@ */ static const vector unsigned char - perm_rgb_0 = (vector unsigned char)(0x00,0x01,0x10,0x02,0x03,0x11,0x04,0x05, + perm_rgb_0 = (const vector unsigned char)AVV(0x00,0x01,0x10,0x02,0x03,0x11,0x04,0x05, 0x12,0x06,0x07,0x13,0x08,0x09,0x14,0x0a), - perm_rgb_1 = (vector unsigned char)(0x0b,0x15,0x0c,0x0d,0x16,0x0e,0x0f,0x17, + perm_rgb_1 = (const vector unsigned char)AVV(0x0b,0x15,0x0c,0x0d,0x16,0x0e,0x0f,0x17, 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f), - perm_rgb_2 = (vector unsigned char)(0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, + perm_rgb_2 = (const vector unsigned char)AVV(0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 0x00,0x01,0x18,0x02,0x03,0x19,0x04,0x05), - perm_rgb_3 = (vector unsigned char)(0x1a,0x06,0x07,0x1b,0x08,0x09,0x1c,0x0a, + perm_rgb_3 = (const vector unsigned char)AVV(0x1a,0x06,0x07,0x1b,0x08,0x09,0x1c,0x0a, 0x0b,0x1d,0x0c,0x0d,0x1e,0x0e,0x0f,0x1f); #define vec_merge3(x2,x1,x0,y0,y1,y2) \ @@ -198,27 +198,27 @@ #define vec_unh(x) \ (vector signed short) \ - vec_perm(x,(typeof(x))(0),\ - (vector unsigned char)(0x10,0x00,0x10,0x01,0x10,0x02,0x10,0x03,\ + vec_perm(x,(typeof(x))AVV(0),\ + (vector unsigned char)AVV(0x10,0x00,0x10,0x01,0x10,0x02,0x10,0x03,\ 0x10,0x04,0x10,0x05,0x10,0x06,0x10,0x07)) #define vec_unl(x) \ (vector signed short) \ - vec_perm(x,(typeof(x))(0),\ - (vector unsigned char)(0x10,0x08,0x10,0x09,0x10,0x0A,0x10,0x0B,\ + vec_perm(x,(typeof(x))AVV(0),\ + (vector unsigned char)AVV(0x10,0x08,0x10,0x09,0x10,0x0A,0x10,0x0B,\ 0x10,0x0C,0x10,0x0D,0x10,0x0E,0x10,0x0F)) #define vec_clip(x) \ - vec_max (vec_min (x, (typeof(x))(255)), (typeof(x))(0)) + vec_max (vec_min (x, (typeof(x))AVV(235)), (typeof(x))AVV(16)) #define vec_packclp_a(x,y) \ (vector unsigned char)vec_pack (vec_clip (x), vec_clip (y)) #define vec_packclp(x,y) \ (vector unsigned char)vec_packs \ - ((vector unsigned short)vec_max (x,(vector 
signed short) (0)), \ - (vector unsigned short)vec_max (y,(vector signed short) (0))) + ((vector unsigned short)vec_max (x,(vector signed short) AVV(0)), \ + (vector unsigned short)vec_max (y,(vector signed short) AVV(0))) -//#define out_pixels(a,b,c,ptr) vec_mstrgb32(typeof(a),((typeof (a))(0)),a,a,a,ptr) +//#define out_pixels(a,b,c,ptr) vec_mstrgb32(typeof(a),((typeof (a))AVV(0)),a,a,a,ptr) static inline void cvtyuvtoRGB (SwsContext *c, @@ -228,9 +228,10 @@ vector signed short vx,ux,uvx; Y = vec_mradds (Y, c->CY, c->OY); - - U = vec_sub (U,(vector signed short)(128)); - V = vec_sub (V,(vector signed short)(128)); + U = vec_sub (U,(vector signed short) + vec_splat((vector signed short)AVV(128),0)); + V = vec_sub (V,(vector signed short) + vec_splat((vector signed short)AVV(128),0)); // ux = (CBU*(u<<c->CSHIFT)+0x4000)>>15; ux = vec_sl (U, c->CSHIFT); @@ -324,8 +325,13 @@ align_perm = vec_lvsl (0, vi); \ v = (vector signed char)vec_perm (vivP[0], vivP[1], align_perm); \ \ - u = (vector signed char)vec_sub (u, (vector signed char)(128)); \ - v = (vector signed char)vec_sub (v, (vector signed char)(128)); \ + u = (vector signed char) \ + vec_sub (u,(vector signed char) \ + vec_splat((vector signed char)AVV(128),0));\ + v = (vector signed char) \ + vec_sub (v,(vector signed char) \ + vec_splat((vector signed char)AVV(128),0));\ + \ U = vec_unpackh (u); \ V = vec_unpackh (v); \ \ @@ -342,18 +348,18 @@ \ /* ux = (CBU*(u<<CSHIFT)+0x4000)>>15 */ \ ux = vec_sl (U, lCSHIFT); \ - ux = vec_mradds (ux, lCBU, (vector signed short)(0)); \ + ux = vec_mradds (ux, lCBU, (vector signed short)AVV(0)); \ ux0 = vec_mergeh (ux,ux); \ ux1 = vec_mergel (ux,ux); \ \ /* vx = (CRV*(v<<CSHIFT)+0x4000)>>15; */ \ vx = vec_sl (V, lCSHIFT); \ - vx = vec_mradds (vx, lCRV, (vector signed short)(0)); \ + vx = vec_mradds (vx, lCRV, (vector signed short)AVV(0)); \ vx0 = vec_mergeh (vx,vx); \ vx1 = vec_mergel (vx,vx); \ \ /* uvx = ((CGU*u) + (CGV*v))>>15 */ \ - uvx = vec_mradds (U, lCGU, (vector signed 
short)(0)); \ + uvx = vec_mradds (U, lCGU, (vector signed short)AVV(0)); \ uvx = vec_mradds (V, lCGV, uvx); \ uvx0 = vec_mergeh (uvx,uvx); \ uvx1 = vec_mergel (uvx,uvx); \ @@ -403,15 +409,167 @@ } -#define out_abgr(a,b,c,ptr) vec_mstrgb32(typeof(a),((typeof (a))(0)),c,b,a,ptr) -#define out_bgra(a,b,c,ptr) vec_mstrgb32(typeof(a),c,b,a,((typeof (a))(0)),ptr) -#define out_rgba(a,b,c,ptr) vec_mstrgb32(typeof(a),a,b,c,((typeof (a))(0)),ptr) -#define out_argb(a,b,c,ptr) vec_mstrgb32(typeof(a),((typeof (a))(0)),a,b,c,ptr) +#define out_abgr(a,b,c,ptr) vec_mstrgb32(typeof(a),((typeof (a))AVV(0)),c,b,a,ptr) +#define out_bgra(a,b,c,ptr) vec_mstrgb32(typeof(a),c,b,a,((typeof (a))AVV(0)),ptr) +#define out_rgba(a,b,c,ptr) vec_mstrgb32(typeof(a),a,b,c,((typeof (a))AVV(0)),ptr) +#define out_argb(a,b,c,ptr) vec_mstrgb32(typeof(a),((typeof (a))AVV(0)),a,b,c,ptr) #define out_rgb24(a,b,c,ptr) vec_mstrgb24(a,b,c,ptr) -#define out_bgr24(a,b,c,ptr) vec_mstrgb24(c,b,a,ptr) +#define out_bgr24(a,b,c,ptr) vec_mstbgr24(c,b,a,ptr) DEFCSP420_CVT (yuv2_abgr32, out_abgr) +#if 1 DEFCSP420_CVT (yuv2_bgra32, out_argb) +#else +static int altivec_yuv2_bgra32 (SwsContext *c, + unsigned char **in, int *instrides, + int srcSliceY, int srcSliceH, + unsigned char **oplanes, int *outstrides) +{ + int w = c->srcW; + int h = srcSliceH; + int i,j; + int instrides_scl[3]; + vector unsigned char y0,y1; + + vector signed char u,v; + + vector signed short Y0,Y1,Y2,Y3; + vector signed short U,V; + vector signed short vx,ux,uvx; + vector signed short vx0,ux0,uvx0; + vector signed short vx1,ux1,uvx1; + vector signed short R0,G0,B0; + vector signed short R1,G1,B1; + vector unsigned char R,G,B; + + vector unsigned char *uivP, *vivP; + vector unsigned char align_perm; + + vector signed short + lCY = c->CY, + lOY = c->OY, + lCRV = c->CRV, + lCBU = c->CBU, + lCGU = c->CGU, + lCGV = c->CGV; + + vector unsigned short lCSHIFT = c->CSHIFT; + + ubyte *y1i = in[0]; + ubyte *y2i = in[0]+w; + ubyte *ui = in[1]; + ubyte *vi = 
in[2]; + + vector unsigned char *oute + = (vector unsigned char *) + (oplanes[0]+srcSliceY*outstrides[0]); + vector unsigned char *outo + = (vector unsigned char *) + (oplanes[0]+srcSliceY*outstrides[0]+outstrides[0]); + + + instrides_scl[0] = instrides[0]; + instrides_scl[1] = instrides[1]-w/2; /* the loop moves ui by w/2 */ + instrides_scl[2] = instrides[2]-w/2; /* the loop moves vi by w/2 */ + + + for (i=0;i<h/2;i++) { + vec_dstst (outo, (0x02000002|(((w*3+32)/32)<<16)), 0); + vec_dstst (oute, (0x02000002|(((w*3+32)/32)<<16)), 1); + + for (j=0;j<w/16;j++) { + + y0 = vec_ldl (0,y1i); + y1 = vec_ldl (0,y2i); + uivP = (vector unsigned char *)ui; + vivP = (vector unsigned char *)vi; + + align_perm = vec_lvsl (0, ui); + u = (vector signed char)vec_perm (uivP[0], uivP[1], align_perm); + + align_perm = vec_lvsl (0, vi); + v = (vector signed char)vec_perm (vivP[0], vivP[1], align_perm); + u = (vector signed char) + vec_sub (u,(vector signed char) + vec_splat((vector signed char)AVV(128),0)); + + v = (vector signed char) + vec_sub (v, (vector signed char) + vec_splat((vector signed char)AVV(128),0)); + + U = vec_unpackh (u); + V = vec_unpackh (v); + + + Y0 = vec_unh (y0); + Y1 = vec_unl (y0); + Y2 = vec_unh (y1); + Y3 = vec_unl (y1); + + Y0 = vec_mradds (Y0, lCY, lOY); + Y1 = vec_mradds (Y1, lCY, lOY); + Y2 = vec_mradds (Y2, lCY, lOY); + Y3 = vec_mradds (Y3, lCY, lOY); + + /* ux = (CBU*(u<<CSHIFT)+0x4000)>>15 */ + ux = vec_sl (U, lCSHIFT); + ux = vec_mradds (ux, lCBU, (vector signed short)AVV(0)); + ux0 = vec_mergeh (ux,ux); + ux1 = vec_mergel (ux,ux); + + /* vx = (CRV*(v<<CSHIFT)+0x4000)>>15; */ + vx = vec_sl (V, lCSHIFT); + vx = vec_mradds (vx, lCRV, (vector signed short)AVV(0)); + vx0 = vec_mergeh (vx,vx); + vx1 = vec_mergel (vx,vx); + /* uvx = ((CGU*u) + (CGV*v))>>15 */ + uvx = vec_mradds (U, lCGU, (vector signed short)AVV(0)); + uvx = vec_mradds (V, lCGV, uvx); + uvx0 = vec_mergeh (uvx,uvx); + uvx1 = vec_mergel (uvx,uvx); + R0 = vec_add (Y0,vx0); + G0 = vec_add 
(Y0,uvx0); + B0 = vec_add (Y0,ux0); + R1 = vec_add (Y1,vx1); + G1 = vec_add (Y1,uvx1); + B1 = vec_add (Y1,ux1); + R = vec_packclp (R0,R1); + G = vec_packclp (G0,G1); + B = vec_packclp (B0,B1); + + out_argb(R,G,B,oute); + R0 = vec_add (Y2,vx0); + G0 = vec_add (Y2,uvx0); + B0 = vec_add (Y2,ux0); + R1 = vec_add (Y3,vx1); + G1 = vec_add (Y3,uvx1); + B1 = vec_add (Y3,ux1); + R = vec_packclp (R0,R1); + G = vec_packclp (G0,G1); + B = vec_packclp (B0,B1); + + out_argb(R,G,B,outo); + y1i += 16; + y2i += 16; + ui += 8; + vi += 8; + + } + + outo += (outstrides[0])>>4; + oute += (outstrides[0])>>4; + + ui += instrides_scl[1]; + vi += instrides_scl[2]; + y1i += instrides_scl[0]; + y2i += instrides_scl[0]; + } + return srcSliceH; +} + +#endif + + DEFCSP420_CVT (yuv2_rgba32, out_rgba) DEFCSP420_CVT (yuv2_argb32, out_argb) DEFCSP420_CVT (yuv2_rgb24, out_rgb24) @@ -422,15 +580,15 @@ // 0123 4567 89ab cdef static const vector unsigned char - demux_u = (vector unsigned char)(0x10,0x00,0x10,0x00, + demux_u = (const vector unsigned char)AVV(0x10,0x00,0x10,0x00, 0x10,0x04,0x10,0x04, 0x10,0x08,0x10,0x08, 0x10,0x0c,0x10,0x0c), - demux_v = (vector unsigned char)(0x10,0x02,0x10,0x02, + demux_v = (const vector unsigned char)AVV(0x10,0x02,0x10,0x02, 0x10,0x06,0x10,0x06, 0x10,0x0A,0x10,0x0A, 0x10,0x0E,0x10,0x0E), - demux_y = (vector unsigned char)(0x10,0x01,0x10,0x03, + demux_y = (const vector unsigned char)AVV(0x10,0x01,0x10,0x03, 0x10,0x05,0x10,0x07, 0x10,0x09,0x10,0x0B, 0x10,0x0D,0x10,0x0F); @@ -461,25 +619,25 @@ for (j=0;j<w/16;j++) { uyvy = vec_ld (0, img); U = (vector signed short) - vec_perm (uyvy, (vector unsigned char)(0), demux_u); + vec_perm (uyvy, (vector unsigned char)AVV(0), demux_u); V = (vector signed short) - vec_perm (uyvy, (vector unsigned char)(0), demux_v); + vec_perm (uyvy, (vector unsigned char)AVV(0), demux_v); Y = (vector signed short) - vec_perm (uyvy, (vector unsigned char)(0), demux_y); + vec_perm (uyvy, (vector unsigned char)AVV(0), demux_y); cvtyuvtoRGB (c, 
Y,U,V,&R0,&G0,&B0); uyvy = vec_ld (16, img); U = (vector signed short) - vec_perm (uyvy, (vector unsigned char)(0), demux_u); + vec_perm (uyvy, (vector unsigned char)AVV(0), demux_u); V = (vector signed short) - vec_perm (uyvy, (vector unsigned char)(0), demux_v); + vec_perm (uyvy, (vector unsigned char)AVV(0), demux_v); Y = (vector signed short) - vec_perm (uyvy, (vector unsigned char)(0), demux_y); + vec_perm (uyvy, (vector unsigned char)AVV(0), demux_y); cvtyuvtoRGB (c, Y,U,V,&R1,&G1,&B1); @@ -564,67 +722,44 @@ return NULL; } -void yuv2rgb_altivec_init_tables (SwsContext *c, const int inv_table[4]) -{ - vector signed short CY, CRV, CBU, CGU, CGV, OY, Y0; - int64_t crv __attribute__ ((aligned(16))) = inv_table[0]; - int64_t cbu __attribute__ ((aligned(16))) = inv_table[1]; - int64_t cgu __attribute__ ((aligned(16))) = inv_table[2]; - int64_t cgv __attribute__ ((aligned(16))) = inv_table[3]; - int64_t cy = (1<<16)-1; - int64_t oy = 0; - short tmp __attribute__ ((aligned(16))); - - if ((c->flags & SWS_CPU_CAPS_ALTIVEC) == 0) - return; +static uint16_t roundToInt16(int64_t f){ + int r= (f + (1<<15))>>16; + if(r<-0x7FFF) return 0x8000; + else if(r> 0x7FFF) return 0x7FFF; + else return r; +} - cy = (cy *c->contrast )>>17; - crv= (crv*c->contrast * c->saturation)>>32; - cbu= (cbu*c->contrast * c->saturation)>>32; - cgu= (cgu*c->contrast * c->saturation)>>32; - cgv= (cgv*c->contrast * c->saturation)>>32; - - oy -= 256*c->brightness; - - tmp = cy; - CY = vec_lde (0, &tmp); - CY = vec_splat (CY, 0); - - tmp = oy; - OY = vec_lde (0, &tmp); - OY = vec_splat (OY, 0); +void yuv2rgb_altivec_init_tables (SwsContext *c, const int inv_table[4],int brightness,int contrast, int saturation) +{ + union { + signed short tmp[8] __attribute__ ((aligned(16))); + vector signed short vec; + } buf; - tmp = crv>>3; - CRV = vec_lde (0, &tmp); - CRV = vec_splat (CRV, 0); - tmp = cbu>>3; - CBU = vec_lde (0, &tmp); - CBU = vec_splat (CBU, 0); + buf.tmp[0] = ( (0xffffLL) * contrast>>8 )>>9; //cy 
+ buf.tmp[1] = -256*brightness; //oy + buf.tmp[2] = (inv_table[0]>>3) *(contrast>>16)*(saturation>>16); //crv + buf.tmp[3] = (inv_table[1]>>3) *(contrast>>16)*(saturation>>16); //cbu + buf.tmp[4] = -((inv_table[2]>>1)*(contrast>>16)*(saturation>>16)); //cgu + buf.tmp[5] = -((inv_table[3]>>1)*(contrast>>16)*(saturation>>16)); //cgv - tmp = -(cgu>>1); - CGU = vec_lde (0, &tmp); - CGU = vec_splat (CGU, 0); - tmp = -(cgv>>1); - CGV = vec_lde (0, &tmp); - CGV = vec_splat (CGV, 0); - c->CSHIFT = (vector unsigned short)(2); - c->CY = CY; - c->OY = OY; - c->CRV = CRV; - c->CBU = CBU; - c->CGU = CGU; - c->CGV = CGV; - + c->CSHIFT = (vector unsigned short)vec_splat((vector unsigned short)AVV(2),0); + c->CY = vec_splat ((vector signed short)buf.vec, 0); + c->OY = vec_splat ((vector signed short)buf.vec, 1); + c->CRV = vec_splat ((vector signed short)buf.vec, 2); + c->CBU = vec_splat ((vector signed short)buf.vec, 3); + c->CGU = vec_splat ((vector signed short)buf.vec, 4); + c->CGV = vec_splat ((vector signed short)buf.vec, 5); #if 0 - printf ("cy: %hvx\n", CY); - printf ("oy: %hvx\n", OY); - printf ("crv: %hvx\n", CRV); - printf ("cbu: %hvx\n", CBU); - printf ("cgv: %hvx\n", CGV); - printf ("cgu: %hvx\n", CGU); +{ +int i; +char *v[6]={"cy","oy","crv","cbu","cgu","cgv"}; +for (i=0; i<6;i++) + printf("%s %d ", v[i],buf.tmp[i] ); + printf("\n"); +} #endif - return; } @@ -637,15 +772,16 @@ { int i,j; short tmp __attribute__((aligned (16))); - short *p; + int16_t *p; short *f; vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V; vector signed short R0,G0,B0,R1,G1,B1; vector unsigned char R,G,B,pels[3]; vector unsigned char *out,*nout; - vector signed short RND = (vector signed short)(1<<3); - vector unsigned short SCL = (vector unsigned short)(4); + + vector signed short RND = vec_splat((vector signed short)AVV(1<<3),0); + vector unsigned short SCL = vec_splat((vector unsigned short)AVV(4),0); unsigned long scratch[16] __attribute__ ((aligned (16))); vector signed short 
*vYCoeffsBank, *vCCoeffsBank;