# HG changeset patch # User arpi # Date 1018720982 0 # Node ID 628c85c15c7b1d764be6ca7495f9f9250a24e4a3 # Parent fd85802f755bded89dc4852c78098d851f3e4ec8 moved to libmpcodecs/native/ diff -r fd85802f755b -r 628c85c15c7b RTjpegN.c --- a/RTjpegN.c Sat Apr 13 17:44:14 2002 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,3800 +0,0 @@ -/* - RTjpeg (C) Justin Schoeman 1998 (justin@suntiger.ee.up.ac.za) - - With modifications by: - (c) 1998, 1999 by Joerg Walter - and - (c) 1999 by Wim Taymans - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - -*/ - -#include -#include -#include - -#include "config.h" -#ifdef HAVE_MMX -#define MMX -#endif - -#include "RTjpegN.h" - -#ifdef MMX -#include "mmx.h" -#endif - -//#define SHOWBLOCK 1 -#define BETTERCOMPRESSION 1 - -static const unsigned char RTjpeg_ZZ[64]={ -0, -8, 1, -2, 9, 16, -24, 17, 10, 3, -4, 11, 18, 25, 32, -40, 33, 26, 19, 12, 5, -6, 13, 20, 27, 34, 41, 48, -56, 49, 42, 35, 28, 21, 14, 7, -15, 22, 29, 36, 43, 50, 57, -58, 51, 44, 37, 30, 23, -31, 38, 45, 52, 59, -60, 53, 46, 39, -47, 54, 61, -62, 55, -63 }; - -static const __u64 RTjpeg_aan_tab[64]={ -4294967296ULL, 5957222912ULL, 5611718144ULL, 5050464768ULL, 4294967296ULL, 3374581504ULL, 2324432128ULL, 1184891264ULL, -5957222912ULL, 8263040512ULL, 7783580160ULL, 7005009920ULL, 5957222912ULL, 4680582144ULL, 3224107520ULL, 1643641088ULL, -5611718144ULL, 7783580160ULL, 7331904512ULL, 6598688768ULL, 5611718144ULL, 4408998912ULL, 3036936960ULL, 1548224000ULL, -5050464768ULL, 7005009920ULL, 6598688768ULL, 5938608128ULL, 5050464768ULL, 3968072960ULL, 2733115392ULL, 1393296000ULL, -4294967296ULL, 5957222912ULL, 5611718144ULL, 5050464768ULL, 4294967296ULL, 3374581504ULL, 2324432128ULL, 1184891264ULL, -3374581504ULL, 4680582144ULL, 4408998912ULL, 3968072960ULL, 3374581504ULL, 2651326208ULL, 1826357504ULL, 931136000ULL, -2324432128ULL, 3224107520ULL, 3036936960ULL, 2733115392ULL, 2324432128ULL, 1826357504ULL, 1258030336ULL, 641204288ULL, -1184891264ULL, 1643641088ULL, 1548224000ULL, 1393296000ULL, 1184891264ULL, 931136000ULL, 641204288ULL, 326894240ULL, -}; - -#ifndef MMX -static __s32 RTjpeg_ws[64+31]; -#endif -__u8 RTjpeg_alldata[2*64+4*64+4*64+4*64+4*64+32]; - -static __s16 *block; // rh -static __s16 *RTjpeg_block; -static __s32 *RTjpeg_lqt; -static __s32 *RTjpeg_cqt; -static __u32 *RTjpeg_liqt; -static __u32 *RTjpeg_ciqt; - -static unsigned char RTjpeg_lb8; -static unsigned char RTjpeg_cb8; -static int RTjpeg_width, RTjpeg_height; -static int RTjpeg_Ywidth, RTjpeg_Cwidth; -static int RTjpeg_Ysize, RTjpeg_Csize; - -static __s16 *RTjpeg_old=NULL; - -#ifdef MMX -mmx_t RTjpeg_lmask; -mmx_t RTjpeg_cmask; -#else -__u16 RTjpeg_lmask; -__u16 RTjpeg_cmask; -#endif -int RTjpeg_mtest=0; - -static const unsigned char RTjpeg_lum_quant_tbl[64] = { - 16, 11, 10, 16, 24, 40, 51, 61, - 12, 12, 14, 19, 26, 58, 60, 55, - 14, 13, 16, 24, 40, 57, 69, 56, - 14, 17, 22, 29, 51, 87, 80, 62, - 18, 22, 37, 56, 68, 109, 103, 77, - 24, 35, 55, 64, 81, 104, 113, 92, - 49, 64, 78, 87, 103, 121, 120, 101, - 72, 92, 95, 98, 112, 100, 103, 99 - }; - -static const unsigned char RTjpeg_chrom_quant_tbl[64] = { - 17, 18, 24, 47, 99, 99, 99, 99, - 18, 21, 26, 66, 99, 99, 99, 99, - 24, 26, 56, 99, 99, 99, 99, 99, - 47, 66, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99 - }; - -#ifdef BETTERCOMPRESSION - -/*--------------------------------------------------*/ -/* better encoding, but needs a lot more cpu time */ -/* seems to be more effective than old method +lzo */ -/* with this encoding lzo isn't efficient anymore */ -/* there is still more potential for better */ -/* encoding but that would need even more cputime */ -/* anyway your mileage may vary */ -/* */ -/* written by Martin BIELY and Roman HOCHLEITNER */ -/*--------------------------------------------------*/ - -/* +++++++++++++++++++++++++++++++++++++++++++++++++++*/ -/* Block to Stream (encoding) */ -/* */ - -int RTjpeg_b2s(__s16 *data, __s8 *strm, __u8 bt8) -{ - register int ci, co=1; - register __s16 ZZvalue; - register unsigned char bitten; - register unsigned char bitoff; - -#ifdef SHOWBLOCK - - int ii; - for (ii=0; ii < 64; ii++) { - fprintf(stdout, "%d ", data[RTjpeg_ZZ[ii]]); - } - fprintf(stdout, "\n\n"); - -#endif - -// *strm++ = 0x10; -// *strm = 0x00; -// -// return 2; - - // first byte allways written - (__u8)strm[0]= - (__u8)(data[RTjpeg_ZZ[0]]>254) ? 254:((data[RTjpeg_ZZ[0]]<0)?0:data[RTjpeg_ZZ[0]]); - - - ci=63; - while (data[RTjpeg_ZZ[ci]]==0 && ci>0) ci--; - - bitten = ((unsigned char)ci) << 2; - - if (ci==0) { - (__u8)strm[1]= bitten; - co = 2; - return (int)co; - } - - /* bitoff=0 because the high 6bit contain first non zero position */ - bitoff = 0; - co = 1; - - for(; ci>0; ci--) { - - ZZvalue = data[RTjpeg_ZZ[ci]]; - - switch(ZZvalue) { - case 0: - break; - case 1: - bitten |= (0x01<0; ci--) { - - ZZvalue = data[RTjpeg_ZZ[ci]]; - - if( (ZZvalue > 7) || (ZZvalue < -7) ) { - bitten |= (0x08<0; ci--) { - - ZZvalue = data[RTjpeg_ZZ[ci]]; - - if(ZZvalue>0) - { - strm[co++]=(__s8)(ZZvalue>127)?127:ZZvalue; - } - else - { - strm[co++]=(__s8)(ZZvalue<-128)?-128:ZZvalue; - } - - } - - -BAUCHWEH: - /* we gotoo much now we are ill */ -#ifdef SHOWBLOCK -{ -int i; -fprintf(stdout, "\nco = '%d'\n", co); - for (i=0; i < co+2; i++) { - fprintf(stdout, "%d ", strm[i]); - } -fprintf(stdout, "\n\n"); -} -#endif - - return (int)co; -} - -/* +++++++++++++++++++++++++++++++++++++++++++++++++++*/ -/* Stream to Block (decoding) */ -/* */ - -int RTjpeg_s2b(__s16 *data, __s8 *strm, __u8 bt8, __u32 *qtbl) -{ - int ci; - register int co; - register int i; - register unsigned char bitten; - register unsigned char bitoff; - - /* first byte always read */ - i=RTjpeg_ZZ[0]; - data[i]=((__u8)strm[0])*qtbl[i]; - - /* we start at the behind */ - - bitten = ((unsigned char)strm[1]) >> 2; - co = 63; - for(; co > bitten; co--) { - - data[RTjpeg_ZZ[co]] = 0; - - } - - if (co==0) { - ci = 2; - goto AUTOBAHN; - } - - /* we have to read the last 2 bits of the second byte */ - ci=1; - bitoff = 0; - - for(; co>0; co--) { - - bitten = ((unsigned char)strm[ci]) >> bitoff; - bitten &= 0x03; - - i=RTjpeg_ZZ[co]; - - switch( bitten ) { - case 0x03: - data[i]= -qtbl[i]; - break; - case 0x02: - goto FUSSWEG; - break; - case 0x01: - data[i]= qtbl[i]; - break; - case 0x00: - data[i]= 0; - break; - default: - - } - - if( bitoff == 0 ) { - bitoff = 8; - ci++; - } - bitoff -= 2; - } - /* co is 0 now */ - /* data is written properly */ - - /* if bitoff!=6 then ci is the index, but should be the byte count, so we increment by 1 */ - if (bitoff!=6) ci++; - - goto AUTOBAHN; - - -FUSSWEG: -/* correct bitoff to nibble */ - switch(bitoff){ - case 4: - case 6: - bitoff = 0; - break; - case 2: - case 0: - /* we have to read from the next byte */ - ci++; - bitoff = 4; - break; - default: - break; - } - - for(; co>0; co--) { - - bitten = ((unsigned char)strm[ci]) >> bitoff; - bitten &= 0x0f; - - i=RTjpeg_ZZ[co]; - - if( bitten == 0x08 ) { - goto STRASSE; - } - - /* the compiler cannot do sign extension for signed nibbles */ - if( bitten & 0x08 ) { - bitten |= 0xf0; - } - /* the unsigned char bitten now is a valid signed char */ - - data[i]=((signed char)bitten)*qtbl[i]; - - if( bitoff == 0 ) { - bitoff = 8; - ci++; - } - bitoff -= 4; - } - /* co is 0 */ - - /* if bitoff!=4 then ci is the index, but should be the byte count, so we increment by 1 */ - if (bitoff!=4) ci++; - - goto AUTOBAHN; - -STRASSE: - ci++; - - for(; co>0; co--) { - i=RTjpeg_ZZ[co]; - data[i]=strm[ci++]*qtbl[i]; - } - - /* ci now is the count, because it points to next element => no incrementing */ - -AUTOBAHN: - -#ifdef SHOWBLOCK -fprintf(stdout, "\nci = '%d'\n", ci); - for (i=0; i < 64; i++) { - fprintf(stdout, "%d ", data[RTjpeg_ZZ[i]]); - } -fprintf(stdout, "\n\n"); -#endif - - return ci; -} - -#else - -int RTjpeg_b2s(__s16 *data, __s8 *strm, __u8 bt8) -{ - register int ci, co=1, tmp; - register __s16 ZZvalue; - -#ifdef SHOWBLOCK - - int ii; - for (ii=0; ii < 64; ii++) { - fprintf(stdout, "%d ", data[RTjpeg_ZZ[ii]]); - } - fprintf(stdout, "\n\n"); - -#endif - - (__u8)strm[0]=(__u8)(data[RTjpeg_ZZ[0]]>254) ? 254:((data[RTjpeg_ZZ[0]]<0)?0:data[RTjpeg_ZZ[0]]); - - for(ci=1; ci<=bt8; ci++) - { - ZZvalue = data[RTjpeg_ZZ[ci]]; - - if(ZZvalue>0) - { - strm[co++]=(__s8)(ZZvalue>127)?127:ZZvalue; - } - else - { - strm[co++]=(__s8)(ZZvalue<-128)?-128:ZZvalue; - } - } - - for(; ci<64; ci++) - { - ZZvalue = data[RTjpeg_ZZ[ci]]; - - if(ZZvalue>0) - { - strm[co++]=(__s8)(ZZvalue>63)?63:ZZvalue; - } - else if(ZZvalue<0) - { - strm[co++]=(__s8)(ZZvalue<-64)?-64:ZZvalue; - } - else /* compress zeros */ - { - tmp=ci; - do - { - ci++; - } - while((ci<64)&&(data[RTjpeg_ZZ[ci]]==0)); - - strm[co++]=(__s8)(63+(ci-tmp)); - ci--; - } - } - return (int)co; -} - -int RTjpeg_s2b(__s16 *data, __s8 *strm, __u8 bt8, __u32 *qtbl) -{ - int ci=1, co=1, tmp; - register int i; - - i=RTjpeg_ZZ[0]; - data[i]=((__u8)strm[0])*qtbl[i]; - - for(co=1; co<=bt8; co++) - { - i=RTjpeg_ZZ[co]; - data[i]=strm[ci++]*qtbl[i]; - } - - for(; co<64; co++) - { - if(strm[ci]>63) - { - tmp=co+strm[ci]-63; - for(; co>16); -} -#endif - -/* - * Perform the forward DCT on one block of samples. - */ -#ifdef MMX -static mmx_t RTjpeg_C4 =(mmx_t)(long long)0x2D412D412D412D41LL; -static mmx_t RTjpeg_C6 =(mmx_t)(long long)0x187E187E187E187ELL; -static mmx_t RTjpeg_C2mC6=(mmx_t)(long long)0x22A322A322A322A3LL; -static mmx_t RTjpeg_C2pC6=(mmx_t)(long long)0x539F539F539F539FLL; -static mmx_t RTjpeg_zero =(mmx_t)(long long)0x0000000000000000LL; - -#else - -#define FIX_0_382683433 ((__s32) 98) /* FIX(0.382683433) */ -#define FIX_0_541196100 ((__s32) 139) /* FIX(0.541196100) */ -#define FIX_0_707106781 ((__s32) 181) /* FIX(0.707106781) */ -#define FIX_1_306562965 ((__s32) 334) /* FIX(1.306562965) */ - -#define DESCALE10(x) (__s16)( ((x)+128) >> 8) -#define DESCALE20(x) (__s16)(((x)+32768) >> 16) -#define D_MULTIPLY(var,const) ((__s32) ((var) * (const))) -#endif - -void RTjpeg_dct_init(void) -{ - int i; - - for(i=0; i<64; i++) - { - RTjpeg_lqt[i]=(((__u64)RTjpeg_lqt[i]<<32)/RTjpeg_aan_tab[i]); - RTjpeg_cqt[i]=(((__u64)RTjpeg_cqt[i]<<32)/RTjpeg_aan_tab[i]); - } -} - -void RTjpeg_dctY(__u8 *idata, __s16 *odata, int rskip) -{ -#ifndef MMX - __s32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - __s32 tmp10, tmp11, tmp12, tmp13; - __s32 z1, z2, z3, z4, z5, z11, z13; - __u8 *idataptr; - __s16 *odataptr; - __s32 *wsptr; - int ctr; - - idataptr = idata; - wsptr = RTjpeg_ws; - for (ctr = 7; ctr >= 0; ctr--) { - tmp0 = idataptr[0] + idataptr[7]; - tmp7 = idataptr[0] - idataptr[7]; - tmp1 = idataptr[1] + idataptr[6]; - tmp6 = idataptr[1] - idataptr[6]; - tmp2 = idataptr[2] + idataptr[5]; - tmp5 = idataptr[2] - idataptr[5]; - tmp3 = idataptr[3] + idataptr[4]; - tmp4 = idataptr[3] - idataptr[4]; - - tmp10 = (tmp0 + tmp3); /* phase 2 */ - tmp13 = tmp0 - tmp3; - tmp11 = (tmp1 + tmp2); - tmp12 = tmp1 - tmp2; - - wsptr[0] = (tmp10 + tmp11)<<8; /* phase 3 */ - wsptr[4] = (tmp10 - tmp11)<<8; - - z1 = D_MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */ - wsptr[2] = (tmp13<<8) + z1; /* phase 5 */ - wsptr[6] = (tmp13<<8) - z1; - - tmp10 = tmp4 + tmp5; /* phase 2 */ - tmp11 = tmp5 + tmp6; - tmp12 = tmp6 + tmp7; - - z5 = D_MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */ - z2 = D_MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */ - z4 = D_MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ - z3 = D_MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ - - z11 = (tmp7<<8) + z3; /* phase 5 */ - z13 = (tmp7<<8) - z3; - - wsptr[5] = z13 + z2; /* phase 6 */ - wsptr[3] = z13 - z2; - wsptr[1] = z11 + z4; - wsptr[7] = z11 - z4; - - idataptr += rskip<<3; /* advance pointer to next row */ - wsptr += 8; - } - - wsptr = RTjpeg_ws; - odataptr=odata; - for (ctr = 7; ctr >= 0; ctr--) { - tmp0 = wsptr[0] + wsptr[56]; - tmp7 = wsptr[0] - wsptr[56]; - tmp1 = wsptr[8] + wsptr[48]; - tmp6 = wsptr[8] - wsptr[48]; - tmp2 = wsptr[16] + wsptr[40]; - tmp5 = wsptr[16] - wsptr[40]; - tmp3 = wsptr[24] + wsptr[32]; - tmp4 = wsptr[24] - wsptr[32]; - - tmp10 = tmp0 + tmp3; /* phase 2 */ - tmp13 = tmp0 - tmp3; - tmp11 = tmp1 + tmp2; - tmp12 = tmp1 - tmp2; - - odataptr[0] = DESCALE10(tmp10 + tmp11); /* phase 3 */ - odataptr[32] = DESCALE10(tmp10 - tmp11); - - z1 = D_MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */ - odataptr[16] = DESCALE20((tmp13<<8) + z1); /* phase 5 */ - odataptr[48] = DESCALE20((tmp13<<8) - z1); - - tmp10 = tmp4 + tmp5; /* phase 2 */ - tmp11 = tmp5 + tmp6; - tmp12 = tmp6 + tmp7; - - z5 = D_MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */ - z2 = D_MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */ - z4 = D_MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ - z3 = D_MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ - - z11 = (tmp7<<8) + z3; /* phase 5 */ - z13 = (tmp7<<8) - z3; - - odataptr[40] = DESCALE20(z13 + z2); /* phase 6 */ - odataptr[24] = DESCALE20(z13 - z2); - odataptr[8] = DESCALE20(z11 + z4); - odataptr[56] = DESCALE20(z11 - z4); - - odataptr++; /* advance pointer to next column */ - wsptr++; - } -#else - volatile mmx_t tmp6, tmp7; - register mmx_t *dataptr = (mmx_t *)odata; - mmx_t *idata2 = (mmx_t *)idata; - - // first copy the input 8 bit to the destination 16 bits - - movq_m2r(RTjpeg_zero, mm2); - - - movq_m2r(*idata2, mm0); - movq_r2r(mm0, mm1); - - punpcklbw_r2r(mm2, mm0); - movq_r2m(mm0, *(dataptr)); - - punpckhbw_r2r(mm2, mm1); - movq_r2m(mm1, *(dataptr+1)); - - idata2 += rskip; - - movq_m2r(*idata2, mm0); - movq_r2r(mm0, mm1); - - punpcklbw_r2r(mm2, mm0); - movq_r2m(mm0, *(dataptr+2)); - - punpckhbw_r2r(mm2, mm1); - movq_r2m(mm1, *(dataptr+3)); - - idata2 += rskip; - - movq_m2r(*idata2, mm0); - movq_r2r(mm0, mm1); - - punpcklbw_r2r(mm2, mm0); - movq_r2m(mm0, *(dataptr+4)); - - punpckhbw_r2r(mm2, mm1); - movq_r2m(mm1, *(dataptr+5)); - - idata2 += rskip; - - movq_m2r(*idata2, mm0); - movq_r2r(mm0, mm1); - - punpcklbw_r2r(mm2, mm0); - movq_r2m(mm0, *(dataptr+6)); - - punpckhbw_r2r(mm2, mm1); - movq_r2m(mm1, *(dataptr+7)); - - idata2 += rskip; - - movq_m2r(*idata2, mm0); - movq_r2r(mm0, mm1); - - punpcklbw_r2r(mm2, mm0); - movq_r2m(mm0, *(dataptr+8)); - - punpckhbw_r2r(mm2, mm1); - movq_r2m(mm1, *(dataptr+9)); - - idata2 += rskip; - - movq_m2r(*idata2, mm0); - movq_r2r(mm0, mm1); - - punpcklbw_r2r(mm2, mm0); - movq_r2m(mm0, *(dataptr+10)); - - punpckhbw_r2r(mm2, mm1); - movq_r2m(mm1, *(dataptr+11)); - - idata2 += rskip; - - movq_m2r(*idata2, mm0); - movq_r2r(mm0, mm1); - - punpcklbw_r2r(mm2, mm0); - movq_r2m(mm0, *(dataptr+12)); - - punpckhbw_r2r(mm2, mm1); - movq_r2m(mm1, *(dataptr+13)); - - idata2 += rskip; - - movq_m2r(*idata2, mm0); - movq_r2r(mm0, mm1); - - punpcklbw_r2r(mm2, mm0); - movq_r2m(mm0, *(dataptr+14)); - - punpckhbw_r2r(mm2, mm1); - movq_r2m(mm1, *(dataptr+15)); - -/* Start Transpose to do calculations on rows */ - - movq_m2r(*(dataptr+9), mm7); // m03:m02|m01:m00 - first line (line 4)and copy into m5 - - movq_m2r(*(dataptr+13), mm6); // m23:m22|m21:m20 - third line (line 6)and copy into m2 - movq_r2r(mm7, mm5); - - punpcklwd_m2r(*(dataptr+11), mm7); // m11:m01|m10:m00 - interleave first and second lines - movq_r2r(mm6, mm2); - - punpcklwd_m2r(*(dataptr+15), mm6); // m31:m21|m30:m20 - interleave third and fourth lines - movq_r2r(mm7, mm1); - - movq_m2r(*(dataptr+11), mm3); // m13:m13|m11:m10 - second line - punpckldq_r2r(mm6, mm7); // m30:m20|m10:m00 - interleave to produce result 1 - - movq_m2r(*(dataptr+15), mm0); // m13:m13|m11:m10 - fourth line - punpckhdq_r2r(mm6, mm1); // m31:m21|m11:m01 - interleave to produce result 2 - - movq_r2m(mm7,*(dataptr+9)); // write result 1 - punpckhwd_r2r(mm3, mm5); // m13:m03|m12:m02 - interleave first and second lines - - movq_r2m(mm1,*(dataptr+11)); // write result 2 - punpckhwd_r2r(mm0, mm2); // m33:m23|m32:m22 - interleave third and fourth lines - - movq_r2r(mm5, mm1); - punpckldq_r2r(mm2, mm5); // m32:m22|m12:m02 - interleave to produce result 3 - - movq_m2r(*(dataptr+1), mm0); // m03:m02|m01:m00 - first line, 4x4 - punpckhdq_r2r(mm2, mm1); // m33:m23|m13:m03 - interleave to produce result 4 - - movq_r2m(mm5,*(dataptr+13)); // write result 3 - - // last 4x4 done - - movq_r2m(mm1, *(dataptr+15)); // write result 4, last 4x4 - - movq_m2r(*(dataptr+5), mm2); // m23:m22|m21:m20 - third line - movq_r2r(mm0, mm6); - - punpcklwd_m2r(*(dataptr+3), mm0); // m11:m01|m10:m00 - interleave first and second lines - movq_r2r(mm2, mm7); - - punpcklwd_m2r(*(dataptr+7), mm2); // m31:m21|m30:m20 - interleave third and fourth lines - movq_r2r(mm0, mm4); - - // - movq_m2r(*(dataptr+8), mm1); // n03:n02|n01:n00 - first line - punpckldq_r2r(mm2, mm0); // m30:m20|m10:m00 - interleave to produce first result - - movq_m2r(*(dataptr+12), mm3); // n23:n22|n21:n20 - third line - punpckhdq_r2r(mm2, mm4); // m31:m21|m11:m01 - interleave to produce second result - - punpckhwd_m2r(*(dataptr+3), mm6); // m13:m03|m12:m02 - interleave first and second lines - movq_r2r(mm1, mm2); // copy first line - - punpckhwd_m2r(*(dataptr+7), mm7); // m33:m23|m32:m22 - interleave third and fourth lines - movq_r2r(mm6, mm5); // copy first intermediate result - - movq_r2m(mm0, *(dataptr+8)); // write result 1 - punpckhdq_r2r(mm7, mm5); // m33:m23|m13:m03 - produce third result - - punpcklwd_m2r(*(dataptr+10), mm1); // n11:n01|n10:n00 - interleave first and second lines - movq_r2r(mm3, mm0); // copy third line - - punpckhwd_m2r(*(dataptr+10), mm2); // n13:n03|n12:n02 - interleave first and second lines - - movq_r2m(mm4, *(dataptr+10)); // write result 2 out - punpckldq_r2r(mm7, mm6); // m32:m22|m12:m02 - produce fourth result - - punpcklwd_m2r(*(dataptr+14), mm3); // n31:n21|n30:n20 - interleave third and fourth lines - movq_r2r(mm1, mm4); - - movq_r2m(mm6, *(dataptr+12)); // write result 3 out - punpckldq_r2r(mm3, mm1); // n30:n20|n10:n00 - produce first result - - punpckhwd_m2r(*(dataptr+14), mm0); // n33:n23|n32:n22 - interleave third and fourth lines - movq_r2r(mm2, mm6); - - movq_r2m(mm5, *(dataptr+14)); // write result 4 out - punpckhdq_r2r(mm3, mm4); // n31:n21|n11:n01- produce second result - - movq_r2m(mm1, *(dataptr+1)); // write result 5 out - (first result for other 4 x 4 block) - punpckldq_r2r(mm0, mm2); // n32:n22|n12:n02- produce third result - - movq_r2m(mm4, *(dataptr+3)); // write result 6 out - punpckhdq_r2r(mm0, mm6); // n33:n23|n13:n03 - produce fourth result - - movq_r2m(mm2, *(dataptr+5)); // write result 7 out - - movq_m2r(*dataptr, mm0); // m03:m02|m01:m00 - first line, first 4x4 - - movq_r2m(mm6, *(dataptr+7)); // write result 8 out - - -// Do first 4x4 quadrant, which is used in the beginning of the DCT: - - movq_m2r(*(dataptr+4), mm7); // m23:m22|m21:m20 - third line - movq_r2r(mm0, mm2); - - punpcklwd_m2r(*(dataptr+2), mm0); // m11:m01|m10:m00 - interleave first and second lines - movq_r2r(mm7, mm4); - - punpcklwd_m2r(*(dataptr+6), mm7); // m31:m21|m30:m20 - interleave third and fourth lines - movq_r2r(mm0, mm1); - - movq_m2r(*(dataptr+2), mm6); // m13:m12|m11:m10 - second line - punpckldq_r2r(mm7, mm0); // m30:m20|m10:m00 - interleave to produce result 1 - - movq_m2r(*(dataptr+6), mm5); // m33:m32|m31:m30 - fourth line - punpckhdq_r2r(mm7, mm1); // m31:m21|m11:m01 - interleave to produce result 2 - - movq_r2r(mm0, mm7); // write result 1 - punpckhwd_r2r(mm6, mm2); // m13:m03|m12:m02 - interleave first and second lines - - psubw_m2r(*(dataptr+14), mm7); // tmp07=x0-x7 /* Stage 1 */ - movq_r2r(mm1, mm6); // write result 2 - - paddw_m2r(*(dataptr+14), mm0); // tmp00=x0+x7 /* Stage 1 */ - punpckhwd_r2r(mm5, mm4); // m33:m23|m32:m22 - interleave third and fourth lines - - paddw_m2r(*(dataptr+12), mm1); // tmp01=x1+x6 /* Stage 1 */ - movq_r2r(mm2, mm3); // copy first intermediate result - - psubw_m2r(*(dataptr+12), mm6); // tmp06=x1-x6 /* Stage 1 */ - punpckldq_r2r(mm4, mm2); // m32:m22|m12:m02 - interleave to produce result 3 - - movq_r2m(mm7, tmp7); - movq_r2r(mm2, mm5); // write result 3 - - movq_r2m(mm6, tmp6); - punpckhdq_r2r(mm4, mm3); // m33:m23|m13:m03 - interleave to produce result 4 - - paddw_m2r(*(dataptr+10), mm2); // tmp02=x2+5 /* Stage 1 */ - movq_r2r(mm3, mm4); // write result 4 - -/************************************************************************************************ - End of Transpose -************************************************************************************************/ - - - paddw_m2r(*(dataptr+8), mm3); // tmp03=x3+x4 /* stage 1*/ - movq_r2r(mm0, mm7); - - psubw_m2r(*(dataptr+8), mm4); // tmp04=x3-x4 /* stage 1*/ - movq_r2r(mm1, mm6); - - paddw_r2r(mm3, mm0); // tmp10 = tmp00 + tmp03 /* even 2 */ - psubw_r2r(mm3, mm7); // tmp13 = tmp00 - tmp03 /* even 2 */ - - psubw_r2r(mm2, mm6); // tmp12 = tmp01 - tmp02 /* even 2 */ - paddw_r2r(mm2, mm1); // tmp11 = tmp01 + tmp02 /* even 2 */ - - psubw_m2r(*(dataptr+10), mm5); // tmp05=x2-x5 /* stage 1*/ - paddw_r2r(mm7, mm6); // tmp12 + tmp13 - - /* stage 3 */ - - movq_m2r(tmp6, mm2); - movq_r2r(mm0, mm3); - - psllw_i2r(2, mm6); // m8 * 2^2 - paddw_r2r(mm1, mm0); - - pmulhw_m2r(RTjpeg_C4, mm6); // z1 - psubw_r2r(mm1, mm3); - - movq_r2m(mm0, *dataptr); - movq_r2r(mm7, mm0); - - /* Odd part */ - movq_r2m(mm3, *(dataptr+8)); - paddw_r2r(mm5, mm4); // tmp10 - - movq_m2r(tmp7, mm3); - paddw_r2r(mm6, mm0); // tmp32 - - paddw_r2r(mm2, mm5); // tmp11 - psubw_r2r(mm6, mm7); // tmp33 - - movq_r2m(mm0, *(dataptr+4)); - paddw_r2r(mm3, mm2); // tmp12 - - /* stage 4 */ - - movq_r2m(mm7, *(dataptr+12)); - movq_r2r(mm4, mm1); // copy of tmp10 - - psubw_r2r(mm2, mm1); // tmp10 - tmp12 - psllw_i2r(2, mm4); // m8 * 2^2 - - movq_m2r(RTjpeg_C2mC6, mm0); - psllw_i2r(2, mm1); - - pmulhw_m2r(RTjpeg_C6, mm1); // z5 - psllw_i2r(2, mm2); - - pmulhw_r2r(mm0, mm4); // z5 - - /* stage 5 */ - - pmulhw_m2r(RTjpeg_C2pC6, mm2); - psllw_i2r(2, mm5); - - pmulhw_m2r(RTjpeg_C4, mm5); // z3 - movq_r2r(mm3, mm0); // copy tmp7 - - movq_m2r(*(dataptr+1), mm7); - paddw_r2r(mm1, mm4); // z2 - - paddw_r2r(mm1, mm2); // z4 - - paddw_r2r(mm5, mm0); // z11 - psubw_r2r(mm5, mm3); // z13 - - /* stage 6 */ - - movq_r2r(mm3, mm5); // copy z13 - psubw_r2r(mm4, mm3); // y3=z13 - z2 - - paddw_r2r(mm4, mm5); // y5=z13 + z2 - movq_r2r(mm0, mm6); // copy z11 - - movq_r2m(mm3, *(dataptr+6)); //save y3 - psubw_r2r(mm2, mm0); // y7=z11 - z4 - - movq_r2m(mm5, *(dataptr+10)); //save y5 - paddw_r2r(mm2, mm6); // y1=z11 + z4 - - movq_r2m(mm0, *(dataptr+14)); //save y7 - - /************************************************ - * End of 1st 4 rows - ************************************************/ - - movq_m2r(*(dataptr+3), mm1); // load x1 /* stage 1 */ - movq_r2r(mm7, mm0); // copy x0 - - movq_r2m(mm6, *(dataptr+2)); //save y1 - - movq_m2r(*(dataptr+5), mm2); // load x2 /* stage 1 */ - movq_r2r(mm1, mm6); // copy x1 - - paddw_m2r(*(dataptr+15), mm0); // tmp00 = x0 + x7 - - movq_m2r(*(dataptr+7), mm3); // load x3 /* stage 1 */ - movq_r2r(mm2, mm5); // copy x2 - - psubw_m2r(*(dataptr+15), mm7); // tmp07 = x0 - x7 - movq_r2r(mm3, mm4); // copy x3 - - paddw_m2r(*(dataptr+13), mm1); // tmp01 = x1 + x6 - - movq_r2m(mm7, tmp7); // save tmp07 - movq_r2r(mm0, mm7); // copy tmp00 - - psubw_m2r(*(dataptr+13), mm6); // tmp06 = x1 - x6 - - /* stage 2, Even Part */ - - paddw_m2r(*(dataptr+9), mm3); // tmp03 = x3 + x4 - - movq_r2m(mm6, tmp6); // save tmp07 - movq_r2r(mm1, mm6); // copy tmp01 - - paddw_m2r(*(dataptr+11), mm2); // tmp02 = x2 + x5 - paddw_r2r(mm3, mm0); // tmp10 = tmp00 + tmp03 - - psubw_r2r(mm3, mm7); // tmp13 = tmp00 - tmp03 - - psubw_m2r(*(dataptr+9), mm4); // tmp04 = x3 - x4 - psubw_r2r(mm2, mm6); // tmp12 = tmp01 - tmp02 - - paddw_r2r(mm2, mm1); // tmp11 = tmp01 + tmp02 - - psubw_m2r(*(dataptr+11), mm5); // tmp05 = x2 - x5 - paddw_r2r(mm7, mm6); // tmp12 + tmp13 - - /* stage 3, Even and stage 4 & 5 even */ - - movq_m2r(tmp6, mm2); // load tmp6 - movq_r2r(mm0, mm3); // copy tmp10 - - psllw_i2r(2, mm6); // shift z1 - paddw_r2r(mm1, mm0); // y0=tmp10 + tmp11 - - pmulhw_m2r(RTjpeg_C4, mm6); // z1 - psubw_r2r(mm1, mm3); // y4=tmp10 - tmp11 - - movq_r2m(mm0, *(dataptr+1)); //save y0 - movq_r2r(mm7, mm0); // copy tmp13 - - /* odd part */ - - movq_r2m(mm3, *(dataptr+9)); //save y4 - paddw_r2r(mm5, mm4); // tmp10 = tmp4 + tmp5 - - movq_m2r(tmp7, mm3); // load tmp7 - paddw_r2r(mm6, mm0); // tmp32 = tmp13 + z1 - - paddw_r2r(mm2, mm5); // tmp11 = tmp5 + tmp6 - psubw_r2r(mm6, mm7); // tmp33 = tmp13 - z1 - - movq_r2m(mm0, *(dataptr+5)); //save y2 - paddw_r2r(mm3, mm2); // tmp12 = tmp6 + tmp7 - - /* stage 4 */ - - movq_r2m(mm7, *(dataptr+13)); //save y6 - movq_r2r(mm4, mm1); // copy tmp10 - - psubw_r2r(mm2, mm1); // tmp10 - tmp12 - psllw_i2r(2, mm4); // shift tmp10 - - movq_m2r(RTjpeg_C2mC6, mm0); // load C2mC6 - psllw_i2r(2, mm1); // shift (tmp10-tmp12) - - pmulhw_m2r(RTjpeg_C6, mm1); // z5 - psllw_i2r(2, mm5); // prepare for multiply - - pmulhw_r2r(mm0, mm4); // multiply by converted real - - /* stage 5 */ - - pmulhw_m2r(RTjpeg_C4, mm5); // z3 - psllw_i2r(2, mm2); // prepare for multiply - - pmulhw_m2r(RTjpeg_C2pC6, mm2); // multiply - movq_r2r(mm3, mm0); // copy tmp7 - - movq_m2r(*(dataptr+9), mm7); // m03:m02|m01:m00 - first line (line 4)and copy into mm7 - paddw_r2r(mm1, mm4); // z2 - - paddw_r2r(mm5, mm0); // z11 - psubw_r2r(mm5, mm3); // z13 - - /* stage 6 */ - - movq_r2r(mm3, mm5); // copy z13 - paddw_r2r(mm1, mm2); // z4 - - movq_r2r(mm0, mm6); // copy z11 - psubw_r2r(mm4, mm5); // y3 - - paddw_r2r(mm2, mm6); // y1 - paddw_r2r(mm4, mm3); // y5 - - movq_r2m(mm5, *(dataptr+7)); //save y3 - - movq_r2m(mm6, *(dataptr+3)); //save y1 - psubw_r2r(mm2, mm0); // y7 - -/************************************************************************************************ - Start of Transpose -************************************************************************************************/ - - movq_m2r(*(dataptr+13), mm6); // m23:m22|m21:m20 - third line (line 6)and copy into m2 - movq_r2r(mm7, mm5); // copy first line - - punpcklwd_r2r(mm3, mm7); // m11:m01|m10:m00 - interleave first and second lines - movq_r2r(mm6, mm2); // copy third line - - punpcklwd_r2r(mm0, mm6); // m31:m21|m30:m20 - interleave third and fourth lines - movq_r2r(mm7, mm1); // copy first intermediate result - - punpckldq_r2r(mm6, mm7); // m30:m20|m10:m00 - interleave to produce result 1 - - punpckhdq_r2r(mm6, mm1); // m31:m21|m11:m01 - interleave to produce result 2 - - movq_r2m(mm7, *(dataptr+9)); // write result 1 - punpckhwd_r2r(mm3, mm5); // m13:m03|m12:m02 - interleave first and second lines - - movq_r2m(mm1, *(dataptr+11)); // write result 2 - punpckhwd_r2r(mm0, mm2); // m33:m23|m32:m22 - interleave third and fourth lines - - movq_r2r(mm5, mm1); // copy first intermediate result - punpckldq_r2r(mm2, mm5); // m32:m22|m12:m02 - interleave to produce result 3 - - movq_m2r(*(dataptr+1), mm0); // m03:m02|m01:m00 - first line, 4x4 - punpckhdq_r2r(mm2, mm1); // m33:m23|m13:m03 - interleave to produce result 4 - - movq_r2m(mm5, *(dataptr+13)); // write result 3 - - /****** last 4x4 done */ - - movq_r2m(mm1, *(dataptr+15)); // write result 4, last 4x4 - - movq_m2r(*(dataptr+5), mm2); // m23:m22|m21:m20 - third line - movq_r2r(mm0, mm6); // copy first line - - punpcklwd_m2r(*(dataptr+3), mm0); // m11:m01|m10:m00 - interleave first and second lines - movq_r2r(mm2, mm7); // copy third line - - punpcklwd_m2r(*(dataptr+7), mm2); // m31:m21|m30:m20 - interleave third and fourth lines - movq_r2r(mm0, mm4); // copy first intermediate result - - - - movq_m2r(*(dataptr+8), mm1); // n03:n02|n01:n00 - first line - punpckldq_r2r(mm2, mm0); // m30:m20|m10:m00 - interleave to produce first result - - movq_m2r(*(dataptr+12), mm3); // n23:n22|n21:n20 - third line - punpckhdq_r2r(mm2, mm4); // m31:m21|m11:m01 - interleave to produce second result - - punpckhwd_m2r(*(dataptr+3), mm6); // m13:m03|m12:m02 - interleave first and second lines - movq_r2r(mm1, mm2); // copy first line - - punpckhwd_m2r(*(dataptr+7), mm7); // m33:m23|m32:m22 - interleave third and fourth lines - movq_r2r(mm6, mm5); // copy first intermediate result - - movq_r2m(mm0, *(dataptr+8)); // write result 1 - punpckhdq_r2r(mm7, mm5); // m33:m23|m13:m03 - produce third result - - punpcklwd_m2r(*(dataptr+10), mm1); // n11:n01|n10:n00 - interleave first and second lines - movq_r2r(mm3, mm0); // copy third line - - punpckhwd_m2r(*(dataptr+10), mm2); // n13:n03|n12:n02 - interleave first and second lines - - movq_r2m(mm4, *(dataptr+10)); // write result 2 out - punpckldq_r2r(mm7, mm6); // m32:m22|m12:m02 - produce fourth result - - punpcklwd_m2r(*(dataptr+14), mm3); // n33:n23|n32:n22 - interleave third and fourth lines - movq_r2r(mm1, mm4); // copy second intermediate result - - movq_r2m(mm6, *(dataptr+12)); // write result 3 out - punpckldq_r2r(mm3, mm1); // - - punpckhwd_m2r(*(dataptr+14), mm0); // n33:n23|n32:n22 - interleave third and fourth lines - movq_r2r(mm2, mm6); // copy second intermediate result - - movq_r2m(mm5, *(dataptr+14)); // write result 4 out - punpckhdq_r2r(mm3, mm4); // n31:n21|n11:n01- produce second result - - movq_r2m(mm1, *(dataptr+1)); // write result 5 out - (first result for other 4 x 4 block) - punpckldq_r2r(mm0, mm2); // n32:n22|n12:n02- produce third result - - movq_r2m(mm4, *(dataptr+3)); // write result 6 out - punpckhdq_r2r(mm0, mm6); // n33:n23|n13:n03 - produce fourth result - - movq_r2m(mm2, *(dataptr+5)); // write result 7 out - - movq_m2r(*dataptr, mm0); // m03:m02|m01:m00 - first line, first 4x4 - - movq_r2m(mm6, *(dataptr+7)); // write result 8 out - -// Do first 4x4 quadrant, which is used in the beginning of the DCT: - - movq_m2r(*(dataptr+4), mm7); // m23:m22|m21:m20 - third line - movq_r2r(mm0, mm2); // copy first line - - punpcklwd_m2r(*(dataptr+2), mm0); // m11:m01|m10:m00 - interleave first and second lines - movq_r2r(mm7, mm4); // copy third line - - punpcklwd_m2r(*(dataptr+6), mm7); // m31:m21|m30:m20 - interleave third and fourth lines - movq_r2r(mm0, mm1); // copy first intermediate result - - movq_m2r(*(dataptr+2), mm6); // m13:m12|m11:m10 - second line - punpckldq_r2r(mm7, mm0); // m30:m20|m10:m00 - interleave to produce result 1 - - movq_m2r(*(dataptr+6), mm5); // m33:m32|m31:m30 - fourth line - punpckhdq_r2r(mm7, mm1); // m31:m21|m11:m01 - interleave to produce result 2 - - movq_r2r(mm0, mm7); // write result 1 - punpckhwd_r2r(mm6, mm2); // m13:m03|m12:m02 - interleave first and second lines - - psubw_m2r(*(dataptr+14), mm7); // tmp07=x0-x7 /* Stage 1 */ - movq_r2r(mm1, mm6); // write result 2 - - paddw_m2r(*(dataptr+14), mm0); // tmp00=x0+x7 /* Stage 1 */ - punpckhwd_r2r(mm5, mm4); // m33:m23|m32:m22 - interleave third and fourth lines - - paddw_m2r(*(dataptr+12), mm1); // tmp01=x1+x6 /* Stage 1 */ - movq_r2r(mm2, mm3); // copy first intermediate result - - psubw_m2r(*(dataptr+12), mm6); // tmp06=x1-x6 /* Stage 1 */ - punpckldq_r2r(mm4, mm2); // m32:m22|m12:m02 - interleave to produce result 3 - - movq_r2m(mm7, tmp7); // save tmp07 - movq_r2r(mm2, mm5); // write result 3 - - movq_r2m(mm6, tmp6); // save tmp06 - - punpckhdq_r2r(mm4, mm3); // m33:m23|m13:m03 - interleave to produce result 4 - - paddw_m2r(*(dataptr+10), mm2); // tmp02=x2+x5 /* stage 1 */ - movq_r2r(mm3, mm4); // write result 4 - -/************************************************************************************************ - End of Transpose 2 -************************************************************************************************/ - - paddw_m2r(*(dataptr+8), mm3); // tmp03=x3+x4 /* stage 1*/ - movq_r2r(mm0, mm7); - - psubw_m2r(*(dataptr+8), mm4); // tmp04=x3-x4 /* stage 1*/ - movq_r2r(mm1, mm6); - - paddw_r2r(mm3, mm0); // tmp10 = tmp00 + tmp03 /* even 2 */ - psubw_r2r(mm3, mm7); // tmp13 = tmp00 - tmp03 /* even 2 */ - - psubw_r2r(mm2, mm6); // tmp12 = tmp01 - tmp02 /* even 2 */ - paddw_r2r(mm2, mm1); // tmp11 = tmp01 + tmp02 /* even 2 */ - - psubw_m2r(*(dataptr+10), mm5); // tmp05=x2-x5 /* stage 1*/ - paddw_r2r(mm7, mm6); // tmp12 + tmp13 - - /* stage 3 */ - - movq_m2r(tmp6, mm2); - movq_r2r(mm0, mm3); - - psllw_i2r(2, mm6); // m8 * 2^2 - paddw_r2r(mm1, mm0); - - pmulhw_m2r(RTjpeg_C4, mm6); // z1 - psubw_r2r(mm1, mm3); - - movq_r2m(mm0, *dataptr); - movq_r2r(mm7, mm0); - - /* Odd part */ - movq_r2m(mm3, *(dataptr+8)); - paddw_r2r(mm5, mm4); // tmp10 - - movq_m2r(tmp7, mm3); - paddw_r2r(mm6, mm0); // tmp32 - - paddw_r2r(mm2, mm5); // tmp11 - psubw_r2r(mm6, mm7); // tmp33 - - movq_r2m(mm0, *(dataptr+4)); - paddw_r2r(mm3, mm2); // tmp12 - - /* stage 4 */ - movq_r2m(mm7, *(dataptr+12)); - movq_r2r(mm4, mm1); // copy of tmp10 - - psubw_r2r(mm2, mm1); // tmp10 - tmp12 - psllw_i2r(2, mm4); // m8 * 2^2 - - movq_m2r(RTjpeg_C2mC6, mm0); - psllw_i2r(2, mm1); - - pmulhw_m2r(RTjpeg_C6, mm1); // z5 - psllw_i2r(2, mm2); - - pmulhw_r2r(mm0, mm4); // z5 - - /* stage 5 */ - - pmulhw_m2r(RTjpeg_C2pC6, mm2); - psllw_i2r(2, mm5); - - pmulhw_m2r(RTjpeg_C4, mm5); // z3 - movq_r2r(mm3, mm0); // copy tmp7 - - movq_m2r(*(dataptr+1), mm7); - paddw_r2r(mm1, mm4); // z2 - - paddw_r2r(mm1, mm2); // z4 - - paddw_r2r(mm5, mm0); // z11 - psubw_r2r(mm5, mm3); // z13 - - /* stage 6 */ - - movq_r2r(mm3, mm5); // copy z13 - psubw_r2r(mm4, mm3); // y3=z13 - z2 - - paddw_r2r(mm4, mm5); // y5=z13 + z2 - movq_r2r(mm0, mm6); // copy z11 - - movq_r2m(mm3, *(dataptr+6)); //save y3 - psubw_r2r(mm2, mm0); // y7=z11 - z4 - - movq_r2m(mm5, *(dataptr+10)); //save y5 - paddw_r2r(mm2, mm6); // y1=z11 + z4 - - movq_r2m(mm0, *(dataptr+14)); //save y7 - - /************************************************ - * End of 1st 4 rows - ************************************************/ - - movq_m2r(*(dataptr+3), mm1); // load x1 /* stage 1 */ - movq_r2r(mm7, mm0); // copy x0 - - movq_r2m(mm6, *(dataptr+2)); //save y1 - - movq_m2r(*(dataptr+5), mm2); // load x2 /* stage 1 */ - movq_r2r(mm1, mm6); // copy x1 - - paddw_m2r(*(dataptr+15), mm0); // tmp00 = x0 + x7 - - movq_m2r(*(dataptr+7), mm3); // load x3 /* stage 1 */ - movq_r2r(mm2, mm5); // copy x2 - - psubw_m2r(*(dataptr+15), mm7); // tmp07 = x0 - x7 - movq_r2r(mm3, mm4); // copy x3 - - paddw_m2r(*(dataptr+13), mm1); // tmp01 = x1 + x6 - - movq_r2m(mm7, tmp7); // save tmp07 - movq_r2r(mm0, mm7); // copy tmp00 - - psubw_m2r(*(dataptr+13), mm6); // tmp06 = x1 - x6 - - /* stage 2, Even Part */ - - paddw_m2r(*(dataptr+9), mm3); // tmp03 = x3 + x4 - - movq_r2m(mm6, tmp6); // save tmp07 - movq_r2r(mm1, mm6); // copy tmp01 - - paddw_m2r(*(dataptr+11), mm2); // tmp02 = x2 + x5 - paddw_r2r(mm3, mm0); // tmp10 = tmp00 + tmp03 - - psubw_r2r(mm3, mm7); // tmp13 = tmp00 - tmp03 - - psubw_m2r(*(dataptr+9), mm4); // tmp04 = x3 - x4 - psubw_r2r(mm2, mm6); // tmp12 = tmp01 - tmp02 - - paddw_r2r(mm2, mm1); // tmp11 = tmp01 + tmp02 - - psubw_m2r(*(dataptr+11), mm5); // tmp05 = x2 - x5 - paddw_r2r(mm7, mm6); // tmp12 + tmp13 - - /* stage 3, Even and stage 4 & 5 even */ - - movq_m2r(tmp6, mm2); // load tmp6 - movq_r2r(mm0, mm3); // copy tmp10 - - psllw_i2r(2, mm6); // shift z1 - paddw_r2r(mm1, mm0); // y0=tmp10 + tmp11 - - pmulhw_m2r(RTjpeg_C4, mm6); // z1 - psubw_r2r(mm1, mm3); // y4=tmp10 - tmp11 - - movq_r2m(mm0, *(dataptr+1)); //save y0 - movq_r2r(mm7, mm0); // copy tmp13 - - /* odd part */ - - movq_r2m(mm3, *(dataptr+9)); //save y4 - paddw_r2r(mm5, mm4); // tmp10 = tmp4 + tmp5 - - movq_m2r(tmp7, mm3); // load tmp7 - paddw_r2r(mm6, mm0); // tmp32 = tmp13 + z1 - - paddw_r2r(mm2, mm5); // tmp11 = tmp5 + tmp6 - psubw_r2r(mm6, mm7); // tmp33 = tmp13 - z1 - - movq_r2m(mm0, *(dataptr+5)); //save y2 - paddw_r2r(mm3, mm2); // tmp12 = tmp6 + tmp7 - - /* stage 4 */ - - movq_r2m(mm7, *(dataptr+13)); //save y6 - movq_r2r(mm4, mm1); // copy tmp10 - - psubw_r2r(mm2, mm1); // tmp10 - tmp12 - psllw_i2r(2, mm4); // shift tmp10 - - movq_m2r(RTjpeg_C2mC6, mm0); // load C2mC6 - psllw_i2r(2, mm1); // shift (tmp10-tmp12) - - pmulhw_m2r(RTjpeg_C6, mm1); // z5 - psllw_i2r(2, mm5); // prepare for multiply - - pmulhw_r2r(mm0, mm4); // multiply by converted real - - /* stage 5 */ - - pmulhw_m2r(RTjpeg_C4, mm5); // z3 - psllw_i2r(2, mm2); // prepare for multiply - - pmulhw_m2r(RTjpeg_C2pC6, mm2); // multiply - movq_r2r(mm3, mm0); // copy tmp7 - - movq_m2r(*(dataptr+9), mm7); // m03:m02|m01:m00 - first line (line 4)and copy into mm7 - paddw_r2r(mm1, mm4); // z2 - - paddw_r2r(mm5, mm0); // z11 - psubw_r2r(mm5, mm3); // z13 - - /* stage 6 */ - - movq_r2r(mm3, mm5); // copy z13 - paddw_r2r(mm1, mm2); // z4 - - movq_r2r(mm0, mm6); // copy z11 - psubw_r2r(mm4, mm5); // y3 - - paddw_r2r(mm2, mm6); // y1 - paddw_r2r(mm4, mm3); // y5 - - movq_r2m(mm5, *(dataptr+7)); //save y3 - psubw_r2r(mm2, mm0); // yè=z11 - z4 - - movq_r2m(mm3, *(dataptr+11)); //save y5 - - movq_r2m(mm6, *(dataptr+3)); //save y1 - - movq_r2m(mm0, *(dataptr+15)); //save y7 - - -#endif -} - -#define FIX_1_082392200 ((__s32) 277) /* FIX(1.082392200) */ -#define FIX_1_414213562 ((__s32) 362) /* FIX(1.414213562) */ -#define FIX_1_847759065 ((__s32) 473) /* FIX(1.847759065) */ -#define FIX_2_613125930 ((__s32) 669) /* FIX(2.613125930) */ - -#define DESCALE(x) (__s16)( ((x)+4) >> 3) - -/* clip yuv to 16..235 (should be 16..240 for cr/cb but ... */ - -#define RL(x) ((x)>235) ? 235 : (((x)<16) ? 16 : (x)) -#define MULTIPLY(var,const) (((__s32) ((var) * (const)) + 128)>>8) - -void RTjpeg_idct_init(void) -{ - int i; - - for(i=0; i<64; i++) - { - RTjpeg_liqt[i]=((__u64)RTjpeg_liqt[i]*RTjpeg_aan_tab[i])>>32; - RTjpeg_ciqt[i]=((__u64)RTjpeg_ciqt[i]*RTjpeg_aan_tab[i])>>32; - } -} - -void RTjpeg_idct(__u8 *odata, __s16 *data, int rskip) -{ -#ifdef MMX - -static mmx_t fix_141 = (mmx_t)(long long)0x5a825a825a825a82LL; -static mmx_t fix_184n261 = (mmx_t)(long long)0xcf04cf04cf04cf04LL; -static mmx_t fix_184 = (mmx_t)(long long)0x7641764176417641LL; -static mmx_t fix_n184 = (mmx_t)(long long)0x896f896f896f896fLL; -static mmx_t fix_108n184 = (mmx_t)(long long)0xcf04cf04cf04cf04LL; - - mmx_t workspace[64]; - mmx_t *wsptr = workspace; - register mmx_t *dataptr = (mmx_t *)odata; - mmx_t *idata = (mmx_t *)data; - - rskip = rskip>>3; -/* - * Perform inverse DCT on one block of coefficients. - */ - - /* Odd part */ - - movq_m2r(*(idata+10), mm1); // load idata[DCTSIZE*5] - - movq_m2r(*(idata+6), mm0); // load idata[DCTSIZE*3] - - movq_m2r(*(idata+2), mm3); // load idata[DCTSIZE*1] - - movq_r2r(mm1, mm2); // copy tmp6 /* phase 6 */ - - movq_m2r(*(idata+14), mm4); // load idata[DCTSIZE*7] - - paddw_r2r(mm0, mm1); // z13 = tmp6 + tmp5; - - psubw_r2r(mm0, mm2); // z10 = tmp6 - tmp5 - - psllw_i2r(2, mm2); // shift z10 - movq_r2r(mm2, mm0); // copy z10 - - pmulhw_m2r(fix_184n261, mm2); // MULTIPLY( z12, FIX_1_847759065); /* 2*c2 */ - movq_r2r(mm3, mm5); // copy tmp4 - - pmulhw_m2r(fix_n184, mm0); // MULTIPLY(z10, -FIX_1_847759065); /* 2*c2 */ - paddw_r2r(mm4, mm3); // z11 = tmp4 + tmp7; - - movq_r2r(mm3, mm6); // copy z11 /* phase 5 */ - psubw_r2r(mm4, mm5); // z12 = tmp4 - tmp7; - - psubw_r2r(mm1, mm6); // z11-z13 - psllw_i2r(2, mm5); // shift z12 - - movq_m2r(*(idata+12), mm4); // load idata[DCTSIZE*6], even part - movq_r2r(mm5, mm7); // copy z12 - - pmulhw_m2r(fix_108n184, mm5); // MULT(z12, (FIX_1_08-FIX_1_84)) //- z5; /* 2*(c2-c6) */ even part - paddw_r2r(mm1, mm3); // tmp7 = z11 + z13; - - //ok - - /* Even part */ - pmulhw_m2r(fix_184, mm7); // MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) //+ z5; /* -2*(c2+c6) */ - psllw_i2r(2, mm6); - - movq_m2r(*(idata+4), mm1); // load idata[DCTSIZE*2] - - paddw_r2r(mm5, mm0); // tmp10 - - paddw_r2r(mm7, mm2); // tmp12 - - pmulhw_m2r(fix_141, mm6); // tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */ - psubw_r2r(mm3, mm2); // tmp6 = tmp12 - tmp7 - - movq_r2r(mm1, mm5); // copy tmp1 - paddw_r2r(mm4, mm1); // tmp13= tmp1 + tmp3; /* phases 5-3 */ - - psubw_r2r(mm4, mm5); // tmp1-tmp3 - psubw_r2r(mm2, mm6); // tmp5 = tmp11 - tmp6; - - movq_r2m(mm1, *(wsptr)); // save tmp13 in workspace - psllw_i2r(2, mm5); // shift tmp1-tmp3 - - movq_m2r(*(idata), mm7); // load idata[DCTSIZE*0] - - pmulhw_m2r(fix_141, mm5); // MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - paddw_r2r(mm6, mm0); // tmp4 = tmp10 + tmp5; - - movq_m2r(*(idata+8), mm4); // load idata[DCTSIZE*4] - - psubw_r2r(mm1, mm5); // tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */ - - movq_r2m(mm0, *(wsptr+4)); // save tmp4 in workspace - movq_r2r(mm7, mm1); // copy tmp0 /* phase 3 */ - - movq_r2m(mm5, *(wsptr+2)); // save tmp12 in workspace - psubw_r2r(mm4, mm1); // tmp11 = tmp0 - tmp2; - - paddw_r2r(mm4, mm7); // tmp10 = tmp0 + tmp2; - movq_r2r(mm1, mm5); // copy tmp11 - - paddw_m2r(*(wsptr+2), mm1); // tmp1 = tmp11 + tmp12; - movq_r2r(mm7, mm4); // copy tmp10 /* phase 2 */ - - paddw_m2r(*(wsptr), mm7); // tmp0 = tmp10 + tmp13; - - psubw_m2r(*(wsptr), mm4); // tmp3 = tmp10 - tmp13; - movq_r2r(mm7, mm0); // copy tmp0 - - psubw_m2r(*(wsptr+2), mm5); // tmp2 = tmp11 - tmp12; - paddw_r2r(mm3, mm7); // wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7); - - psubw_r2r(mm3, mm0); // wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7); - - movq_r2m(mm7, *(wsptr)); // wsptr[DCTSIZE*0] - movq_r2r(mm1, mm3); // copy tmp1 - - movq_r2m(mm0, *(wsptr+14)); // wsptr[DCTSIZE*7] - paddw_r2r(mm2, mm1); // wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6); - - psubw_r2r(mm2, mm3); // wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6); - - movq_r2m(mm1, *(wsptr+2)); // wsptr[DCTSIZE*1] - movq_r2r(mm4, mm1); // copy tmp3 - - movq_r2m(mm3, *(wsptr+12)); // wsptr[DCTSIZE*6] - - paddw_m2r(*(wsptr+4), mm4); // wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4); - - psubw_m2r(*(wsptr+4), mm1); // wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4); - - movq_r2m(mm4, *(wsptr+8)); - movq_r2r(mm5, mm7); // copy tmp2 - - paddw_r2r(mm6, mm5); // wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5) - - movq_r2m(mm1, *(wsptr+6)); - psubw_r2r(mm6, mm7); // wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5); - - movq_r2m(mm5, *(wsptr+4)); - - movq_r2m(mm7, *(wsptr+10)); - - //ok - - -/*****************************************************************/ - - idata++; - wsptr++; - -/*****************************************************************/ - - movq_m2r(*(idata+10), mm1); // load idata[DCTSIZE*5] - - movq_m2r(*(idata+6), mm0); // load idata[DCTSIZE*3] - - movq_m2r(*(idata+2), mm3); // load idata[DCTSIZE*1] - movq_r2r(mm1, mm2); // copy tmp6 /* phase 6 */ - - movq_m2r(*(idata+14), mm4); // load idata[DCTSIZE*7] - paddw_r2r(mm0, mm1); // z13 = tmp6 + tmp5; - - psubw_r2r(mm0, mm2); // z10 = tmp6 - tmp5 - - psllw_i2r(2, mm2); // shift z10 - movq_r2r(mm2, mm0); // copy z10 - - pmulhw_m2r(fix_184n261, mm2); // MULTIPLY( z12, FIX_1_847759065); /* 2*c2 */ - movq_r2r(mm3, mm5); // copy tmp4 - - pmulhw_m2r(fix_n184, mm0); // MULTIPLY(z10, -FIX_1_847759065); /* 2*c2 */ - paddw_r2r(mm4, mm3); // z11 = tmp4 + tmp7; - - movq_r2r(mm3, mm6); // copy z11 /* phase 5 */ - psubw_r2r(mm4, mm5); // z12 = tmp4 - tmp7; - - psubw_r2r(mm1, mm6); // z11-z13 - psllw_i2r(2, mm5); // shift z12 - - movq_m2r(*(idata+12), mm4); // load idata[DCTSIZE*6], even part - movq_r2r(mm5, mm7); // copy z12 - - pmulhw_m2r(fix_108n184, mm5); // MULT(z12, (FIX_1_08-FIX_1_84)) //- z5; /* 2*(c2-c6) */ even part - paddw_r2r(mm1, mm3); // tmp7 = z11 + z13; - - //ok - - /* Even part */ - pmulhw_m2r(fix_184, mm7); // MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) //+ z5; /* -2*(c2+c6) */ - psllw_i2r(2, mm6); - - movq_m2r(*(idata+4), mm1); // load idata[DCTSIZE*2] - - paddw_r2r(mm5, mm0); // tmp10 - - paddw_r2r(mm7, mm2); // tmp12 - - pmulhw_m2r(fix_141, mm6); // tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */ - psubw_r2r(mm3, mm2); // tmp6 = tmp12 - tmp7 - - movq_r2r(mm1, mm5); // copy tmp1 - paddw_r2r(mm4, mm1); // tmp13= tmp1 + tmp3; /* phases 5-3 */ - - psubw_r2r(mm4, mm5); // tmp1-tmp3 - psubw_r2r(mm2, mm6); // tmp5 = tmp11 - tmp6; - - movq_r2m(mm1, *(wsptr)); // save tmp13 in workspace - psllw_i2r(2, mm5); // shift tmp1-tmp3 - - movq_m2r(*(idata), mm7); // load idata[DCTSIZE*0] - paddw_r2r(mm6, mm0); // tmp4 = tmp10 + tmp5; - - pmulhw_m2r(fix_141, mm5); // MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - - movq_m2r(*(idata+8), mm4); // load idata[DCTSIZE*4] - - psubw_r2r(mm1, mm5); // tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */ - - movq_r2m(mm0, *(wsptr+4)); // save tmp4 in workspace - movq_r2r(mm7, mm1); // copy tmp0 /* phase 3 */ - - movq_r2m(mm5, *(wsptr+2)); // save tmp12 in workspace - psubw_r2r(mm4, mm1); // tmp11 = tmp0 - tmp2; - - paddw_r2r(mm4, mm7); // tmp10 = tmp0 + tmp2; - movq_r2r(mm1, mm5); // copy tmp11 - - paddw_m2r(*(wsptr+2), mm1); // tmp1 = tmp11 + tmp12; - movq_r2r(mm7, mm4); // copy tmp10 /* phase 2 */ - - paddw_m2r(*(wsptr), mm7); // tmp0 = tmp10 + tmp13; - - psubw_m2r(*(wsptr), mm4); // tmp3 = tmp10 - tmp13; - movq_r2r(mm7, mm0); // copy tmp0 - - psubw_m2r(*(wsptr+2), mm5); // tmp2 = tmp11 - tmp12; - paddw_r2r(mm3, mm7); // wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7); - - psubw_r2r(mm3, mm0); // wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7); - - movq_r2m(mm7, *(wsptr)); // wsptr[DCTSIZE*0] - movq_r2r(mm1, mm3); // copy tmp1 - - movq_r2m(mm0, *(wsptr+14)); // wsptr[DCTSIZE*7] - paddw_r2r(mm2, mm1); // wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6); - - psubw_r2r(mm2, mm3); // wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6); - - movq_r2m(mm1, *(wsptr+2)); // wsptr[DCTSIZE*1] - movq_r2r(mm4, mm1); // copy tmp3 - - movq_r2m(mm3, *(wsptr+12)); // wsptr[DCTSIZE*6] - - paddw_m2r(*(wsptr+4), mm4); // wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4); - - psubw_m2r(*(wsptr+4), mm1); // wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4); - - movq_r2m(mm4, *(wsptr+8)); - movq_r2r(mm5, mm7); // copy tmp2 - - paddw_r2r(mm6, mm5); // wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5) - - movq_r2m(mm1, *(wsptr+6)); - psubw_r2r(mm6, mm7); // wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5); - - movq_r2m(mm5, *(wsptr+4)); - - movq_r2m(mm7, *(wsptr+10)); - -/*****************************************************************/ - - /* Pass 2: process rows from work array, store into output array. */ - /* Note that we must descale the results by a factor of 8 == 2**3, */ - /* and also undo the PASS1_BITS scaling. */ - -/*****************************************************************/ - /* Even part */ - - wsptr--; - -// tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]); -// tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]); -// tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]); -// tmp14 = ((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6]); - movq_m2r(*(wsptr), mm0); // wsptr[0,0],[0,1],[0,2],[0,3] - - movq_m2r(*(wsptr+1), mm1); // wsptr[0,4],[0,5],[0,6],[0,7] - movq_r2r(mm0, mm2); - - movq_m2r(*(wsptr+2), mm3); // wsptr[1,0],[1,1],[1,2],[1,3] - paddw_r2r(mm1, mm0); // wsptr[0,tmp10],[xxx],[0,tmp13],[xxx] - - movq_m2r(*(wsptr+3), mm4); // wsptr[1,4],[1,5],[1,6],[1,7] - psubw_r2r(mm1, mm2); // wsptr[0,tmp11],[xxx],[0,tmp14],[xxx] - - movq_r2r(mm0, mm6); - movq_r2r(mm3, mm5); - - paddw_r2r(mm4, mm3); // wsptr[1,tmp10],[xxx],[1,tmp13],[xxx] - movq_r2r(mm2, mm1); - - psubw_r2r(mm4, mm5); // wsptr[1,tmp11],[xxx],[1,tmp14],[xxx] - punpcklwd_r2r(mm3, mm0); // wsptr[0,tmp10],[1,tmp10],[xxx],[xxx] - - movq_m2r(*(wsptr+7), mm7); // wsptr[3,4],[3,5],[3,6],[3,7] - punpckhwd_r2r(mm3, mm6); // wsptr[0,tmp13],[1,tmp13],[xxx],[xxx] - - movq_m2r(*(wsptr+4), mm3); // wsptr[2,0],[2,1],[2,2],[2,3] - punpckldq_r2r(mm6, mm0); // wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13] - - punpcklwd_r2r(mm5, mm1); // wsptr[0,tmp11],[1,tmp11],[xxx],[xxx] - movq_r2r(mm3, mm4); - - movq_m2r(*(wsptr+6), mm6); // wsptr[3,0],[3,1],[3,2],[3,3] - punpckhwd_r2r(mm5, mm2); // wsptr[0,tmp14],[1,tmp14],[xxx],[xxx] - - movq_m2r(*(wsptr+5), mm5); // wsptr[2,4],[2,5],[2,6],[2,7] - punpckldq_r2r(mm2, mm1); // wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] - - - paddw_r2r(mm5, mm3); // wsptr[2,tmp10],[xxx],[2,tmp13],[xxx] - movq_r2r(mm6, mm2); - - psubw_r2r(mm5, mm4); // wsptr[2,tmp11],[xxx],[2,tmp14],[xxx] - paddw_r2r(mm7, mm6); // wsptr[3,tmp10],[xxx],[3,tmp13],[xxx] - - movq_r2r(mm3, mm5); - punpcklwd_r2r(mm6, mm3); // wsptr[2,tmp10],[3,tmp10],[xxx],[xxx] - - psubw_r2r(mm7, mm2); // wsptr[3,tmp11],[xxx],[3,tmp14],[xxx] - punpckhwd_r2r(mm6, mm5); // wsptr[2,tmp13],[3,tmp13],[xxx],[xxx] - - movq_r2r(mm4, mm7); - punpckldq_r2r(mm5, mm3); // wsptr[2,tmp10],[3,tmp10],[2,tmp13],[3,tmp13] - - punpcklwd_r2r(mm2, mm4); // wsptr[2,tmp11],[3,tmp11],[xxx],[xxx] - - punpckhwd_r2r(mm2, mm7); // wsptr[2,tmp14],[3,tmp14],[xxx],[xxx] - - punpckldq_r2r(mm7, mm4); // wsptr[2,tmp11],[3,tmp11],[2,tmp14],[3,tmp14] - movq_r2r(mm1, mm6); - - //ok - -// mm0 = ;wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13] -// mm1 = ;wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] - - - movq_r2r(mm0, mm2); - punpckhdq_r2r(mm4, mm6); // wsptr[0,tmp14],[1,tmp14],[2,tmp14],[3,tmp14] - - punpckldq_r2r(mm4, mm1); // wsptr[0,tmp11],[1,tmp11],[2,tmp11],[3,tmp11] - psllw_i2r(2, mm6); - - pmulhw_m2r(fix_141, mm6); - punpckldq_r2r(mm3, mm0); // wsptr[0,tmp10],[1,tmp10],[2,tmp10],[3,tmp10] - - punpckhdq_r2r(mm3, mm2); // wsptr[0,tmp13],[1,tmp13],[2,tmp13],[3,tmp13] - movq_r2r(mm0, mm7); - -// tmp0 = tmp10 + tmp13; -// tmp3 = tmp10 - tmp13; - paddw_r2r(mm2, mm0); // [0,tmp0],[1,tmp0],[2,tmp0],[3,tmp0] - psubw_r2r(mm2, mm7); // [0,tmp3],[1,tmp3],[2,tmp3],[3,tmp3] - -// tmp12 = MULTIPLY(tmp14, FIX_1_414213562) - tmp13; - psubw_r2r(mm2, mm6); // wsptr[0,tmp12],[1,tmp12],[2,tmp12],[3,tmp12] -// tmp1 = tmp11 + tmp12; -// tmp2 = tmp11 - tmp12; - movq_r2r(mm1, mm5); - - //OK - - /* Odd part */ - -// z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3]; -// z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3]; -// z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7]; -// z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7]; - movq_m2r(*(wsptr), mm3); // wsptr[0,0],[0,1],[0,2],[0,3] - paddw_r2r(mm6, mm1); // [0,tmp1],[1,tmp1],[2,tmp1],[3,tmp1] - - movq_m2r(*(wsptr+1), mm4); // wsptr[0,4],[0,5],[0,6],[0,7] - psubw_r2r(mm6, mm5); // [0,tmp2],[1,tmp2],[2,tmp2],[3,tmp2] - - movq_r2r(mm3, mm6); - punpckldq_r2r(mm4, mm3); // wsptr[0,0],[0,1],[0,4],[0,5] - - punpckhdq_r2r(mm6, mm4); // wsptr[0,6],[0,7],[0,2],[0,3] - movq_r2r(mm3, mm2); - -//Save tmp0 and tmp1 in wsptr - movq_r2m(mm0, *(wsptr)); // save tmp0 - paddw_r2r(mm4, mm2); // wsptr[xxx],[0,z11],[xxx],[0,z13] - - -//Continue with z10 --- z13 - movq_m2r(*(wsptr+2), mm6); // wsptr[1,0],[1,1],[1,2],[1,3] - psubw_r2r(mm4, mm3); // wsptr[xxx],[0,z12],[xxx],[0,z10] - - movq_m2r(*(wsptr+3), mm0); // wsptr[1,4],[1,5],[1,6],[1,7] - movq_r2r(mm6, mm4); - - movq_r2m(mm1, *(wsptr+1)); // save tmp1 - punpckldq_r2r(mm0, mm6); // wsptr[1,0],[1,1],[1,4],[1,5] - - punpckhdq_r2r(mm4, mm0); // wsptr[1,6],[1,7],[1,2],[1,3] - movq_r2r(mm6, mm1); - -//Save tmp2 and tmp3 in wsptr - paddw_r2r(mm0, mm6); // wsptr[xxx],[1,z11],[xxx],[1,z13] - movq_r2r(mm2, mm4); - -//Continue with z10 --- z13 - movq_r2m(mm5, *(wsptr+2)); // save tmp2 - punpcklwd_r2r(mm6, mm2); // wsptr[xxx],[xxx],[0,z11],[1,z11] - - psubw_r2r(mm0, mm1); // wsptr[xxx],[1,z12],[xxx],[1,z10] - punpckhwd_r2r(mm6, mm4); // wsptr[xxx],[xxx],[0,z13],[1,z13] - - movq_r2r(mm3, mm0); - punpcklwd_r2r(mm1, mm3); // wsptr[xxx],[xxx],[0,z12],[1,z12] - - movq_r2m(mm7, *(wsptr+3)); // save tmp3 - punpckhwd_r2r(mm1, mm0); // wsptr[xxx],[xxx],[0,z10],[1,z10] - - movq_m2r(*(wsptr+4), mm6); // wsptr[2,0],[2,1],[2,2],[2,3] - punpckhdq_r2r(mm2, mm0); // wsptr[0,z10],[1,z10],[0,z11],[1,z11] - - movq_m2r(*(wsptr+5), mm7); // wsptr[2,4],[2,5],[2,6],[2,7] - punpckhdq_r2r(mm4, mm3); // wsptr[0,z12],[1,z12],[0,z13],[1,z13] - - movq_m2r(*(wsptr+6), mm1); // wsptr[3,0],[3,1],[3,2],[3,3] - movq_r2r(mm6, mm4); - - punpckldq_r2r(mm7, mm6); // wsptr[2,0],[2,1],[2,4],[2,5] - movq_r2r(mm1, mm5); - - punpckhdq_r2r(mm4, mm7); // wsptr[2,6],[2,7],[2,2],[2,3] - movq_r2r(mm6, mm2); - - movq_m2r(*(wsptr+7), mm4); // wsptr[3,4],[3,5],[3,6],[3,7] - paddw_r2r(mm7, mm6); // wsptr[xxx],[2,z11],[xxx],[2,z13] - - psubw_r2r(mm7, mm2); // wsptr[xxx],[2,z12],[xxx],[2,z10] - punpckldq_r2r(mm4, mm1); // wsptr[3,0],[3,1],[3,4],[3,5] - - punpckhdq_r2r(mm5, mm4); // wsptr[3,6],[3,7],[3,2],[3,3] - movq_r2r(mm1, mm7); - - paddw_r2r(mm4, mm1); // wsptr[xxx],[3,z11],[xxx],[3,z13] - psubw_r2r(mm4, mm7); // wsptr[xxx],[3,z12],[xxx],[3,z10] - - movq_r2r(mm6, mm5); - punpcklwd_r2r(mm1, mm6); // wsptr[xxx],[xxx],[2,z11],[3,z11] - - punpckhwd_r2r(mm1, mm5); // wsptr[xxx],[xxx],[2,z13],[3,z13] - movq_r2r(mm2, mm4); - - punpcklwd_r2r(mm7, mm2); // wsptr[xxx],[xxx],[2,z12],[3,z12] - - punpckhwd_r2r(mm7, mm4); // wsptr[xxx],[xxx],[2,z10],[3,z10] - - punpckhdq_r2r(mm6, mm4); /// wsptr[2,z10],[3,z10],[2,z11],[3,z11] - - punpckhdq_r2r(mm5, mm2); // wsptr[2,z12],[3,z12],[2,z13],[3,z13] - movq_r2r(mm0, mm5); - - punpckldq_r2r(mm4, mm0); // wsptr[0,z10],[1,z10],[2,z10],[3,z10] - - punpckhdq_r2r(mm4, mm5); // wsptr[0,z11],[1,z11],[2,z11],[3,z11] - movq_r2r(mm3, mm4); - - punpckhdq_r2r(mm2, mm4); // wsptr[0,z13],[1,z13],[2,z13],[3,z13] - movq_r2r(mm5, mm1); - - punpckldq_r2r(mm2, mm3); // wsptr[0,z12],[1,z12],[2,z12],[3,z12] -// tmp7 = z11 + z13; /* phase 5 */ -// tmp8 = z11 - z13; /* phase 5 */ - psubw_r2r(mm4, mm1); // tmp8 - - paddw_r2r(mm4, mm5); // tmp7 -// tmp21 = MULTIPLY(tmp8, FIX_1_414213562); /* 2*c4 */ - psllw_i2r(2, mm1); - - psllw_i2r(2, mm0); - - pmulhw_m2r(fix_141, mm1); // tmp21 -// tmp20 = MULTIPLY(z12, (FIX_1_082392200- FIX_1_847759065)) /* 2*(c2-c6) */ -// + MULTIPLY(z10, - FIX_1_847759065); /* 2*c2 */ - psllw_i2r(2, mm3); - movq_r2r(mm0, mm7); - - pmulhw_m2r(fix_n184, mm7); - movq_r2r(mm3, mm6); - - movq_m2r(*(wsptr), mm2); // tmp0,final1 - - pmulhw_m2r(fix_108n184, mm6); -// tmp22 = MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) /* -2*(c2+c6) */ -// + MULTIPLY(z12, FIX_1_847759065); /* 2*c2 */ - movq_r2r(mm2, mm4); // final1 - - pmulhw_m2r(fix_184n261, mm0); - paddw_r2r(mm5, mm2); // tmp0+tmp7,final1 - - pmulhw_m2r(fix_184, mm3); - psubw_r2r(mm5, mm4); // tmp0-tmp7,final1 - -// tmp6 = tmp22 - tmp7; /* phase 2 */ - psraw_i2r(3, mm2); // outptr[0,0],[1,0],[2,0],[3,0],final1 - - paddw_r2r(mm6, mm7); // tmp20 - psraw_i2r(3, mm4); // outptr[0,7],[1,7],[2,7],[3,7],final1 - - paddw_r2r(mm0, mm3); // tmp22 - -// tmp5 = tmp21 - tmp6; - psubw_r2r(mm5, mm3); // tmp6 - -// tmp4 = tmp20 + tmp5; - movq_m2r(*(wsptr+1), mm0); // tmp1,final2 - psubw_r2r(mm3, mm1); // tmp5 - - movq_r2r(mm0, mm6); // final2 - paddw_r2r(mm3, mm0); // tmp1+tmp6,final2 - - /* Final output stage: scale down by a factor of 8 and range-limit */ - - -// outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3) -// & RANGE_MASK]; -// outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3) -// & RANGE_MASK]; final1 - - -// outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3) -// & RANGE_MASK]; -// outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3) -// & RANGE_MASK]; final2 - psubw_r2r(mm3, mm6); // tmp1-tmp6,final2 - psraw_i2r(3, mm0); // outptr[0,1],[1,1],[2,1],[3,1] - - psraw_i2r(3, mm6); // outptr[0,6],[1,6],[2,6],[3,6] - - packuswb_r2r(mm4, mm0); // out[0,1],[1,1],[2,1],[3,1],[0,7],[1,7],[2,7],[3,7] - - movq_m2r(*(wsptr+2), mm5); // tmp2,final3 - packuswb_r2r(mm6, mm2); // out[0,0],[1,0],[2,0],[3,0],[0,6],[1,6],[2,6],[3,6] - -// outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3) -// & RANGE_MASK]; -// outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3) -// & RANGE_MASK]; final3 - paddw_r2r(mm1, mm7); // tmp4 - movq_r2r(mm5, mm3); - - paddw_r2r(mm1, mm5); // tmp2+tmp5 - psubw_r2r(mm1, mm3); // tmp2-tmp5 - - psraw_i2r(3, mm5); // outptr[0,2],[1,2],[2,2],[3,2] - - movq_m2r(*(wsptr+3), mm4); // tmp3,final4 - psraw_i2r(3, mm3); // outptr[0,5],[1,5],[2,5],[3,5] - - - -// outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3) -// & RANGE_MASK]; -// outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3) -// & RANGE_MASK]; final4 - movq_r2r(mm4, mm6); - paddw_r2r(mm7, mm4); // tmp3+tmp4 - - psubw_r2r(mm7, mm6); // tmp3-tmp4 - psraw_i2r(3, mm4); // outptr[0,4],[1,4],[2,4],[3,4] - - // mov ecx, [dataptr] - - psraw_i2r(3, mm6); // outptr[0,3],[1,3],[2,3],[3,3] - - packuswb_r2r(mm4, mm5); // out[0,2],[1,2],[2,2],[3,2],[0,4],[1,4],[2,4],[3,4] - - packuswb_r2r(mm3, mm6); // out[0,3],[1,3],[2,3],[3,3],[0,5],[1,5],[2,5],[3,5] - movq_r2r(mm2, mm4); - - movq_r2r(mm5, mm7); - punpcklbw_r2r(mm0, mm2); // out[0,0],[0,1],[1,0],[1,1],[2,0],[2,1],[3,0],[3,1] - - punpckhbw_r2r(mm0, mm4); // out[0,6],[0,7],[1,6],[1,7],[2,6],[2,7],[3,6],[3,7] - movq_r2r(mm2, mm1); - - punpcklbw_r2r(mm6, mm5); // out[0,2],[0,3],[1,2],[1,3],[2,2],[2,3],[3,2],[3,3] - - // add dataptr, 4 - - punpckhbw_r2r(mm6, mm7); // out[0,4],[0,5],[1,4],[1,5],[2,4],[2,5],[3,4],[3,5] - - punpcklwd_r2r(mm5, mm2); // out[0,0],[0,1],[0,2],[0,3],[1,0],[1,1],[1,2],[1,3] - - // add ecx, output_col - - movq_r2r(mm7, mm6); - punpckhwd_r2r(mm5, mm1); // out[2,0],[2,1],[2,2],[2,3],[3,0],[3,1],[3,2],[3,3] - - movq_r2r(mm2, mm0); - punpcklwd_r2r(mm4, mm6); // out[0,4],[0,5],[0,6],[0,7],[1,4],[1,5],[1,6],[1,7] - - // mov idata, [dataptr] - - punpckldq_r2r(mm6, mm2); // out[0,0],[0,1],[0,2],[0,3],[0,4],[0,5],[0,6],[0,7] - - // add dataptr, 4 - - movq_r2r(mm1, mm3); - - // add idata, output_col - - punpckhwd_r2r(mm4, mm7); // out[2,4],[2,5],[2,6],[2,7],[3,4],[3,5],[3,6],[3,7] - - movq_r2m(mm2, *(dataptr)); - - punpckhdq_r2r(mm6, mm0); // out[1,0],[1,1],[1,2],[1,3],[1,4],[1,5],[1,6],[1,7] - - dataptr += rskip; - movq_r2m(mm0, *(dataptr)); - - punpckldq_r2r(mm7, mm1); // out[2,0],[2,1],[2,2],[2,3],[2,4],[2,5],[2,6],[2,7] - punpckhdq_r2r(mm7, mm3); // out[3,0],[3,1],[3,2],[3,3],[3,4],[3,5],[3,6],[3,7] - - dataptr += rskip; - movq_r2m(mm1, *(dataptr)); - - dataptr += rskip; - movq_r2m(mm3, *(dataptr)); - -/*******************************************************************/ - - wsptr += 8; - -/*******************************************************************/ - -// tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]); -// tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]); -// tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]); -// tmp14 = ((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6]); - movq_m2r(*(wsptr), mm0); // wsptr[0,0],[0,1],[0,2],[0,3] - - movq_m2r(*(wsptr+1), mm1); // wsptr[0,4],[0,5],[0,6],[0,7] - movq_r2r(mm0, mm2); - - movq_m2r(*(wsptr+2), mm3); // wsptr[1,0],[1,1],[1,2],[1,3] - paddw_r2r(mm1, mm0); // wsptr[0,tmp10],[xxx],[0,tmp13],[xxx] - - movq_m2r(*(wsptr+3), mm4); // wsptr[1,4],[1,5],[1,6],[1,7] - psubw_r2r(mm1, mm2); // wsptr[0,tmp11],[xxx],[0,tmp14],[xxx] - - movq_r2r(mm0, mm6); - movq_r2r(mm3, mm5); - - paddw_r2r(mm4, mm3); // wsptr[1,tmp10],[xxx],[1,tmp13],[xxx] - movq_r2r(mm2, mm1); - - psubw_r2r(mm4, mm5); // wsptr[1,tmp11],[xxx],[1,tmp14],[xxx] - punpcklwd_r2r(mm3, mm0); // wsptr[0,tmp10],[1,tmp10],[xxx],[xxx] - - movq_m2r(*(wsptr+7), mm7); // wsptr[3,4],[3,5],[3,6],[3,7] - punpckhwd_r2r(mm3, mm6); // wsptr[0,tmp13],[1,tmp13],[xxx],[xxx] - - movq_m2r(*(wsptr+4), mm3); // wsptr[2,0],[2,1],[2,2],[2,3] - punpckldq_r2r(mm6, mm0); // wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13] - - punpcklwd_r2r(mm5, mm1); // wsptr[0,tmp11],[1,tmp11],[xxx],[xxx] - movq_r2r(mm3, mm4); - - movq_m2r(*(wsptr+6), mm6); // wsptr[3,0],[3,1],[3,2],[3,3] - punpckhwd_r2r(mm5, mm2); // wsptr[0,tmp14],[1,tmp14],[xxx],[xxx] - - movq_m2r(*(wsptr+5), mm5); // wsptr[2,4],[2,5],[2,6],[2,7] - punpckldq_r2r(mm2, mm1); // wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] - - paddw_r2r(mm5, mm3); // wsptr[2,tmp10],[xxx],[2,tmp13],[xxx] - movq_r2r(mm6, mm2); - - psubw_r2r(mm5, mm4); // wsptr[2,tmp11],[xxx],[2,tmp14],[xxx] - paddw_r2r(mm7, mm6); // wsptr[3,tmp10],[xxx],[3,tmp13],[xxx] - - movq_r2r(mm3, mm5); - punpcklwd_r2r(mm6, mm3); // wsptr[2,tmp10],[3,tmp10],[xxx],[xxx] - - psubw_r2r(mm7, mm2); // wsptr[3,tmp11],[xxx],[3,tmp14],[xxx] - punpckhwd_r2r(mm6, mm5); // wsptr[2,tmp13],[3,tmp13],[xxx],[xxx] - - movq_r2r(mm4, mm7); - punpckldq_r2r(mm5, mm3); // wsptr[2,tmp10],[3,tmp10],[2,tmp13],[3,tmp13] - - punpcklwd_r2r(mm2, mm4); // wsptr[2,tmp11],[3,tmp11],[xxx],[xxx] - - punpckhwd_r2r(mm2, mm7); // wsptr[2,tmp14],[3,tmp14],[xxx],[xxx] - - punpckldq_r2r(mm7, mm4); // wsptr[2,tmp11],[3,tmp11],[2,tmp14],[3,tmp14] - movq_r2r(mm1, mm6); - - //OK - -// mm0 = ;wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13] -// mm1 = ;wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] - - movq_r2r(mm0, mm2); - punpckhdq_r2r(mm4, mm6); // wsptr[0,tmp14],[1,tmp14],[2,tmp14],[3,tmp14] - - punpckldq_r2r(mm4, mm1); // wsptr[0,tmp11],[1,tmp11],[2,tmp11],[3,tmp11] - psllw_i2r(2, mm6); - - pmulhw_m2r(fix_141, mm6); - punpckldq_r2r(mm3, mm0); // wsptr[0,tmp10],[1,tmp10],[2,tmp10],[3,tmp10] - - punpckhdq_r2r(mm3, mm2); // wsptr[0,tmp13],[1,tmp13],[2,tmp13],[3,tmp13] - movq_r2r(mm0, mm7); - -// tmp0 = tmp10 + tmp13; -// tmp3 = tmp10 - tmp13; - paddw_r2r(mm2, mm0); // [0,tmp0],[1,tmp0],[2,tmp0],[3,tmp0] - psubw_r2r(mm2, mm7); // [0,tmp3],[1,tmp3],[2,tmp3],[3,tmp3] - -// tmp12 = MULTIPLY(tmp14, FIX_1_414213562) - tmp13; - psubw_r2r(mm2, mm6); // wsptr[0,tmp12],[1,tmp12],[2,tmp12],[3,tmp12] -// tmp1 = tmp11 + tmp12; -// tmp2 = tmp11 - tmp12; - movq_r2r(mm1, mm5); - - //OK - - - /* Odd part */ - -// z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3]; -// z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3]; -// z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7]; -// z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7]; - movq_m2r(*(wsptr), mm3); // wsptr[0,0],[0,1],[0,2],[0,3] - paddw_r2r(mm6, mm1); // [0,tmp1],[1,tmp1],[2,tmp1],[3,tmp1] - - movq_m2r(*(wsptr+1), mm4); // wsptr[0,4],[0,5],[0,6],[0,7] - psubw_r2r(mm6, mm5); // [0,tmp2],[1,tmp2],[2,tmp2],[3,tmp2] - - movq_r2r(mm3, mm6); - punpckldq_r2r(mm4, mm3); // wsptr[0,0],[0,1],[0,4],[0,5] - - punpckhdq_r2r(mm6, mm4); // wsptr[0,6],[0,7],[0,2],[0,3] - movq_r2r(mm3, mm2); - -//Save tmp0 and tmp1 in wsptr - movq_r2m(mm0, *(wsptr)); // save tmp0 - paddw_r2r(mm4, mm2); // wsptr[xxx],[0,z11],[xxx],[0,z13] - - -//Continue with z10 --- z13 - movq_m2r(*(wsptr+2), mm6); // wsptr[1,0],[1,1],[1,2],[1,3] - psubw_r2r(mm4, mm3); // wsptr[xxx],[0,z12],[xxx],[0,z10] - - movq_m2r(*(wsptr+3), mm0); // wsptr[1,4],[1,5],[1,6],[1,7] - movq_r2r(mm6, mm4); - - movq_r2m(mm1, *(wsptr+1)); // save tmp1 - punpckldq_r2r(mm0, mm6); // wsptr[1,0],[1,1],[1,4],[1,5] - - punpckhdq_r2r(mm4, mm0); // wsptr[1,6],[1,7],[1,2],[1,3] - movq_r2r(mm6, mm1); - -//Save tmp2 and tmp3 in wsptr - paddw_r2r(mm0, mm6); // wsptr[xxx],[1,z11],[xxx],[1,z13] - movq_r2r(mm2, mm4); - -//Continue with z10 --- z13 - movq_r2m(mm5, *(wsptr+2)); // save tmp2 - punpcklwd_r2r(mm6, mm2); // wsptr[xxx],[xxx],[0,z11],[1,z11] - - psubw_r2r(mm0, mm1); // wsptr[xxx],[1,z12],[xxx],[1,z10] - punpckhwd_r2r(mm6, mm4); // wsptr[xxx],[xxx],[0,z13],[1,z13] - - movq_r2r(mm3, mm0); - punpcklwd_r2r(mm1, mm3); // wsptr[xxx],[xxx],[0,z12],[1,z12] - - movq_r2m(mm7, *(wsptr+3)); // save tmp3 - punpckhwd_r2r(mm1, mm0); // wsptr[xxx],[xxx],[0,z10],[1,z10] - - movq_m2r(*(wsptr+4), mm6); // wsptr[2,0],[2,1],[2,2],[2,3] - punpckhdq_r2r(mm2, mm0); // wsptr[0,z10],[1,z10],[0,z11],[1,z11] - - movq_m2r(*(wsptr+5), mm7); // wsptr[2,4],[2,5],[2,6],[2,7] - punpckhdq_r2r(mm4, mm3); // wsptr[0,z12],[1,z12],[0,z13],[1,z13] - - movq_m2r(*(wsptr+6), mm1); // wsptr[3,0],[3,1],[3,2],[3,3] - movq_r2r(mm6, mm4); - - punpckldq_r2r(mm7, mm6); // wsptr[2,0],[2,1],[2,4],[2,5] - movq_r2r(mm1, mm5); - - punpckhdq_r2r(mm4, mm7); // wsptr[2,6],[2,7],[2,2],[2,3] - movq_r2r(mm6, mm2); - - movq_m2r(*(wsptr+7), mm4); // wsptr[3,4],[3,5],[3,6],[3,7] - paddw_r2r(mm7, mm6); // wsptr[xxx],[2,z11],[xxx],[2,z13] - - psubw_r2r(mm7, mm2); // wsptr[xxx],[2,z12],[xxx],[2,z10] - punpckldq_r2r(mm4, mm1); // wsptr[3,0],[3,1],[3,4],[3,5] - - punpckhdq_r2r(mm5, mm4); // wsptr[3,6],[3,7],[3,2],[3,3] - movq_r2r(mm1, mm7); - - paddw_r2r(mm4, mm1); // wsptr[xxx],[3,z11],[xxx],[3,z13] - psubw_r2r(mm4, mm7); // wsptr[xxx],[3,z12],[xxx],[3,z10] - - movq_r2r(mm6, mm5); - punpcklwd_r2r(mm1, mm6); // wsptr[xxx],[xxx],[2,z11],[3,z11] - - punpckhwd_r2r(mm1, mm5); // wsptr[xxx],[xxx],[2,z13],[3,z13] - movq_r2r(mm2, mm4); - - punpcklwd_r2r(mm7, mm2); // wsptr[xxx],[xxx],[2,z12],[3,z12] - - punpckhwd_r2r(mm7, mm4); // wsptr[xxx],[xxx],[2,z10],[3,z10] - - punpckhdq_r2r(mm6, mm4); // wsptr[2,z10],[3,z10],[2,z11],[3,z11] - - punpckhdq_r2r(mm5, mm2); // wsptr[2,z12],[3,z12],[2,z13],[3,z13] - movq_r2r(mm0, mm5); - - punpckldq_r2r(mm4, mm0); // wsptr[0,z10],[1,z10],[2,z10],[3,z10] - - punpckhdq_r2r(mm4, mm5); // wsptr[0,z11],[1,z11],[2,z11],[3,z11] - movq_r2r(mm3, mm4); - - punpckhdq_r2r(mm2, mm4); // wsptr[0,z13],[1,z13],[2,z13],[3,z13] - movq_r2r(mm5, mm1); - - punpckldq_r2r(mm2, mm3); // wsptr[0,z12],[1,z12],[2,z12],[3,z12] -// tmp7 = z11 + z13; /* phase 5 */ -// tmp8 = z11 - z13; /* phase 5 */ - psubw_r2r(mm4, mm1); // tmp8 - - paddw_r2r(mm4, mm5); // tmp7 -// tmp21 = MULTIPLY(tmp8, FIX_1_414213562); /* 2*c4 */ - psllw_i2r(2, mm1); - - psllw_i2r(2, mm0); - - pmulhw_m2r(fix_141, mm1); // tmp21 -// tmp20 = MULTIPLY(z12, (FIX_1_082392200- FIX_1_847759065)) /* 2*(c2-c6) */ -// + MULTIPLY(z10, - FIX_1_847759065); /* 2*c2 */ - psllw_i2r(2, mm3); - movq_r2r(mm0, mm7); - - pmulhw_m2r(fix_n184, mm7); - movq_r2r(mm3, mm6); - - movq_m2r(*(wsptr), mm2); // tmp0,final1 - - pmulhw_m2r(fix_108n184, mm6); -// tmp22 = MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) /* -2*(c2+c6) */ -// + MULTIPLY(z12, FIX_1_847759065); /* 2*c2 */ - movq_r2r(mm2, mm4); // final1 - - pmulhw_m2r(fix_184n261, mm0); - paddw_r2r(mm5, mm2); // tmp0+tmp7,final1 - - pmulhw_m2r(fix_184, mm3); - psubw_r2r(mm5, mm4); // tmp0-tmp7,final1 - -// tmp6 = tmp22 - tmp7; /* phase 2 */ - psraw_i2r(3, mm2); // outptr[0,0],[1,0],[2,0],[3,0],final1 - - paddw_r2r(mm6, mm7); // tmp20 - psraw_i2r(3, mm4); // outptr[0,7],[1,7],[2,7],[3,7],final1 - - paddw_r2r(mm0, mm3); // tmp22 - -// tmp5 = tmp21 - tmp6; - psubw_r2r(mm5, mm3); // tmp6 - -// tmp4 = tmp20 + tmp5; - movq_m2r(*(wsptr+1), mm0); // tmp1,final2 - psubw_r2r(mm3, mm1); // tmp5 - - movq_r2r(mm0, mm6); // final2 - paddw_r2r(mm3, mm0); // tmp1+tmp6,final2 - - /* Final output stage: scale down by a factor of 8 and range-limit */ - -// outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3) -// & RANGE_MASK]; -// outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3) -// & RANGE_MASK]; final1 - - -// outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3) -// & RANGE_MASK]; -// outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3) -// & RANGE_MASK]; final2 - psubw_r2r(mm3, mm6); // tmp1-tmp6,final2 - psraw_i2r(3, mm0); // outptr[0,1],[1,1],[2,1],[3,1] - - psraw_i2r(3, mm6); // outptr[0,6],[1,6],[2,6],[3,6] - - packuswb_r2r(mm4, mm0); // out[0,1],[1,1],[2,1],[3,1],[0,7],[1,7],[2,7],[3,7] - - movq_m2r(*(wsptr+2), mm5); // tmp2,final3 - packuswb_r2r(mm6, mm2); // out[0,0],[1,0],[2,0],[3,0],[0,6],[1,6],[2,6],[3,6] - -// outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3) -// & RANGE_MASK]; -// outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3) -// & RANGE_MASK]; final3 - paddw_r2r(mm1, mm7); // tmp4 - movq_r2r(mm5, mm3); - - paddw_r2r(mm1, mm5); // tmp2+tmp5 - psubw_r2r(mm1, mm3); // tmp2-tmp5 - - psraw_i2r(3, mm5); // outptr[0,2],[1,2],[2,2],[3,2] - - movq_m2r(*(wsptr+3), mm4); // tmp3,final4 - psraw_i2r(3, mm3); // outptr[0,5],[1,5],[2,5],[3,5] - - - -// outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3) -// & RANGE_MASK]; -// outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3) -// & RANGE_MASK]; final4 - movq_r2r(mm4, mm6); - paddw_r2r(mm7, mm4); // tmp3+tmp4 - - psubw_r2r(mm7, mm6); // tmp3-tmp4 - psraw_i2r(3, mm4); // outptr[0,4],[1,4],[2,4],[3,4] - - psraw_i2r(3, mm6); // outptr[0,3],[1,3],[2,3],[3,3] - - /* - movq_r2m(mm4, *dummy); - fprintf(stderr, "3-4 %016llx\n", dummy); - movq_r2m(mm4, *dummy); - fprintf(stderr, "3+4 %016llx\n", dummy); - */ - - - packuswb_r2r(mm4, mm5); // out[0,2],[1,2],[2,2],[3,2],[0,4],[1,4],[2,4],[3,4] - - packuswb_r2r(mm3, mm6); // out[0,3],[1,3],[2,3],[3,3],[0,5],[1,5],[2,5],[3,5] - movq_r2r(mm2, mm4); - - movq_r2r(mm5, mm7); - punpcklbw_r2r(mm0, mm2); // out[0,0],[0,1],[1,0],[1,1],[2,0],[2,1],[3,0],[3,1] - - punpckhbw_r2r(mm0, mm4); // out[0,6],[0,7],[1,6],[1,7],[2,6],[2,7],[3,6],[3,7] - movq_r2r(mm2, mm1); - - punpcklbw_r2r(mm6, mm5); // out[0,2],[0,3],[1,2],[1,3],[2,2],[2,3],[3,2],[3,3] - - punpckhbw_r2r(mm6, mm7); // out[0,4],[0,5],[1,4],[1,5],[2,4],[2,5],[3,4],[3,5] - - punpcklwd_r2r(mm5, mm2); // out[0,0],[0,1],[0,2],[0,3],[1,0],[1,1],[1,2],[1,3] - - movq_r2r(mm7, mm6); - punpckhwd_r2r(mm5, mm1); // out[2,0],[2,1],[2,2],[2,3],[3,0],[3,1],[3,2],[3,3] - - movq_r2r(mm2, mm0); - punpcklwd_r2r(mm4, mm6); // out[0,4],[0,5],[0,6],[0,7],[1,4],[1,5],[1,6],[1,7] - - punpckldq_r2r(mm6, mm2); // out[0,0],[0,1],[0,2],[0,3],[0,4],[0,5],[0,6],[0,7] - - movq_r2r(mm1, mm3); - - punpckhwd_r2r(mm4, mm7); // out[2,4],[2,5],[2,6],[2,7],[3,4],[3,5],[3,6],[3,7] - - dataptr += rskip; - movq_r2m(mm2, *(dataptr)); - - punpckhdq_r2r(mm6, mm0); // out[1,0],[1,1],[1,2],[1,3],[1,4],[1,5],[1,6],[1,7] - - dataptr += rskip; - movq_r2m(mm0, *(dataptr)); - - punpckldq_r2r(mm7, mm1); // out[2,0],[2,1],[2,2],[2,3],[2,4],[2,5],[2,6],[2,7] - - punpckhdq_r2r(mm7, mm3); // out[3,0],[3,1],[3,2],[3,3],[3,4],[3,5],[3,6],[3,7] - - dataptr += rskip; - movq_r2m(mm1, *(dataptr)); - - dataptr += rskip; - movq_r2m(mm3, *(dataptr)); - -#else - __s32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - __s32 tmp10, tmp11, tmp12, tmp13; - __s32 z5, z10, z11, z12, z13; - __s16 *inptr; - __s32 *wsptr; - __u8 *outptr; - int ctr; - __s32 dcval; - __s32 workspace[64]; - - inptr = data; - wsptr = workspace; - for (ctr = 8; ctr > 0; ctr--) { - - if ((inptr[8] | inptr[16] | inptr[24] | - inptr[32] | inptr[40] | inptr[48] | inptr[56]) == 0) { - dcval = inptr[0]; - wsptr[0] = dcval; - wsptr[8] = dcval; - wsptr[16] = dcval; - wsptr[24] = dcval; - wsptr[32] = dcval; - wsptr[40] = dcval; - wsptr[48] = dcval; - wsptr[56] = dcval; - - inptr++; - wsptr++; - continue; - } - - tmp0 = inptr[0]; - tmp1 = inptr[16]; - tmp2 = inptr[32]; - tmp3 = inptr[48]; - - tmp10 = tmp0 + tmp2; - tmp11 = tmp0 - tmp2; - - tmp13 = tmp1 + tmp3; - tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; - - tmp0 = tmp10 + tmp13; - tmp3 = tmp10 - tmp13; - tmp1 = tmp11 + tmp12; - tmp2 = tmp11 - tmp12; - - tmp4 = inptr[8]; - tmp5 = inptr[24]; - tmp6 = inptr[40]; - tmp7 = inptr[56]; - - z13 = tmp6 + tmp5; - z10 = tmp6 - tmp5; - z11 = tmp4 + tmp7; - z12 = tmp4 - tmp7; - - tmp7 = z11 + z13; - tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); - - z5 = MULTIPLY(z10 + z12, FIX_1_847759065); - tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; - tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; - - tmp6 = tmp12 - tmp7; - tmp5 = tmp11 - tmp6; - tmp4 = tmp10 + tmp5; - - wsptr[0] = (__s32) (tmp0 + tmp7); - wsptr[56] = (__s32) (tmp0 - tmp7); - wsptr[8] = (__s32) (tmp1 + tmp6); - wsptr[48] = (__s32) (tmp1 - tmp6); - wsptr[16] = (__s32) (tmp2 + tmp5); - wsptr[40] = (__s32) (tmp2 - tmp5); - wsptr[32] = (__s32) (tmp3 + tmp4); - wsptr[24] = (__s32) (tmp3 - tmp4); - - inptr++; - wsptr++; - } - - wsptr = workspace; - for (ctr = 0; ctr < 8; ctr++) { - outptr = &(odata[ctr*rskip]); - - tmp10 = wsptr[0] + wsptr[4]; - tmp11 = wsptr[0] - wsptr[4]; - - tmp13 = wsptr[2] + wsptr[6]; - tmp12 = MULTIPLY(wsptr[2] - wsptr[6], FIX_1_414213562) - tmp13; - - tmp0 = tmp10 + tmp13; - tmp3 = tmp10 - tmp13; - tmp1 = tmp11 + tmp12; - tmp2 = tmp11 - tmp12; - - z13 = wsptr[5] + wsptr[3]; - z10 = wsptr[5] - wsptr[3]; - z11 = wsptr[1] + wsptr[7]; - z12 = wsptr[1] - wsptr[7]; - - tmp7 = z11 + z13; - tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); - - z5 = MULTIPLY(z10 + z12, FIX_1_847759065); - tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; - tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; - - tmp6 = tmp12 - tmp7; - tmp5 = tmp11 - tmp6; - tmp4 = tmp10 + tmp5; - - outptr[0] = RL(DESCALE(tmp0 + tmp7)); - outptr[7] = RL(DESCALE(tmp0 - tmp7)); - outptr[1] = RL(DESCALE(tmp1 + tmp6)); - outptr[6] = RL(DESCALE(tmp1 - tmp6)); - outptr[2] = RL(DESCALE(tmp2 + tmp5)); - outptr[5] = RL(DESCALE(tmp2 - tmp5)); - outptr[4] = RL(DESCALE(tmp3 + tmp4)); - outptr[3] = RL(DESCALE(tmp3 - tmp4)); - - wsptr += 8; - } -#endif -} -/* - -Main Routines - -This file contains most of the initialisation and control functions - -(C) Justin Schoeman 1998 - -*/ - -/* - -Private function - -Initialise all the cache-aliged data blocks - -*/ - -void RTjpeg_init_data(void) -{ - unsigned long dptr; - - dptr=(unsigned long)&(RTjpeg_alldata[0]); - dptr+=32; - dptr=dptr>>5; - dptr=dptr<<5; /* cache align data */ - - RTjpeg_block=(__s16 *)dptr; - dptr+=sizeof(__s16)*64; - RTjpeg_lqt=(__s32 *)dptr; - dptr+=sizeof(__s32)*64; - RTjpeg_cqt=(__s32 *)dptr; - dptr+=sizeof(__s32)*64; - RTjpeg_liqt=(__u32 *)dptr; - dptr+=sizeof(__u32)*64; - RTjpeg_ciqt=(__u32 *)dptr; -} - -/* - -External Function - -Re-set quality factor - -Input: buf -> pointer to 128 ints for quant values store to pass back to - init_decompress. - Q -> quality factor (192=best, 32=worst) -*/ - -void RTjpeg_init_Q(__u8 Q) -{ - int i; - __u64 qual; - - qual=(__u64)Q<<(32-7); /* 32 bit FP, 255=2, 0=0 */ - - for(i=0; i<64; i++) - { - RTjpeg_lqt[i]=(__s32)((qual/((__u64)RTjpeg_lum_quant_tbl[i]<<16))>>3); - if(RTjpeg_lqt[i]==0)RTjpeg_lqt[i]=1; - RTjpeg_cqt[i]=(__s32)((qual/((__u64)RTjpeg_chrom_quant_tbl[i]<<16))>>3); - if(RTjpeg_cqt[i]==0)RTjpeg_cqt[i]=1; - RTjpeg_liqt[i]=(1<<16)/(RTjpeg_lqt[i]<<3); - RTjpeg_ciqt[i]=(1<<16)/(RTjpeg_cqt[i]<<3); - RTjpeg_lqt[i]=((1<<16)/RTjpeg_liqt[i])>>3; - RTjpeg_cqt[i]=((1<<16)/RTjpeg_ciqt[i])>>3; - } - - RTjpeg_lb8=0; - while(RTjpeg_liqt[RTjpeg_ZZ[++RTjpeg_lb8]]<=8); - RTjpeg_lb8--; - RTjpeg_cb8=0; - while(RTjpeg_ciqt[RTjpeg_ZZ[++RTjpeg_cb8]]<=8); - RTjpeg_cb8--; - - RTjpeg_dct_init(); - RTjpeg_idct_init(); - RTjpeg_quant_init(); -} - -/* - -External Function - -Initialise compression. - -Input: buf -> pointer to 128 ints for quant values store to pass back to - init_decompress. - width -> width of image - height -> height of image - Q -> quality factor (192=best, 32=worst) - -*/ - -void RTjpeg_init_compress(__u32 *buf, int width, int height, __u8 Q) -{ - int i; - __u64 qual; - - RTjpeg_init_data(); - - RTjpeg_width=width; - RTjpeg_height=height; - RTjpeg_Ywidth = RTjpeg_width>>3; - RTjpeg_Ysize=width * height; - RTjpeg_Cwidth = RTjpeg_width>>4; - RTjpeg_Csize= (width>>1) * height; - - qual=(__u64)Q<<(32-7); /* 32 bit FP, 255=2, 0=0 */ - - for(i=0; i<64; i++) - { - RTjpeg_lqt[i]=(__s32)((qual/((__u64)RTjpeg_lum_quant_tbl[i]<<16))>>3); - if(RTjpeg_lqt[i]==0)RTjpeg_lqt[i]=1; - RTjpeg_cqt[i]=(__s32)((qual/((__u64)RTjpeg_chrom_quant_tbl[i]<<16))>>3); - if(RTjpeg_cqt[i]==0)RTjpeg_cqt[i]=1; - RTjpeg_liqt[i]=(1<<16)/(RTjpeg_lqt[i]<<3); - RTjpeg_ciqt[i]=(1<<16)/(RTjpeg_cqt[i]<<3); - RTjpeg_lqt[i]=((1<<16)/RTjpeg_liqt[i])>>3; - RTjpeg_cqt[i]=((1<<16)/RTjpeg_ciqt[i])>>3; - } - - RTjpeg_lb8=0; - while(RTjpeg_liqt[RTjpeg_ZZ[++RTjpeg_lb8]]<=8); - RTjpeg_lb8--; - RTjpeg_cb8=0; - while(RTjpeg_ciqt[RTjpeg_ZZ[++RTjpeg_cb8]]<=8); - RTjpeg_cb8--; - - RTjpeg_dct_init(); - RTjpeg_quant_init(); - - for(i=0; i<64; i++) - buf[i]=RTjpeg_liqt[i]; - for(i=0; i<64; i++) - buf[64+i]=RTjpeg_ciqt[i]; -} - -void RTjpeg_init_decompress(__u32 *buf, int width, int height) -{ - int i; - - RTjpeg_init_data(); - - RTjpeg_width=width; - RTjpeg_height=height; - RTjpeg_Ywidth = RTjpeg_width>>3; - RTjpeg_Ysize=width * height; - RTjpeg_Cwidth = RTjpeg_width>>4; - RTjpeg_Csize= (width>>1) * height; - - for(i=0; i<64; i++) - { - RTjpeg_liqt[i]=buf[i]; - RTjpeg_ciqt[i]=buf[i+64]; - } - - RTjpeg_lb8=0; - while(RTjpeg_liqt[RTjpeg_ZZ[++RTjpeg_lb8]]<=8); - RTjpeg_lb8--; - RTjpeg_cb8=0; - while(RTjpeg_ciqt[RTjpeg_ZZ[++RTjpeg_cb8]]<=8); - RTjpeg_cb8--; - - RTjpeg_idct_init(); - -// RTjpeg_color_init(); -} - -int RTjpeg_compressYUV420(__s8 *sp, unsigned char *bp) -{ - __s8 * sb; - register __s8 * bp1 = bp + (RTjpeg_width<<3); - register __s8 * bp2 = bp + RTjpeg_Ysize; - register __s8 * bp3 = bp2 + (RTjpeg_Csize>>1); - register int i, j, k; - -#ifdef MMX - emms(); -#endif - sb=sp; -/* Y */ - for(i=RTjpeg_height>>1; i; i-=8) - { - for(j=0, k=0; j>1); - } - if(*sp==-1)sp++; - else - { - sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt); - RTjpeg_idct(bp3+k, RTjpeg_block, RTjpeg_width>>1); - } - } - bp+=RTjpeg_width<<3; - bp2+=RTjpeg_width<<2; - bp3+=RTjpeg_width<<2; - } -#ifdef MMX - emms(); -#endif -} - -void RTjpeg_decompressYUV420(__s8 *sp, __u8 *bp) -{ - register __s8 * bp1 = bp + (RTjpeg_width<<3); - register __s8 * bp2 = bp + RTjpeg_Ysize; - register __s8 * bp3 = bp2 + (RTjpeg_Csize>>1); - int i, j,k; - -#ifdef MMX - emms(); -#endif - -/* Y */ - for(i=RTjpeg_height>>1; i; i-=8) - { - for(k=0, j=0; j>1); - } - if(*sp==-1)sp++; - else - { - sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt); - RTjpeg_idct(bp3+k, RTjpeg_block, RTjpeg_width>>1); - } - } - bp+=RTjpeg_width<<4; - bp1+=RTjpeg_width<<4; - bp2+=RTjpeg_width<<2; - bp3+=RTjpeg_width<<2; - } -#ifdef MMX - emms(); -#endif -} - -void RTjpeg_decompress8(__s8 *sp, __u8 *bp) -{ - int i, j; - -#ifdef MMX - emms(); -#endif - -/* Y */ - for(i=0; i>5; - RTjpeg_old=(__s16 *)(tmp<<5); - } - if (!RTjpeg_old) - { - fprintf(stderr, "RTjpeg: Could not allocate memory\n"); - exit(-1); - } - bzero(RTjpeg_old, ((4*RTjpeg_width*RTjpeg_height))); -} - -#ifdef MMX - -int RTjpeg_bcomp(__s16 *old, mmx_t *mask) -{ - int i; - mmx_t *mold=(mmx_t *)old; - mmx_t *mblock=(mmx_t *)RTjpeg_block; - volatile mmx_t result; - static mmx_t neg=(mmx_t)(unsigned long long)0xffffffffffffffffULL; - - movq_m2r(*mask, mm7); - movq_m2r(neg, mm6); - pxor_r2r(mm5, mm5); - - for(i=0; i<8; i++) - { - movq_m2r(*(mblock++), mm0); - movq_m2r(*(mblock++), mm2); - movq_m2r(*(mold++), mm1); - movq_m2r(*(mold++), mm3); - psubsw_r2r(mm1, mm0); - psubsw_r2r(mm3, mm2); - movq_r2r(mm0, mm1); - movq_r2r(mm2, mm3); - pcmpgtw_r2r(mm7, mm0); - pcmpgtw_r2r(mm7, mm2); - pxor_r2r(mm6, mm1); - pxor_r2r(mm6, mm3); - pcmpgtw_r2r(mm7, mm1); - pcmpgtw_r2r(mm7, mm3); - por_r2r(mm0, mm5); - por_r2r(mm2, mm5); - por_r2r(mm1, mm5); - por_r2r(mm3, mm5); - } - movq_r2m(mm5, result); - - if(result.q) - { -// if(!RTjpeg_mtest) -// for(i=0; i<16; i++)((__u64 *)old)[i]=((__u64 *)RTjpeg_block)[i]; - return 0; - } -// printf("."); - return 1; -} - -#else -int RTjpeg_bcomp(__s16 *old, __u16 *mask) -{ - int i; - - for(i=0; i<64; i++) - if(abs(old[i]-RTjpeg_block[i])>*mask) - { - if(!RTjpeg_mtest) - for(i=0; i<16; i++)((__u64 *)old)[i]=((__u64 *)RTjpeg_block)[i]; - return 0; - } - return 1; -} -#endif - -void RTjpeg_set_test(int i) -{ - RTjpeg_mtest=i; -} - -int RTjpeg_mcompressYUV420(__s8 *sp, unsigned char *bp, __u16 lmask, __u16 cmask) -{ - __s8 * sb; -//rh __s16 *block; - register __s8 * bp1 = bp + (RTjpeg_width<<3); - register __s8 * bp2 = bp + RTjpeg_Ysize; - register __s8 * bp3 = bp2 + (RTjpeg_Csize>>1); - register int i, j, k; - -#ifdef MMX - emms(); - RTjpeg_lmask=(mmx_t)(((__u64)lmask<<48)|((__u64)lmask<<32)|((__u64)lmask<<16)|lmask); - RTjpeg_cmask=(mmx_t)(((__u64)cmask<<48)|((__u64)cmask<<32)|((__u64)cmask<<16)|cmask); -#else - RTjpeg_lmask=lmask; - RTjpeg_cmask=cmask; -#endif - - sb=sp; - block=RTjpeg_old; -/* Y */ - for(i=RTjpeg_height>>1; i; i-=8) - { - for(j=0, k=0; j>16; - *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y-crG-cbG)>>16; - *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y+cbB)>>16; - *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); - - y=(bufy[j+1]-16)*Ky; - - tmp=(y+crR)>>16; - *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y-crG-cbG)>>16; - *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y+cbB)>>16; - *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); - - } - bufy+=yskip; - } -} - - -void RTjpeg_yuv420rgb(__u8 *buf, __u8 *rgb, int stride) -{ - int tmp; - int i, j; - __s32 y, crR, crG, cbG, cbB; - __u8 *bufcr, *bufcb, *bufy, *bufoute, *bufouto; - int oskip, yskip; - - if(stride==0) - oskip=RTjpeg_width*3; - else - oskip=2*stride-RTjpeg_width*3; - - yskip=RTjpeg_width; - - bufcb=&buf[RTjpeg_width*RTjpeg_height]; - bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/4]; - bufy=&buf[0]; - bufoute=rgb; - bufouto=rgb+RTjpeg_width*3; - - for(i=0; i<(RTjpeg_height>>1); i++) - { - for(j=0; j>16; - *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y-crG-cbG)>>16; - *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y+cbB)>>16; - *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); - - y=(bufy[j+1]-16)*Ky; - - tmp=(y+crR)>>16; - *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y-crG-cbG)>>16; - *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y+cbB)>>16; - *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); - - y=(bufy[j+yskip]-16)*Ky; - - tmp=(y+crR)>>16; - *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y-crG-cbG)>>16; - *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y+cbB)>>16; - *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); - - y=(bufy[j+1+yskip]-16)*Ky; - - tmp=(y+crR)>>16; - *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y-crG-cbG)>>16; - *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y+cbB)>>16; - *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); - - } - bufoute+=oskip; - bufouto+=oskip; - bufy+=yskip<<1; - } -} - - -void RTjpeg_yuvrgb32(__u8 *buf, __u8 *rgb, int stride) -{ - int tmp; - int i, j; - __s32 y, crR, crG, cbG, cbB; - __u8 *bufcr, *bufcb, *bufy, *bufoute, *bufouto; - int oskip, yskip; - - if(stride==0) - oskip=RTjpeg_width*4; - else - oskip = 2*stride-RTjpeg_width*4; - yskip=RTjpeg_width; - - bufcb=&buf[RTjpeg_width*RTjpeg_height]; - bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/2]; - bufy=&buf[0]; - bufoute=rgb; - bufouto=rgb+RTjpeg_width*4; - - for(i=0; i<(RTjpeg_height>>1); i++) - { - for(j=0; j>16; - *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y-crG-cbG)>>16; - *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y+crR)>>16; - *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); - bufoute++; - - y=(bufy[j+1]-16)*Ky; - - tmp=(y+cbB)>>16; - *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y-crG-cbG)>>16; - *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y+crR)>>16; - *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); - bufoute++; - - y=(bufy[j+yskip]-16)*Ky; - - tmp=(y+cbB)>>16; - *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y-crG-cbG)>>16; - *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y+crR)>>16; - *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); - bufouto++; - - y=(bufy[j+1+yskip]-16)*Ky; - - tmp=(y+cbB)>>16; - *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y-crG-cbG)>>16; - *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y+crR)>>16; - *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); - bufouto++; - - } - bufoute+=oskip; - bufouto+=oskip; - bufy+=yskip<<1; - } -} - -void RTjpeg_yuvrgb24(__u8 *buf, __u8 *rgb, int stride) -{ - int tmp; - int i, j; - __s32 y, crR, crG, cbG, cbB; - __u8 *bufcr, *bufcb, *bufy, *bufoute, *bufouto; - int oskip, yskip; - - if(stride==0) - oskip=RTjpeg_width*3; - else - oskip=2*stride - RTjpeg_width*3; - - yskip=RTjpeg_width; - - bufcb=&buf[RTjpeg_width*RTjpeg_height]; - bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/4]; - bufy=&buf[0]; - bufoute=rgb; - bufouto=rgb+RTjpeg_width*3; - - for(i=0; i<(RTjpeg_height>>1); i++) - { - for(j=0; j>16; - *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y-crG-cbG)>>16; - *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y+crR)>>16; - *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); - - y=(bufy[j+1]-16)*Ky; - - tmp=(y+cbB)>>16; - *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y-crG-cbG)>>16; - *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y+crR)>>16; - *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); - - y=(bufy[j+yskip]-16)*Ky; - - tmp=(y+cbB)>>16; - *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y-crG-cbG)>>16; - *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y+crR)>>16; - *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); - - y=(bufy[j+1+yskip]-16)*Ky; - - tmp=(y+cbB)>>16; - *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y-crG-cbG)>>16; - *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y+crR)>>16; - *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); - - } - bufoute+=oskip; - bufouto+=oskip; - bufy+=yskip<<1; - } -} - -void RTjpeg_yuvrgb16(__u8 *buf, __u8 *rgb, int stride) -{ - int tmp; - int i, j; - __s32 y, crR, crG, cbG, cbB; - __u8 *bufcr, *bufcb, *bufy, *bufoute, *bufouto; - int oskip, yskip; - unsigned char r, g, b; - - if(stride==0) - oskip=RTjpeg_width*2; - else - oskip=2*stride-RTjpeg_width*2; - - yskip=RTjpeg_width; - - bufcb=&buf[RTjpeg_width*RTjpeg_height]; - bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/4]; - bufy=&buf[0]; - bufoute=rgb; - bufouto=rgb+RTjpeg_width*2; - - for(i=0; i<(RTjpeg_height>>1); i++) - { - for(j=0; j>16; - b=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y-crG-cbG)>>16; - g=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y+crR)>>16; - r=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(int)((int)b >> 3); - tmp|=(int)(((int)g >> 2) << 5); - tmp|=(int)(((int)r >> 3) << 11); - *(bufoute++)=tmp&0xff; - *(bufoute++)=tmp>>8; - - - y=(bufy[j+1]-16)*Ky; - - tmp=(y+cbB)>>16; - b=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y-crG-cbG)>>16; - g=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y+crR)>>16; - r=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(int)((int)b >> 3); - tmp|=(int)(((int)g >> 2) << 5); - tmp|=(int)(((int)r >> 3) << 11); - *(bufoute++)=tmp&0xff; - *(bufoute++)=tmp>>8; - - y=(bufy[j+yskip]-16)*Ky; - - tmp=(y+cbB)>>16; - b=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y-crG-cbG)>>16; - g=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y+crR)>>16; - r=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(int)((int)b >> 3); - tmp|=(int)(((int)g >> 2) << 5); - tmp|=(int)(((int)r >> 3) << 11); - *(bufouto++)=tmp&0xff; - *(bufouto++)=tmp>>8; - - y=(bufy[j+1+yskip]-16)*Ky; - - tmp=(y+cbB)>>16; - b=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y-crG-cbG)>>16; - g=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(y+crR)>>16; - r=(tmp>255)?255:((tmp<0)?0:tmp); - tmp=(int)((int)b >> 3); - tmp|=(int)(((int)g >> 2) << 5); - tmp|=(int)(((int)r >> 3) << 11); - *(bufouto++)=tmp&0xff; - *(bufouto++)=tmp>>8; - - } - bufoute+=oskip; - bufouto+=oskip; - bufy+=yskip<<1; - } -} - -/* fix stride */ - -void RTjpeg_yuvrgb8(__u8 *buf, __u8 *rgb, int stride) -{ - bcopy(buf, rgb, RTjpeg_width*RTjpeg_height); -} - diff -r fd85802f755b -r 628c85c15c7b RTjpegN.h --- a/RTjpegN.h Sat Apr 13 17:44:14 2002 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,58 +0,0 @@ -/* - RTjpeg (C) Justin Schoeman 1998 (justin@suntiger.ee.up.ac.za) - - With modifications by: - (c) 1998, 1999 by Joerg Walter - and - (c) 1999 by Wim Taymans - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - -*/ - -#ifndef _I386_TYPES_H -typedef unsigned char __u8; -typedef unsigned short __u16; -typedef unsigned long __u32; -typedef unsigned long long __u64; -typedef signed char __s8; -typedef signed short __s16; -typedef signed long __s32; -#endif - -extern void RTjpeg_init_Q(__u8 Q); -extern void RTjpeg_init_compress(long unsigned int *buf, int width, int height, __u8 Q); -extern void RTjpeg_init_decompress(long unsigned int *buf, int width, int height); -extern int RTjpeg_compressYUV420(__s8 *sp, unsigned char *bp); -extern int RTjpeg_compressYUV422(__s8 *sp, unsigned char *bp); -extern void RTjpeg_decompressYUV420(__s8 *sp, __u8 *bp); -extern void RTjpeg_decompressYUV422(__s8 *sp, __u8 *bp); -extern int RTjpeg_compress8(__s8 *sp, unsigned char *bp); -extern void RTjpeg_decompress8(__s8 *sp, __u8 *bp); - -extern void RTjpeg_init_mcompress(void); -extern int RTjpeg_mcompressYUV420(__s8 *sp, unsigned char *bp, __u16 lmask, __u16 cmask); -extern int RTjpeg_mcompressYUV422(__s8 *sp, unsigned char *bp, __u16 lmask, __u16 cmask); -extern int RTjpeg_mcompress8(__s8 *sp, unsigned char *bp, __u16 lmask); -extern void RTjpeg_set_test(int i); - -extern void RTjpeg_yuv420rgb(__u8 *buf, __u8 *rgb, int stride); -extern void RTjpeg_yuv422rgb(__u8 *buf, __u8 *rgb, int stride); -extern void RTjpeg_yuvrgb8(__u8 *buf, __u8 *rgb, int stride); -extern void RTjpeg_yuvrgb16(__u8 *buf, __u8 *rgb, int stride); -extern void RTjpeg_yuvrgb24(__u8 *buf, __u8 *rgb, int stride); -extern void RTjpeg_yuvrgb32(__u8 *buf, __u8 *rgb, int stride); - - diff -r fd85802f755b -r 628c85c15c7b alaw.h --- a/alaw.h Sat Apr 13 17:44:14 2002 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,71 +0,0 @@ -// Generated by TOOLS/alaw-gen.c - -short alaw2short[]={ - -5504, -5248, -6016, -5760, -4480, -4224, -4992, -4736, - -7552, -7296, -8064, -7808, -6528, -6272, -7040, -6784, - -2752, -2624, -3008, -2880, -2240, -2112, -2496, -2368, - -3776, -3648, -4032, -3904, -3264, -3136, -3520, -3392, --22016,-20992,-24064,-23040,-17920,-16896,-19968,-18944, --30208,-29184,-32256,-31232,-26112,-25088,-28160,-27136, --11008,-10496,-12032,-11520, -8960, -8448, -9984, -9472, --15104,-14592,-16128,-15616,-13056,-12544,-14080,-13568, - -344, -328, -376, -360, -280, -264, -312, -296, - -472, -456, -504, -488, -408, -392, -440, -424, - -88, -72, -120, -104, -24, -8, -56, -40, - -216, -200, -248, -232, -152, -136, -184, -168, - -1376, -1312, -1504, -1440, -1120, -1056, -1248, -1184, - -1888, -1824, -2016, -1952, -1632, -1568, -1760, -1696, - -688, -656, -752, -720, -560, -528, -624, -592, - -944, -912, -1008, -976, -816, -784, -880, -848, - 5504, 5248, 6016, 5760, 4480, 4224, 4992, 4736, - 7552, 7296, 8064, 7808, 6528, 6272, 7040, 6784, - 2752, 2624, 3008, 2880, 2240, 2112, 2496, 2368, - 3776, 3648, 4032, 3904, 3264, 3136, 3520, 3392, - 22016, 20992, 24064, 23040, 17920, 16896, 19968, 18944, - 30208, 29184, 32256, 31232, 26112, 25088, 28160, 27136, - 11008, 10496, 12032, 11520, 8960, 8448, 9984, 9472, - 15104, 14592, 16128, 15616, 13056, 12544, 14080, 13568, - 344, 328, 376, 360, 280, 264, 312, 296, - 472, 456, 504, 488, 408, 392, 440, 424, - 88, 72, 120, 104, 24, 8, 56, 40, - 216, 200, 248, 232, 152, 136, 184, 168, - 1376, 1312, 1504, 1440, 1120, 1056, 1248, 1184, - 1888, 1824, 2016, 1952, 1632, 1568, 1760, 1696, - 688, 656, 752, 720, 560, 528, 624, 592, - 944, 912, 1008, 976, 816, 784, 880, 848 -}; - -short ulaw2short[]={ --32124,-31100,-30076,-29052,-28028,-27004,-25980,-24956, --23932,-22908,-21884,-20860,-19836,-18812,-17788,-16764, --15996,-15484,-14972,-14460,-13948,-13436,-12924,-12412, --11900,-11388,-10876,-10364, -9852, -9340, -8828, -8316, - -7932, -7676, -7420, -7164, -6908, -6652, -6396, -6140, - -5884, -5628, -5372, -5116, -4860, -4604, -4348, -4092, - -3900, -3772, -3644, -3516, -3388, -3260, -3132, -3004, - -2876, -2748, -2620, -2492, -2364, -2236, -2108, -1980, - -1884, -1820, -1756, -1692, -1628, -1564, -1500, -1436, - -1372, -1308, -1244, -1180, -1116, -1052, -988, -924, - -876, -844, -812, -780, -748, -716, -684, -652, - -620, -588, -556, -524, -492, -460, -428, -396, - -372, -356, -340, -324, -308, -292, -276, -260, - -244, -228, -212, -196, -180, -164, -148, -132, - -120, -112, -104, -96, -88, -80, -72, -64, - -56, -48, -40, -32, -24, -16, -8, 0, - 32124, 31100, 30076, 29052, 28028, 27004, 25980, 24956, - 23932, 22908, 21884, 20860, 19836, 18812, 17788, 16764, - 15996, 15484, 14972, 14460, 13948, 13436, 12924, 12412, - 11900, 11388, 10876, 10364, 9852, 9340, 8828, 8316, - 7932, 7676, 7420, 7164, 6908, 6652, 6396, 6140, - 5884, 5628, 5372, 5116, 4860, 4604, 4348, 4092, - 3900, 3772, 3644, 3516, 3388, 3260, 3132, 3004, - 2876, 2748, 2620, 2492, 2364, 2236, 2108, 1980, - 1884, 1820, 1756, 1692, 1628, 1564, 1500, 1436, - 1372, 1308, 1244, 1180, 1116, 1052, 988, 924, - 876, 844, 812, 780, 748, 716, 684, 652, - 620, 588, 556, 524, 492, 460, 428, 396, - 372, 356, 340, 324, 308, 292, 276, 260, - 244, 228, 212, 196, 180, 164, 148, 132, - 120, 112, 104, 96, 88, 80, 72, 64, - 56, 48, 40, 32, 24, 16, 8, 0 -}; diff -r fd85802f755b -r 628c85c15c7b cinepak.c --- a/cinepak.c Sat Apr 13 17:44:14 2002 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,886 +0,0 @@ -/* ------------------------------------------------------------------------ - * Radius Cinepak Video Decoder - * - * Dr. Tim Ferguson, 2001. - * For more details on the algorithm: - * http://www.csse.monash.edu.au/~timf/videocodec.html - * - * This is basically a vector quantiser with adaptive vector density. The - * frame is segmented into 4x4 pixel blocks, and each block is coded using - * either 1 or 4 vectors. - * - * There are still some issues with this code yet to be resolved. In - * particular with decoding in the strip boundaries. - * ------------------------------------------------------------------------ */ -#include -#include -#include -#include -#include -#include - -#include "config.h" -#include "mp_msg.h" -#include "bswap.h" - -#include "libvo/img_format.h" -#include "mp_image.h" - -#define DBUG 0 -#define MAX_STRIPS 32 - -/* ------------------------------------------------------------------------ */ -typedef struct -{ - unsigned char y0, y1, y2, y3; - char u, v; - - // These variables are for YV12 output: The v1 vars are for - // when the vector is doublesized and used by itself to paint a - // 4x4 block. - // This quad (y0 y0 y1 y1) is used on the 2 upper rows. - unsigned long yv12_v1_u; - // This quad (y2 y2 y3 y3) is used on the 2 lower rows. - unsigned long yv12_v1_l; - // The v4 vars are for when the vector is used as 1 of 4 vectors - // to paint a 4x4 block. - // Upper pair (y0 y1): - unsigned short yv12_v4_u; - // Lower pair (y2 y3): - unsigned short yv12_v4_l; - - // These longs are for YUY2 output: The v1 vars are for when the - // vector is doublesized and used by itself to paint a 4x4 block. - // The names stand for the upper-left, upper-right, - // lower-left, and lower-right YUY2 pixel pairs. - unsigned long yuy2_v1_ul, yuy2_v1_ur; - unsigned long yuy2_v1_ll, yuy2_v1_lr; - // The v4 vars are for when the vector is used as 1 of 4 vectors - // to paint a 4x4 block. The names stand for upper and lower - // YUY2 pixel pairs. - unsigned long yuy2_v4_u, yuy2_v4_l; - - // These longs are for BGR32 output - unsigned long rgb0, rgb1, rgb2, rgb3; - - // These char arrays are for BGR24 output - unsigned char r[4], g[4], b[4]; -} cvid_codebook; - -typedef struct { - cvid_codebook *v4_codebook[MAX_STRIPS]; - cvid_codebook *v1_codebook[MAX_STRIPS]; - unsigned long strip_num; -} cinepak_info; - - -/* ------------------------------------------------------------------------ */ -static unsigned char *in_buffer, uiclip[1024], *uiclp = NULL; - -#define SCALEBITS 16 -#define ONE_HALF ((long) 1 << (SCALEBITS-1)) -#define FIX(x) ((long) ((x) * (1L<y1)<<24)|(cb->y1<<16)|(((unsigned char)cb->y0)<<8)|(cb->y0)) -#define PACK_YV12_V4_Y(cb,y0,y1) le2me_16((((unsigned char)cb->y1)<<8)|(cb->y0)) - -static inline void read_codebook_yv12(cvid_codebook *c, int mode) -{ -unsigned char y0, y1, y2, y3, u, v; -int y_uv; - - if(mode) /* black and white */ - { - c->y0 = get_byte(); - c->y1 = get_byte(); - c->y2 = get_byte(); - c->y3 = get_byte(); - c->u = c->v = 128; - } - else /* colour */ - { - y0 = get_byte(); /* luma */ - y1 = get_byte(); - y2 = get_byte(); - y3 = get_byte(); - u = 128+get_byte(); /* chroma */ - v = 128+get_byte(); - - /* YUV * inv(CinYUV) - * | Y | | 1 -0.0655 0.0110 | | CY | - * | Cb | = | 0 1.1656 -0.0062 | | CU | - * | Cr | | 0 0.0467 1.4187 | | CV | - */ - y_uv = (int)((CU_Y_tab[u] + CV_Y_tab[v]) >> SCALEBITS); - c->y0 = uiclp[y0 + y_uv]; - c->y1 = uiclp[y1 + y_uv]; - c->y2 = uiclp[y2 + y_uv]; - c->y3 = uiclp[y3 + y_uv]; - c->u = uiclp[(int)((CU_Cb_tab[u] + CV_Cb_tab[v]) >> SCALEBITS)]; - c->v = uiclp[(int)((CU_Cr_tab[u] + CV_Cr_tab[v]) >> SCALEBITS)]; - - c->yv12_v1_u = PACK_YV12_V1_Y(c, y0, y1); - c->yv12_v1_l = PACK_YV12_V1_Y(c, y2, y3); - c->yv12_v4_u = PACK_YV12_V4_Y(c, y0, y1); - c->yv12_v4_l = PACK_YV12_V4_Y(c, y2, y3); - } -} - -/* ---------------------------------------------------------------------- */ - -inline void cvid_v1_yv12(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb) -{ -unsigned char *p; -int stride; - - if(y+3>=(unsigned int)mpi->height) return; // avoid sig11 - - // take care of the luminance - stride = mpi->stride[0]; - p = mpi->planes[0]+y*stride+x; - *((unsigned int*)p)=cb->yv12_v1_u; - *((unsigned int*)(p+stride))=cb->yv12_v1_u; - *((unsigned int*)(p+stride*2))=cb->yv12_v1_l; - *((unsigned int*)(p+stride*3))=cb->yv12_v1_l; - - // now for the chrominance - x/=2; y/=2; - - stride = mpi->stride[1]; - p = mpi->planes[1]+y*stride+x; - p[0]=p[1]=p[stride]=p[stride+1]=cb->u; - - stride = mpi->stride[2]; - p = mpi->planes[2]+y*stride+x; - p[0]=p[1]=p[stride]=p[stride+1]=cb->v; - -} - -/* ---------------------------------------------------------------------- */ -inline void cvid_v4_yv12(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb0, - cvid_codebook *cb1, cvid_codebook *cb2, cvid_codebook *cb3) -{ -unsigned char *p; -int stride; - - if(y+3>=(unsigned int)mpi->height) return; // avoid sig11 - - // take care of the luminance - stride = mpi->stride[0]; - p = mpi->planes[0]+y*stride+x; - ((unsigned short*)p)[0]=cb0->yv12_v4_u; - ((unsigned short*)p)[1]=cb1->yv12_v4_u; - ((unsigned short*)(p+stride))[0]=cb0->yv12_v4_l; - ((unsigned short*)(p+stride))[1]=cb1->yv12_v4_l; - ((unsigned short*)(p+stride*2))[0]=cb2->yv12_v4_u; - ((unsigned short*)(p+stride*2))[1]=cb3->yv12_v4_u; - ((unsigned short*)(p+stride*3))[0]=cb2->yv12_v4_l; - ((unsigned short*)(p+stride*3))[1]=cb3->yv12_v4_l; - - // now for the chrominance - x/=2; y/=2; - - stride = mpi->stride[1]; - p = mpi->planes[1]+y*stride+x; - p[0]=cb0->u; p[1]=cb1->u; - p[stride]=cb2->u; p[stride+1]=cb3->u; - - stride = mpi->stride[2]; - p = mpi->planes[2]+y*stride+x; - p[0]=cb0->v; p[1]=cb1->v; - p[stride]=cb2->v; p[stride+1]=cb3->v; - -} - -/* ---------------------------------------------------------------------- */ - -#define PACK_YUY2(cb,y0,y1,u,v) le2me_32(((((unsigned char)cb->v)<<24)|(cb->y1<<16)|(((unsigned char)cb->u)<<8)|(cb->y0))) - -static inline void read_codebook_yuy2(cvid_codebook *c, int mode) -{ -unsigned char y0, y1, y2, y3, u, v; -int y_uv; - - if(mode) /* black and white */ - { - c->y0 = get_byte(); - c->y1 = get_byte(); - c->y2 = get_byte(); - c->y3 = get_byte(); - c->u = c->v = 128; - } - else /* colour */ - { - y0 = get_byte(); /* luma */ - y1 = get_byte(); - y2 = get_byte(); - y3 = get_byte(); - u = 128+get_byte(); /* chroma */ - v = 128+get_byte(); - - /* YUV * inv(CinYUV) - * | Y | | 1 -0.0655 0.0110 | | CY | - * | Cb | = | 0 1.1656 -0.0062 | | CU | - * | Cr | | 0 0.0467 1.4187 | | CV | - */ - y_uv = (int)((CU_Y_tab[u] + CV_Y_tab[v]) >> SCALEBITS); - c->y0 = uiclp[y0 + y_uv]; - c->y1 = uiclp[y1 + y_uv]; - c->y2 = uiclp[y2 + y_uv]; - c->y3 = uiclp[y3 + y_uv]; - c->u = uiclp[(int)((CU_Cb_tab[u] + CV_Cb_tab[v]) >> SCALEBITS)]; - c->v = uiclp[(int)((CU_Cr_tab[u] + CV_Cr_tab[v]) >> SCALEBITS)]; - - c->yuy2_v4_u = PACK_YUY2(c, y0, y1, u, v); - c->yuy2_v4_l = PACK_YUY2(c, y2, y3, u, v); - c->yuy2_v1_ul = PACK_YUY2(c, y0, y0, u, v); - c->yuy2_v1_ur = PACK_YUY2(c, y1, y1, u, v); - c->yuy2_v1_ll = PACK_YUY2(c, y2, y2, u, v); - c->yuy2_v1_lr = PACK_YUY2(c, y3, y3, u, v); - } -} - -/* ------------------------------------------------------------------------ */ -inline void cvid_v1_yuy2(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb) -{ -int stride = mpi->stride[0] / 2; -unsigned long *vptr = (unsigned long *)mpi->planes[0]; - - if(y+3>=(unsigned int)mpi->height) return; // avoid sig11 - - vptr += (y * mpi->stride[0] + x) / 2; - - vptr[0] = cb->yuy2_v1_ul; - vptr[1] = cb->yuy2_v1_ur; - - vptr += stride; - vptr[0] = cb->yuy2_v1_ul; - vptr[1] = cb->yuy2_v1_ur; - - vptr += stride; - vptr[0] = cb->yuy2_v1_ll; - vptr[1] = cb->yuy2_v1_lr; - - vptr += stride; - vptr[0] = cb->yuy2_v1_ll; - vptr[1] = cb->yuy2_v1_lr; -} - - -/* ------------------------------------------------------------------------ */ -inline void cvid_v4_yuy2(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb0, - cvid_codebook *cb1, cvid_codebook *cb2, cvid_codebook *cb3) -{ -int stride = mpi->stride[0] / 2; -unsigned long *vptr = (unsigned long *)mpi->planes[0]; - - if(y+3>=(unsigned int)mpi->height) return; // avoid sig11 - - vptr += (y * mpi->stride[0] + x) / 2; - - vptr[0] = cb0->yuy2_v4_u; - vptr[1] = cb1->yuy2_v4_u; - - vptr += stride; - vptr[0] = cb0->yuy2_v4_l; - vptr[1] = cb1->yuy2_v4_l; - - vptr += stride; - vptr[0] = cb2->yuy2_v4_u; - vptr[1] = cb3->yuy2_v4_u; - - vptr += stride; - vptr[0] = cb2->yuy2_v4_l; - vptr[1] = cb3->yuy2_v4_l; -} - -/* ---------------------------------------------------------------------- */ -static inline void read_codebook_32(cvid_codebook *c, int mode) -{ -int uvr, uvg, uvb; - - if(mode) /* black and white */ - { - c->y0 = get_byte(); - c->y1 = get_byte(); - c->y2 = get_byte(); - c->y3 = get_byte(); - c->u = c->v = 0; - - c->rgb0 = (c->y0 << 16) | (c->y0 << 8) | c->y0; - c->rgb1 = (c->y1 << 16) | (c->y1 << 8) | c->y1; - c->rgb2 = (c->y2 << 16) | (c->y2 << 8) | c->y2; - c->rgb3 = (c->y3 << 16) | (c->y3 << 8) | c->y3; - } - else /* colour */ - { - c->y0 = get_byte(); /* luma */ - c->y1 = get_byte(); - c->y2 = get_byte(); - c->y3 = get_byte(); - c->u = get_byte(); /* chroma */ - c->v = get_byte(); - - uvr = c->v << 1; - uvg = -((c->u+1) >> 1) - c->v; - uvb = c->u << 1; - - c->rgb0 = le2me_32((uiclp[c->y0 + uvr] << 16) | (uiclp[c->y0 + uvg] << 8) | uiclp[c->y0 + uvb]); - c->rgb1 = le2me_32((uiclp[c->y1 + uvr] << 16) | (uiclp[c->y1 + uvg] << 8) | uiclp[c->y1 + uvb]); - c->rgb2 = le2me_32((uiclp[c->y2 + uvr] << 16) | (uiclp[c->y2 + uvg] << 8) | uiclp[c->y2 + uvb]); - c->rgb3 = le2me_32((uiclp[c->y3 + uvr] << 16) | (uiclp[c->y3 + uvg] << 8) | uiclp[c->y3 + uvb]); - } -} - - -/* ------------------------------------------------------------------------ */ -inline void cvid_v1_32(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb) -{ -int stride = mpi->stride[0]; -unsigned long *vptr = (unsigned long *)mpi->planes[0]; -unsigned long rgb; - - if(y+3>=(unsigned int)mpi->height) return; // avoid sig11 - - vptr += (y * stride + x); - - vptr[0] = rgb = cb->rgb0; vptr[1] = rgb; - vptr[2] = rgb = cb->rgb1; vptr[3] = rgb; - vptr += stride; - vptr[0] = rgb = cb->rgb0; vptr[1] = rgb; - vptr[2] = rgb = cb->rgb1; vptr[3] = rgb; - vptr += stride; - vptr[0] = rgb = cb->rgb2; vptr[1] = rgb; - vptr[2] = rgb = cb->rgb3; vptr[3] = rgb; - vptr += stride; - vptr[0] = rgb = cb->rgb2; vptr[1] = rgb; - vptr[2] = rgb = cb->rgb3; vptr[3] = rgb; -} - - -/* ------------------------------------------------------------------------ */ -inline void cvid_v4_32(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb0, - cvid_codebook *cb1, cvid_codebook *cb2, cvid_codebook *cb3) -{ -int stride = mpi->stride[0]; -unsigned long *vptr = (unsigned long *)mpi->planes[0]; - - if(y+3>=(unsigned int)mpi->height) return; // avoid sig11 - - vptr += (y * stride + x); - - vptr[0] = cb0->rgb0; - vptr[1] = cb0->rgb1; - vptr[2] = cb1->rgb0; - vptr[3] = cb1->rgb1; - vptr += stride; - vptr[0] = cb0->rgb2; - vptr[1] = cb0->rgb3; - vptr[2] = cb1->rgb2; - vptr[3] = cb1->rgb3; - vptr += stride; - vptr[0] = cb2->rgb0; - vptr[1] = cb2->rgb1; - vptr[2] = cb3->rgb0; - vptr[3] = cb3->rgb1; - vptr += stride; - vptr[0] = cb2->rgb2; - vptr[1] = cb2->rgb3; - vptr[2] = cb3->rgb2; - vptr[3] = cb3->rgb3; -} - - -/* ---------------------------------------------------------------------- */ -static inline void read_codebook_24(cvid_codebook *c, int mode) -{ -int uvr, uvg, uvb; - - if(mode) /* black and white */ - { - c->y0 = get_byte(); - c->y1 = get_byte(); - c->y2 = get_byte(); - c->y3 = get_byte(); - c->u = c->v = 0; - - c->r[0] = c->g[0] = c->b[0] = c->y0; - c->r[1] = c->g[1] = c->b[1] = c->y1; - c->r[2] = c->g[2] = c->b[2] = c->y2; - c->r[3] = c->g[3] = c->b[3] = c->y3; - } - else /* colour */ - { - c->y0 = get_byte(); /* luma */ - c->y1 = get_byte(); - c->y2 = get_byte(); - c->y3 = get_byte(); - c->u = get_byte(); /* chroma */ - c->v = get_byte(); - - uvr = c->v << 1; - uvg = -((c->u+1) >> 1) - c->v; - uvb = c->u << 1; - - c->r[0] = uiclp[c->y0 + uvr]; c->g[0] = uiclp[c->y0 + uvg]; c->b[0] = uiclp[c->y0 + uvb]; - c->r[1] = uiclp[c->y1 + uvr]; c->g[1] = uiclp[c->y1 + uvg]; c->b[1] = uiclp[c->y1 + uvb]; - c->r[2] = uiclp[c->y2 + uvr]; c->g[2] = uiclp[c->y2 + uvg]; c->b[2] = uiclp[c->y2 + uvb]; - c->r[3] = uiclp[c->y3 + uvr]; c->g[3] = uiclp[c->y3 + uvg]; c->b[3] = uiclp[c->y3 + uvb]; - } -} - - -/* ------------------------------------------------------------------------ */ -inline void cvid_v1_24(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb) -{ -unsigned char r, g, b; -int stride = (mpi->stride[0]-4)*3; -unsigned char *vptr = mpi->planes[0] + (y * mpi->stride[0] + x) * 3; - - if(y+3>=(unsigned int)mpi->height) return; // avoid sig11 - - *vptr++ = b = cb->b[0]; *vptr++ = g = cb->g[0]; *vptr++ = r = cb->r[0]; - *vptr++ = b; *vptr++ = g; *vptr++ = r; - *vptr++ = b = cb->b[1]; *vptr++ = g = cb->g[1]; *vptr++ = r = cb->r[1]; - *vptr++ = b; *vptr++ = g; *vptr++ = r; - vptr += stride; - *vptr++ = b = cb->b[0]; *vptr++ = g = cb->g[0]; *vptr++ = r = cb->r[0]; - *vptr++ = b; *vptr++ = g; *vptr++ = r; - *vptr++ = b = cb->b[1]; *vptr++ = g = cb->g[1]; *vptr++ = r = cb->r[1]; - *vptr++ = b; *vptr++ = g; *vptr++ = r; - vptr += stride; - *vptr++ = b = cb->b[2]; *vptr++ = g = cb->g[2]; *vptr++ = r = cb->r[2]; - *vptr++ = b; *vptr++ = g; *vptr++ = r; - *vptr++ = b = cb->b[3]; *vptr++ = g = cb->g[3]; *vptr++ = r = cb->r[3]; - *vptr++ = b; *vptr++ = g; *vptr++ = r; - vptr += stride; - *vptr++ = b = cb->b[2]; *vptr++ = g = cb->g[2]; *vptr++ = r = cb->r[2]; - *vptr++ = b; *vptr++ = g; *vptr++ = r; - *vptr++ = b = cb->b[3]; *vptr++ = g = cb->g[3]; *vptr++ = r = cb->r[3]; - *vptr++ = b; *vptr++ = g; *vptr++ = r; -} - - -/* ------------------------------------------------------------------------ */ -inline void cvid_v4_24(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb0, - cvid_codebook *cb1, cvid_codebook *cb2, cvid_codebook *cb3) -{ -int stride = (mpi->stride[0]-4)*3; -unsigned char *vptr = mpi->planes[0] + (y * mpi->stride[0] + x) * 3; - - if(y+3>=(unsigned int)mpi->height) return; // avoid sig11 - - *vptr++ = cb0->b[0]; *vptr++ = cb0->g[0]; *vptr++ = cb0->r[0]; - *vptr++ = cb0->b[1]; *vptr++ = cb0->g[1]; *vptr++ = cb0->r[1]; - *vptr++ = cb1->b[0]; *vptr++ = cb1->g[0]; *vptr++ = cb1->r[0]; - *vptr++ = cb1->b[1]; *vptr++ = cb1->g[1]; *vptr++ = cb1->r[1]; - vptr += stride; - *vptr++ = cb0->b[2]; *vptr++ = cb0->g[2]; *vptr++ = cb0->r[2]; - *vptr++ = cb0->b[3]; *vptr++ = cb0->g[3]; *vptr++ = cb0->r[3]; - *vptr++ = cb1->b[2]; *vptr++ = cb1->g[2]; *vptr++ = cb1->r[2]; - *vptr++ = cb1->b[3]; *vptr++ = cb1->g[3]; *vptr++ = cb1->r[3]; - vptr += stride; - *vptr++ = cb2->b[0]; *vptr++ = cb2->g[0]; *vptr++ = cb2->r[0]; - *vptr++ = cb2->b[1]; *vptr++ = cb2->g[1]; *vptr++ = cb2->r[1]; - *vptr++ = cb3->b[0]; *vptr++ = cb3->g[0]; *vptr++ = cb3->r[0]; - *vptr++ = cb3->b[1]; *vptr++ = cb3->g[1]; *vptr++ = cb3->r[1]; - vptr += stride; - *vptr++ = cb2->b[2]; *vptr++ = cb2->g[2]; *vptr++ = cb2->r[2]; - *vptr++ = cb2->b[3]; *vptr++ = cb2->g[3]; *vptr++ = cb2->r[3]; - *vptr++ = cb3->b[2]; *vptr++ = cb3->g[2]; *vptr++ = cb3->r[2]; - *vptr++ = cb3->b[3]; *vptr++ = cb3->g[3]; *vptr++ = cb3->r[3]; -} - -/* ------------------------------------------------------------------------ - * Call this function once at the start of the sequence and save the - * returned context for calls to decode_cinepak(). - */ -void *decode_cinepak_init(void) -{ -cinepak_info *cvinfo; -int i, x; - - if((cvinfo = calloc(sizeof(cinepak_info), 1)) == NULL) return NULL; - cvinfo->strip_num = 0; - - if(uiclp == NULL) - { - uiclp = uiclip+512; - for(i = -512; i < 512; i++) - uiclp[i] = (i < 0 ? 0 : (i > 255 ? 255 : i)); - } - - for(i = 0, x = -128; i < 256; i++, x++) - { - CU_Y_tab[i] = (-FIX(0.0655)) * x; - CV_Y_tab[i] = (FIX(0.0110)) * x + ONE_HALF; - CU_Cb_tab[i] = (FIX(1.1656)) * x; - CV_Cb_tab[i] = (-FIX(0.0062)) * x + ONE_HALF + FIX(128); - CU_Cr_tab[i] = (FIX(0.0467)) * x; - CV_Cr_tab[i] = (FIX(1.4187)) * x + ONE_HALF + FIX(128); - } - - return (void *)cvinfo; -} - - -/* ------------------------------------------------------------------------ - * This function decodes a buffer containing a Cinepak encoded frame. - * - * context - the context created by decode_cinepak_init(). - * buf - the input buffer to be decoded - * size - the size of the input buffer - * frame - the output frame buffer - * width - the width of the output frame - * height - the height of the output frame - * bit_per_pixel - the number of bits per pixel allocated to the output - * frame; depths support: - * 32: BGR32 - * 24: BGR24 - * 16: YUY2 - * 12: YV12 - */ -void decode_cinepak(void *context, unsigned char *buf, int size, mp_image_t *mpi) -{ -cinepak_info *cvinfo = (cinepak_info *)context; -cvid_codebook *v4_codebook, *v1_codebook, *codebook = NULL; -unsigned long x, y, y_bottom, frame_flags, strips, cv_width, cv_height, cnum, - strip_id, chunk_id, x0, y0, x1, y1, ci, flag, mask; -long len, top_size, chunk_size; -unsigned int i, cur_strip, d0, d1, d2, d3; -int modulo; -void (*read_codebook)(cvid_codebook *c, int mode) = NULL; -void (*cvid_v1)(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb) = NULL; -void (*cvid_v4)(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb0, - cvid_codebook *cb1, cvid_codebook *cb2, cvid_codebook *cb3) = NULL; - - x = y = 0; - y_bottom = 0; - in_buffer = buf; - - frame_flags = get_byte(); - len = get_byte() << 16; - len |= get_byte() << 8; - len |= get_byte(); - - switch(mpi->imgfmt) - { - case IMGFMT_I420: - case IMGFMT_IYUV: - case IMGFMT_YV12: // YV12 - read_codebook = read_codebook_yv12; - cvid_v1 = cvid_v1_yv12; - cvid_v4 = cvid_v4_yv12; - break; - case IMGFMT_YUY2: // YUY2 - read_codebook = read_codebook_yuy2; - cvid_v1 = cvid_v1_yuy2; - cvid_v4 = cvid_v4_yuy2; - break; - case IMGFMT_BGR24: // BGR24 - read_codebook = read_codebook_24; - cvid_v1 = cvid_v1_24; - cvid_v4 = cvid_v4_24; - break; - case IMGFMT_BGR32: // BGR32 - read_codebook = read_codebook_32; - cvid_v1 = cvid_v1_32; - cvid_v4 = cvid_v4_32; - break; - } - - if(len != size) - { - if(len & 0x01) len++; /* AVIs tend to have a size mismatch */ - if(len != size) - { - mp_msg(MSGT_DECVIDEO, MSGL_WARN, "CVID: corruption %d (QT/AVI) != %ld (CV)\n", size, len); - // return; - } - } - - cv_width = get_word(); - cv_height = get_word(); - strips = get_word(); - - if(strips > cvinfo->strip_num) - { - if(strips >= MAX_STRIPS) - { - mp_msg(MSGT_DECVIDEO, MSGL_WARN, "CVID: strip overflow (more than %d)\n", MAX_STRIPS); - return; - } - - for(i = cvinfo->strip_num; i < strips; i++) - { - if((cvinfo->v4_codebook[i] = (cvid_codebook *)calloc(sizeof(cvid_codebook), 260)) == NULL) - { - mp_msg(MSGT_DECVIDEO, MSGL_WARN, "CVID: codebook v4 alloc err\n"); - return; - } - - if((cvinfo->v1_codebook[i] = (cvid_codebook *)calloc(sizeof(cvid_codebook), 260)) == NULL) - { - mp_msg(MSGT_DECVIDEO, MSGL_WARN, "CVID: codebook v1 alloc err\n"); - return; - } - } - } - cvinfo->strip_num = strips; - -#if DBUG - mp_msg(MSGT_DECVIDEO, MSGL_WARN, "CVID: <%ld,%ld> strips %ld\n", cv_width, cv_height, strips); -#endif - - for(cur_strip = 0; cur_strip < strips; cur_strip++) - { - v4_codebook = cvinfo->v4_codebook[cur_strip]; - v1_codebook = cvinfo->v1_codebook[cur_strip]; - - if((cur_strip > 0) && (!(frame_flags & 0x01))) - { - memcpy(cvinfo->v4_codebook[cur_strip], cvinfo->v4_codebook[cur_strip-1], 260 * sizeof(cvid_codebook)); - memcpy(cvinfo->v1_codebook[cur_strip], cvinfo->v1_codebook[cur_strip-1], 260 * sizeof(cvid_codebook)); - } - - strip_id = get_word(); /* 1000 = key strip, 1100 = iter strip */ - top_size = get_word(); - y0 = get_word(); /* FIXME: most of these are ignored at the moment */ - x0 = get_word(); - y1 = get_word(); - x1 = get_word(); - - y_bottom += y1; - top_size -= 12; - x = 0; -// if(x1 != (unsigned int)mpi->width) -// mp_msg(MSGT_DECVIDEO, MSGL_WARN, "CVID: Warning x1 (%ld) != width (%d)\n", x1, mpi->width); - -//x1 = mpi->width; -#if DBUG - mp_msg(MSGT_DECVIDEO, MSGL_WARN, " %d) %04lx %04ld <%ld,%ld> <%ld,%ld> yt %ld %d\n", - cur_strip, strip_id, top_size, x0, y0, x1, y1, y_bottom); -#endif - - while(top_size > 0) - { - chunk_id = get_word(); - chunk_size = get_word(); - -#if DBUG - mp_msg(MSGT_DECVIDEO, MSGL_WARN, " %04lx %04ld\n", chunk_id, chunk_size); -#endif - top_size -= chunk_size; - chunk_size -= 4; - - switch(chunk_id) - { - /* -------------------- Codebook Entries -------------------- */ - case 0x2000: - case 0x2200: - modulo = chunk_size % 6; - codebook = (chunk_id == 0x2200 ? v1_codebook : v4_codebook); - cnum = (chunk_size - modulo) / 6; - for(i = 0; i < cnum; i++) read_codebook(codebook+i, 0); - while (modulo--) - in_buffer++; - break; - - case 0x2400: - case 0x2600: /* 8 bit per pixel */ - codebook = (chunk_id == 0x2600 ? v1_codebook : v4_codebook); - cnum = chunk_size/4; - for(i = 0; i < cnum; i++) read_codebook(codebook+i, 1); - break; - - case 0x2100: - case 0x2300: - codebook = (chunk_id == 0x2300 ? v1_codebook : v4_codebook); - - ci = 0; - while(chunk_size > 3) - { - flag = get_long(); - chunk_size -= 4; - - for(i = 0; i < 32; i++) - { - if(flag & 0x80000000) - { - chunk_size -= 6; - read_codebook(codebook+ci, 0); - } - - ci++; - flag <<= 1; - } - } - while(chunk_size > 0) { skip_byte(); chunk_size--; } - break; - - case 0x2500: - case 0x2700: /* 8 bit per pixel */ - codebook = (chunk_id == 0x2700 ? v1_codebook : v4_codebook); - - ci = 0; - while(chunk_size > 0) - { - flag = get_long(); - chunk_size -= 4; - - for(i = 0; i < 32; i++) - { - if(flag & 0x80000000) - { - chunk_size -= 4; - read_codebook(codebook+ci, 1); - } - - ci++; - flag <<= 1; - } - } - while(chunk_size > 0) { skip_byte(); chunk_size--; } - break; - - /* -------------------- Frame -------------------- */ - case 0x3000: - while((chunk_size > 0) && (y < y_bottom)) - { - flag = get_long(); - chunk_size -= 4; - - for(i = 0; i < 32; i++) - { - if(y >= y_bottom) break; - if(flag & 0x80000000) /* 4 bytes per block */ - { - d0 = get_byte(); - d1 = get_byte(); - d2 = get_byte(); - d3 = get_byte(); - chunk_size -= 4; - cvid_v4(mpi, x, y, v4_codebook+d0, v4_codebook+d1, v4_codebook+d2, v4_codebook+d3); - } - else /* 1 byte per block */ - { - cvid_v1(mpi, x, y, v1_codebook + get_byte()); - chunk_size--; - } - - x += 4; - if(x >= (unsigned int)x1) - { - x = 0; - y += 4; - } - flag <<= 1; - } - } - while(chunk_size > 0) { skip_byte(); chunk_size--; } - break; - - case 0x3100: - while((chunk_size > 0) && (y < y_bottom)) - { - /* ---- flag bits: 0 = SKIP, 10 = V1, 11 = V4 ---- */ - flag = (unsigned long)get_long(); - chunk_size -= 4; - mask = 0x80000000; - - while((mask) && (y < y_bottom)) - { - if(flag & mask) - { - if(mask == 1) - { - if(chunk_size < 0) break; - flag = (unsigned long)get_long(); - chunk_size -= 4; - mask = 0x80000000; - } - else mask >>= 1; - - if(flag & mask) /* V4 */ - { - d0 = get_byte(); - d1 = get_byte(); - d2 = get_byte(); - d3 = get_byte(); - chunk_size -= 4; - cvid_v4(mpi, x, y, v4_codebook+d0, v4_codebook+d1, v4_codebook+d2, v4_codebook+d3); - } - else /* V1 */ - { - chunk_size--; - cvid_v1(mpi, x, y, v1_codebook + get_byte()); - } - } /* else SKIP */ - - mask >>= 1; - x += 4; - if(x >= (unsigned int)x1) - { - x = 0; - y += 4; - } - } - } - - while(chunk_size > 0) { skip_byte(); chunk_size--; } - break; - - case 0x3200: /* each byte is a V1 codebook */ - while((chunk_size > 0) && (y < y_bottom)) - { - cvid_v1(mpi, x, y, v1_codebook + get_byte()); - chunk_size--; - x += 4; - if(x >= (unsigned int)x1) - { - x = 0; - y += 4; - } - } - while(chunk_size > 0) { skip_byte(); chunk_size--; } - break; - - default: - mp_msg(MSGT_DECVIDEO, MSGL_WARN, "CVID: unknown chunk_id %08lx\n", chunk_id); - while(chunk_size > 0) { skip_byte(); chunk_size--; } - break; - } - } - } - - if(len != size) - { - if(len & 0x01) len++; /* AVIs tend to have a size mismatch */ - if(len != size) - { - long xlen; - skip_byte(); - xlen = get_byte() << 16; - xlen |= get_byte() << 8; - xlen |= get_byte(); /* Read Len */ - mp_msg(MSGT_DECVIDEO, MSGL_WARN, "CVID: END INFO chunk size %d cvid size1 %ld cvid size2 %ld\n", size, len, xlen); - } - } -} - diff -r fd85802f755b -r 628c85c15c7b cyuv.c --- a/cyuv.c Sat Apr 13 17:44:14 2002 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,135 +0,0 @@ -/* ------------------------------------------------------------------------ - * Creative YUV Video Decoder - * - * Dr. Tim Ferguson, 2001. - * For more details on the algorithm: - * http://www.csse.monash.edu.au/~timf/videocodec.html - * - * This is a very simple predictive coder. A video frame is coded in YUV411 - * format. The first pixel of each scanline is coded using the upper four - * bits of its absolute value. Subsequent pixels for the scanline are coded - * using the difference between the last pixel and the current pixel (DPCM). - * The DPCM values are coded using a 16 entry table found at the start of the - * frame. Thus four bits per component are used and are as follows: - * UY VY YY UY VY YY UY VY... - * This code assumes the frame width will be a multiple of four pixels. This - * should probably be fixed. - * ------------------------------------------------------------------------ */ -#include -#include -#include -#include -#include - -#include "loader/wine/avifmt.h" // for mmioFOURCC macro - -/* ------------------------------------------------------------------------ - * This function decodes a buffer containing a CYUV encoded frame. - * - * buf - the input buffer to be decoded - * size - the size of the input buffer - * frame - the output frame buffer (UYVY format) - * width - the width of the output frame - * height - the height of the output frame - * format - the requested output format - */ -void decode_cyuv(unsigned char *buf, int size, unsigned char *frame, int width, int height, int format) -{ -int i, xpos, ypos, cur_Y = 0, cur_U = 0, cur_V = 0; -char *delta_y_tbl, *delta_c_tbl, *ptr; - - delta_y_tbl = buf + 16; - delta_c_tbl = buf + 32; - ptr = buf + (16 * 3); - - for(ypos = 0; ypos < height; ypos++) - for(xpos = 0; xpos < width; xpos += 4) - { - if(xpos == 0) /* first pixels in scanline */ - { - cur_U = *(ptr++); - cur_Y = (cur_U & 0x0f) << 4; - cur_U = cur_U & 0xf0; - if (format == mmioFOURCC('Y','U','Y','2')) - { - *frame++ = cur_Y; - *frame++ = cur_U; - } - else - { - *frame++ = cur_U; - *frame++ = cur_Y; - } - - cur_V = *(ptr++); - cur_Y = (cur_Y + delta_y_tbl[cur_V & 0x0f]) & 0xff; - cur_V = cur_V & 0xf0; - if (format == mmioFOURCC('Y','U','Y','2')) - { - *frame++ = cur_Y; - *frame++ = cur_V; - } - else - { - *frame++ = cur_V; - *frame++ = cur_Y; - } - } - else /* subsequent pixels in scanline */ - { - i = *(ptr++); - cur_U = (cur_U + delta_c_tbl[i >> 4]) & 0xff; - cur_Y = (cur_Y + delta_y_tbl[i & 0x0f]) & 0xff; - if (format == mmioFOURCC('Y','U','Y','2')) - { - *frame++ = cur_Y; - *frame++ = cur_U; - } - else - { - *frame++ = cur_U; - *frame++ = cur_Y; - } - - i = *(ptr++); - cur_V = (cur_V + delta_c_tbl[i >> 4]) & 0xff; - cur_Y = (cur_Y + delta_y_tbl[i & 0x0f]) & 0xff; - if (format == mmioFOURCC('Y','U','Y','2')) - { - *frame++ = cur_Y; - *frame++ = cur_V; - } - else - { - *frame++ = cur_V; - *frame++ = cur_Y; - } - } - - i = *(ptr++); - cur_Y = (cur_Y + delta_y_tbl[i & 0x0f]) & 0xff; - if (format == mmioFOURCC('Y','U','Y','2')) - { - *frame++ = cur_Y; - *frame++ = cur_U; - } - else - { - *frame++ = cur_U; - *frame++ = cur_Y; - } - - cur_Y = (cur_Y + delta_y_tbl[i >> 4]) & 0xff; - if (format == mmioFOURCC('Y','U','Y','2')) - { - *frame++ = cur_Y; - *frame++ = cur_V; - } - else - { - *frame++ = cur_V; - *frame++ = cur_Y; - } - } -} - diff -r fd85802f755b -r 628c85c15c7b fli.c --- a/fli.c Sat Apr 13 17:44:14 2002 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,379 +0,0 @@ -/* - FLI Decoder for MPlayer - - (C) 2001 Mike Melanson - - 32bpp support (c) alex - - Additional code and bug fixes by Roberto Togni - - For information on the FLI format, as well as various traps to - avoid while programming one, visit: - http://www.pcisys.net/~melanson/codecs/ -*/ - -#include -#include -#include "config.h" -#include "bswap.h" -#include "mp_msg.h" - -#define LE_16(x) (le2me_16(*(unsigned short *)(x))) -#define LE_32(x) (le2me_32(*(unsigned int *)(x))) - -#define FLI_256_COLOR 4 -#define FLI_DELTA 7 -#define FLI_COLOR 11 -#define FLI_LC 12 -#define FLI_BLACK 13 -#define FLI_BRUN 15 -#define FLI_COPY 16 -#define FLI_MINI 18 - -// 256 RGB entries; 25% of these bytes will be unused, but it's faster -// to index 4-byte entries -#define PALETTE_SIZE 1024 -static unsigned char palette[PALETTE_SIZE]; - -void *init_fli_decoder(int width, int height) -{ - memset(palette, 0, PALETTE_SIZE); - - return malloc(width * height * sizeof (unsigned char)); -} - -void decode_fli_frame( - unsigned char *encoded, - int encoded_size, - unsigned char *decoded, - int width, - int height, - int bytes_per_pixel, - void *context) -{ - int stream_ptr = 0; - int stream_ptr_after_color_chunk; - int pixel_ptr; - int palette_ptr1; - int palette_ptr2; - unsigned char palette_idx1; - unsigned char palette_idx2; - - unsigned int frame_size; - int num_chunks; - - unsigned int chunk_size; - int chunk_type; - - int i, j; - - int color_packets; - int color_changes; - int color_scale; - - int lines; - int compressed_lines; - int starting_line; - signed short line_packets; - int y_ptr; - int line_inc = width * bytes_per_pixel; - signed char byte_run; - int pixel_skip; - int update_whole_frame = 0; // Palette change flag - unsigned char *fli_ghost_image = (unsigned char *)context; - int ghost_pixel_ptr; - int ghost_y_ptr; - - frame_size = LE_32(&encoded[stream_ptr]); - stream_ptr += 6; // skip the magic number - num_chunks = LE_16(&encoded[stream_ptr]); - stream_ptr += 10; // skip padding - - // iterate through the chunks - frame_size -= 16; - while ((frame_size > 0) && (num_chunks > 0)) - { - chunk_size = LE_32(&encoded[stream_ptr]); - stream_ptr += 4; - chunk_type = LE_16(&encoded[stream_ptr]); - stream_ptr += 2; - - switch (chunk_type) - { - case FLI_256_COLOR: - case FLI_COLOR: - stream_ptr_after_color_chunk = stream_ptr + chunk_size - 6; - if (chunk_type == FLI_COLOR) - color_scale = 4; - else - color_scale = 1; - // set up the palette - color_packets = LE_16(&encoded[stream_ptr]); - stream_ptr += 2; - palette_ptr1 = 0; - for (i = 0; i < color_packets; i++) - { - // first byte is how many colors to skip - palette_ptr1 += (encoded[stream_ptr++] * 4); - // wrap around, for good measure - if (palette_ptr1 >= PALETTE_SIZE) - palette_ptr1 = 0; - // next byte indicates how many entries to change - color_changes = encoded[stream_ptr++]; - // if there are 0 color changes, there are actually 256 - if (color_changes == 0) - color_changes = 256; - for (j = 0; j < color_changes; j++) - { - palette[palette_ptr1++] = encoded[stream_ptr + 2] * color_scale; - palette[palette_ptr1++] = encoded[stream_ptr + 1] * color_scale; - palette[palette_ptr1++] = encoded[stream_ptr + 0] * color_scale; - palette_ptr1++; - stream_ptr += 3; - } - } - - // color chunks sometimes have weird 16-bit alignment issues; - // therefore, take the hardline approach and set the stream_ptr - // to the value calculate w.r.t. the size specified by the color - // chunk header - stream_ptr = stream_ptr_after_color_chunk; - - /* Palette has changed, must update frame */ - update_whole_frame = 1; - break; - - case FLI_DELTA: - y_ptr = ghost_y_ptr = 0; - compressed_lines = LE_16(&encoded[stream_ptr]); - stream_ptr += 2; - while (compressed_lines > 0) - { - line_packets = LE_16(&encoded[stream_ptr]); - stream_ptr += 2; - if (line_packets < 0) - { - line_packets = -line_packets; - y_ptr += (line_packets * line_inc); - ghost_y_ptr += (line_packets * width); - } - else - { - pixel_ptr = y_ptr; - ghost_pixel_ptr = ghost_y_ptr; - for (i = 0; i < line_packets; i++) - { - // account for the skip bytes - pixel_skip = encoded[stream_ptr++]; - pixel_ptr += pixel_skip * bytes_per_pixel; - ghost_pixel_ptr += pixel_skip; - byte_run = encoded[stream_ptr++]; - if (byte_run < 0) - { - byte_run = -byte_run; - palette_ptr1 = (palette_idx1 = encoded[stream_ptr++]) * 4; - palette_ptr2 = (palette_idx2 = encoded[stream_ptr++]) * 4; - for (j = 0; j < byte_run; j++) - { - fli_ghost_image[ghost_pixel_ptr++] = palette_idx1; - decoded[pixel_ptr++] = palette[palette_ptr1 + 0]; - decoded[pixel_ptr++] = palette[palette_ptr1 + 1]; - decoded[pixel_ptr++] = palette[palette_ptr1 + 2]; - if (bytes_per_pixel == 4) /* 32bpp */ - pixel_ptr++; - - fli_ghost_image[ghost_pixel_ptr++] = palette_idx2; - decoded[pixel_ptr++] = palette[palette_ptr2 + 0]; - decoded[pixel_ptr++] = palette[palette_ptr2 + 1]; - decoded[pixel_ptr++] = palette[palette_ptr2 + 2]; - if (bytes_per_pixel == 4) /* 32bpp */ - pixel_ptr++; - } - } - else - { - for (j = 0; j < byte_run * 2; j++) - { - palette_ptr1 = (palette_idx1 = encoded[stream_ptr++]) * 4; - fli_ghost_image[ghost_pixel_ptr++] = palette_idx1; - decoded[pixel_ptr++] = palette[palette_ptr1 + 0]; - decoded[pixel_ptr++] = palette[palette_ptr1 + 1]; - decoded[pixel_ptr++] = palette[palette_ptr1 + 2]; - if (bytes_per_pixel == 4) /* 32bpp */ - pixel_ptr++; - } - } - } - y_ptr += line_inc; - ghost_y_ptr += width; - compressed_lines--; - } - } - break; - - case FLI_LC: - // line compressed - starting_line = LE_16(&encoded[stream_ptr]); - stream_ptr += 2; - y_ptr = starting_line * line_inc; - ghost_y_ptr = starting_line * width; - - compressed_lines = LE_16(&encoded[stream_ptr]); - stream_ptr += 2; - while (compressed_lines > 0) - { - pixel_ptr = y_ptr; - ghost_pixel_ptr = ghost_y_ptr; - line_packets = encoded[stream_ptr++]; - if (line_packets > 0) - { - for (i = 0; i < line_packets; i++) - { - // account for the skip bytes - pixel_skip = encoded[stream_ptr++]; - pixel_ptr += pixel_skip * bytes_per_pixel; - ghost_pixel_ptr += pixel_skip; - byte_run = encoded[stream_ptr++]; - if (byte_run > 0) - { - for (j = 0; j < byte_run; j++) - { - palette_ptr1 = (palette_idx1 = encoded[stream_ptr++]) * 4; - fli_ghost_image[ghost_pixel_ptr++] = palette_idx1; - decoded[pixel_ptr++] = palette[palette_ptr1 + 0]; - decoded[pixel_ptr++] = palette[palette_ptr1 + 1]; - decoded[pixel_ptr++] = palette[palette_ptr1 + 2]; - if (bytes_per_pixel == 4) /* 32bpp */ - pixel_ptr++; - } - } - else - { - byte_run = -byte_run; - palette_ptr1 = (palette_idx1 = encoded[stream_ptr++]) * 4; - for (j = 0; j < byte_run; j++) - { - fli_ghost_image[ghost_pixel_ptr++] = palette_idx1; - decoded[pixel_ptr++] = palette[palette_ptr1 + 0]; - decoded[pixel_ptr++] = palette[palette_ptr1 + 1]; - decoded[pixel_ptr++] = palette[palette_ptr1 + 2]; - if (bytes_per_pixel == 4) /* 32bpp */ - pixel_ptr++; - } - } - } - } - - y_ptr += line_inc; - ghost_y_ptr += width; - compressed_lines--; - } - break; - - case FLI_BLACK: - // set the whole frame to color 0 (which is usually black) by - // clearing the ghost image and trigger a full frame update - memset(fli_ghost_image, 0, width * height * sizeof(unsigned char)); - update_whole_frame = 1; - break; - - case FLI_BRUN: - // byte run compression - y_ptr = 0; - ghost_y_ptr = 0; - for (lines = 0; lines < height; lines++) - { - pixel_ptr = y_ptr; - ghost_pixel_ptr = ghost_y_ptr; - line_packets = encoded[stream_ptr++]; - for (i = 0; i < line_packets; i++) - { - byte_run = encoded[stream_ptr++]; - if (byte_run > 0) - { - palette_ptr1 = (palette_idx1 = encoded[stream_ptr++]) * 4; - for (j = 0; j < byte_run; j++) - { - fli_ghost_image[ghost_pixel_ptr++] = palette_idx1; - decoded[pixel_ptr++] = palette[palette_ptr1 + 0]; - decoded[pixel_ptr++] = palette[palette_ptr1 + 1]; - decoded[pixel_ptr++] = palette[palette_ptr1 + 2]; - if (bytes_per_pixel == 4) /* 32bpp */ - pixel_ptr++; - } - } - else // copy bytes if byte_run < 0 - { - byte_run = -byte_run; - for (j = 0; j < byte_run; j++) - { - palette_ptr1 = (palette_idx1 = encoded[stream_ptr++]) * 4; - fli_ghost_image[ghost_pixel_ptr++] = palette_idx1; - decoded[pixel_ptr++] = palette[palette_ptr1 + 0]; - decoded[pixel_ptr++] = palette[palette_ptr1 + 1]; - decoded[pixel_ptr++] = palette[palette_ptr1 + 2]; - if (bytes_per_pixel == 4) /* 32bpp */ - pixel_ptr++; - } - } - } - - y_ptr += line_inc; - ghost_y_ptr += width; - } - break; - - case FLI_COPY: - // copy the chunk (uncompressed frame) to the ghost image and - // schedule the whole frame to be updated - if (chunk_size - 6 > width * height) - { - mp_msg(MSGT_DECVIDEO, MSGL_WARN, - "FLI: in chunk FLI_COPY : source data (%d bytes) bigger than image," \ - " skipping chunk\n", - chunk_size - 6); - break; - } - else - memcpy(fli_ghost_image, &encoded[stream_ptr], chunk_size - 6); - stream_ptr += chunk_size - 6; - update_whole_frame = 1; - break; - - case FLI_MINI: - // sort of a thumbnail? disregard this chunk... - stream_ptr += chunk_size - 6; - break; - - default: - mp_msg (MSGT_DECVIDEO, MSGL_WARN, - "FLI: Unrecognized chunk type: %d\n", chunk_type); - break; - } - - frame_size -= chunk_size; - num_chunks--; - } - - if (update_whole_frame) - { - pixel_ptr = ghost_pixel_ptr = 0; - while (pixel_ptr < (width * height * bytes_per_pixel)) - { - palette_ptr1 = fli_ghost_image[ghost_pixel_ptr++] * 4; - decoded[pixel_ptr++] = palette[palette_ptr1 + 0]; - decoded[pixel_ptr++] = palette[palette_ptr1 + 1]; - decoded[pixel_ptr++] = palette[palette_ptr1 + 2]; - if (bytes_per_pixel == 4) /* 32bpp */ - pixel_ptr++; - } - } - - // by the end of the chunk, the stream ptr should equal the frame - // size (minus 1, possibly); if it doesn't, issue a warning - if ((stream_ptr != encoded_size) && (stream_ptr != encoded_size - 1)) - mp_msg(MSGT_DECVIDEO, MSGL_WARN, - " warning: processed FLI chunk where encoded size = %d\n" \ - " and final chunk ptr = %d\n", - encoded_size, stream_ptr); -} diff -r fd85802f755b -r 628c85c15c7b libmpcodecs/native/RTjpegN.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libmpcodecs/native/RTjpegN.c Sat Apr 13 18:03:02 2002 +0000 @@ -0,0 +1,3800 @@ +/* + RTjpeg (C) Justin Schoeman 1998 (justin@suntiger.ee.up.ac.za) + + With modifications by: + (c) 1998, 1999 by Joerg Walter + and + (c) 1999 by Wim Taymans + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +*/ + +#include +#include +#include + +#include "config.h" +#ifdef HAVE_MMX +#define MMX +#endif + +#include "RTjpegN.h" + +#ifdef MMX +#include "mmx.h" +#endif + +//#define SHOWBLOCK 1 +#define BETTERCOMPRESSION 1 + +static const unsigned char RTjpeg_ZZ[64]={ +0, +8, 1, +2, 9, 16, +24, 17, 10, 3, +4, 11, 18, 25, 32, +40, 33, 26, 19, 12, 5, +6, 13, 20, 27, 34, 41, 48, +56, 49, 42, 35, 28, 21, 14, 7, +15, 22, 29, 36, 43, 50, 57, +58, 51, 44, 37, 30, 23, +31, 38, 45, 52, 59, +60, 53, 46, 39, +47, 54, 61, +62, 55, +63 }; + +static const __u64 RTjpeg_aan_tab[64]={ +4294967296ULL, 5957222912ULL, 5611718144ULL, 5050464768ULL, 4294967296ULL, 3374581504ULL, 2324432128ULL, 1184891264ULL, +5957222912ULL, 8263040512ULL, 7783580160ULL, 7005009920ULL, 5957222912ULL, 4680582144ULL, 3224107520ULL, 1643641088ULL, +5611718144ULL, 7783580160ULL, 7331904512ULL, 6598688768ULL, 5611718144ULL, 4408998912ULL, 3036936960ULL, 1548224000ULL, +5050464768ULL, 7005009920ULL, 6598688768ULL, 5938608128ULL, 5050464768ULL, 3968072960ULL, 2733115392ULL, 1393296000ULL, +4294967296ULL, 5957222912ULL, 5611718144ULL, 5050464768ULL, 4294967296ULL, 3374581504ULL, 2324432128ULL, 1184891264ULL, +3374581504ULL, 4680582144ULL, 4408998912ULL, 3968072960ULL, 3374581504ULL, 2651326208ULL, 1826357504ULL, 931136000ULL, +2324432128ULL, 3224107520ULL, 3036936960ULL, 2733115392ULL, 2324432128ULL, 1826357504ULL, 1258030336ULL, 641204288ULL, +1184891264ULL, 1643641088ULL, 1548224000ULL, 1393296000ULL, 1184891264ULL, 931136000ULL, 641204288ULL, 326894240ULL, +}; + +#ifndef MMX +static __s32 RTjpeg_ws[64+31]; +#endif +__u8 RTjpeg_alldata[2*64+4*64+4*64+4*64+4*64+32]; + +static __s16 *block; // rh +static __s16 *RTjpeg_block; +static __s32 *RTjpeg_lqt; +static __s32 *RTjpeg_cqt; +static __u32 *RTjpeg_liqt; +static __u32 *RTjpeg_ciqt; + +static unsigned char RTjpeg_lb8; +static unsigned char RTjpeg_cb8; +static int RTjpeg_width, RTjpeg_height; +static int RTjpeg_Ywidth, RTjpeg_Cwidth; +static int RTjpeg_Ysize, RTjpeg_Csize; + +static __s16 *RTjpeg_old=NULL; + +#ifdef MMX +mmx_t RTjpeg_lmask; +mmx_t RTjpeg_cmask; +#else +__u16 RTjpeg_lmask; +__u16 RTjpeg_cmask; +#endif +int RTjpeg_mtest=0; + +static const unsigned char RTjpeg_lum_quant_tbl[64] = { + 16, 11, 10, 16, 24, 40, 51, 61, + 12, 12, 14, 19, 26, 58, 60, 55, + 14, 13, 16, 24, 40, 57, 69, 56, + 14, 17, 22, 29, 51, 87, 80, 62, + 18, 22, 37, 56, 68, 109, 103, 77, + 24, 35, 55, 64, 81, 104, 113, 92, + 49, 64, 78, 87, 103, 121, 120, 101, + 72, 92, 95, 98, 112, 100, 103, 99 + }; + +static const unsigned char RTjpeg_chrom_quant_tbl[64] = { + 17, 18, 24, 47, 99, 99, 99, 99, + 18, 21, 26, 66, 99, 99, 99, 99, + 24, 26, 56, 99, 99, 99, 99, 99, + 47, 66, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99 + }; + +#ifdef BETTERCOMPRESSION + +/*--------------------------------------------------*/ +/* better encoding, but needs a lot more cpu time */ +/* seems to be more effective than old method +lzo */ +/* with this encoding lzo isn't efficient anymore */ +/* there is still more potential for better */ +/* encoding but that would need even more cputime */ +/* anyway your mileage may vary */ +/* */ +/* written by Martin BIELY and Roman HOCHLEITNER */ +/*--------------------------------------------------*/ + +/* +++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* Block to Stream (encoding) */ +/* */ + +int RTjpeg_b2s(__s16 *data, __s8 *strm, __u8 bt8) +{ + register int ci, co=1; + register __s16 ZZvalue; + register unsigned char bitten; + register unsigned char bitoff; + +#ifdef SHOWBLOCK + + int ii; + for (ii=0; ii < 64; ii++) { + fprintf(stdout, "%d ", data[RTjpeg_ZZ[ii]]); + } + fprintf(stdout, "\n\n"); + +#endif + +// *strm++ = 0x10; +// *strm = 0x00; +// +// return 2; + + // first byte allways written + (__u8)strm[0]= + (__u8)(data[RTjpeg_ZZ[0]]>254) ? 254:((data[RTjpeg_ZZ[0]]<0)?0:data[RTjpeg_ZZ[0]]); + + + ci=63; + while (data[RTjpeg_ZZ[ci]]==0 && ci>0) ci--; + + bitten = ((unsigned char)ci) << 2; + + if (ci==0) { + (__u8)strm[1]= bitten; + co = 2; + return (int)co; + } + + /* bitoff=0 because the high 6bit contain first non zero position */ + bitoff = 0; + co = 1; + + for(; ci>0; ci--) { + + ZZvalue = data[RTjpeg_ZZ[ci]]; + + switch(ZZvalue) { + case 0: + break; + case 1: + bitten |= (0x01<0; ci--) { + + ZZvalue = data[RTjpeg_ZZ[ci]]; + + if( (ZZvalue > 7) || (ZZvalue < -7) ) { + bitten |= (0x08<0; ci--) { + + ZZvalue = data[RTjpeg_ZZ[ci]]; + + if(ZZvalue>0) + { + strm[co++]=(__s8)(ZZvalue>127)?127:ZZvalue; + } + else + { + strm[co++]=(__s8)(ZZvalue<-128)?-128:ZZvalue; + } + + } + + +BAUCHWEH: + /* we gotoo much now we are ill */ +#ifdef SHOWBLOCK +{ +int i; +fprintf(stdout, "\nco = '%d'\n", co); + for (i=0; i < co+2; i++) { + fprintf(stdout, "%d ", strm[i]); + } +fprintf(stdout, "\n\n"); +} +#endif + + return (int)co; +} + +/* +++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* Stream to Block (decoding) */ +/* */ + +int RTjpeg_s2b(__s16 *data, __s8 *strm, __u8 bt8, __u32 *qtbl) +{ + int ci; + register int co; + register int i; + register unsigned char bitten; + register unsigned char bitoff; + + /* first byte always read */ + i=RTjpeg_ZZ[0]; + data[i]=((__u8)strm[0])*qtbl[i]; + + /* we start at the behind */ + + bitten = ((unsigned char)strm[1]) >> 2; + co = 63; + for(; co > bitten; co--) { + + data[RTjpeg_ZZ[co]] = 0; + + } + + if (co==0) { + ci = 2; + goto AUTOBAHN; + } + + /* we have to read the last 2 bits of the second byte */ + ci=1; + bitoff = 0; + + for(; co>0; co--) { + + bitten = ((unsigned char)strm[ci]) >> bitoff; + bitten &= 0x03; + + i=RTjpeg_ZZ[co]; + + switch( bitten ) { + case 0x03: + data[i]= -qtbl[i]; + break; + case 0x02: + goto FUSSWEG; + break; + case 0x01: + data[i]= qtbl[i]; + break; + case 0x00: + data[i]= 0; + break; + default: + + } + + if( bitoff == 0 ) { + bitoff = 8; + ci++; + } + bitoff -= 2; + } + /* co is 0 now */ + /* data is written properly */ + + /* if bitoff!=6 then ci is the index, but should be the byte count, so we increment by 1 */ + if (bitoff!=6) ci++; + + goto AUTOBAHN; + + +FUSSWEG: +/* correct bitoff to nibble */ + switch(bitoff){ + case 4: + case 6: + bitoff = 0; + break; + case 2: + case 0: + /* we have to read from the next byte */ + ci++; + bitoff = 4; + break; + default: + break; + } + + for(; co>0; co--) { + + bitten = ((unsigned char)strm[ci]) >> bitoff; + bitten &= 0x0f; + + i=RTjpeg_ZZ[co]; + + if( bitten == 0x08 ) { + goto STRASSE; + } + + /* the compiler cannot do sign extension for signed nibbles */ + if( bitten & 0x08 ) { + bitten |= 0xf0; + } + /* the unsigned char bitten now is a valid signed char */ + + data[i]=((signed char)bitten)*qtbl[i]; + + if( bitoff == 0 ) { + bitoff = 8; + ci++; + } + bitoff -= 4; + } + /* co is 0 */ + + /* if bitoff!=4 then ci is the index, but should be the byte count, so we increment by 1 */ + if (bitoff!=4) ci++; + + goto AUTOBAHN; + +STRASSE: + ci++; + + for(; co>0; co--) { + i=RTjpeg_ZZ[co]; + data[i]=strm[ci++]*qtbl[i]; + } + + /* ci now is the count, because it points to next element => no incrementing */ + +AUTOBAHN: + +#ifdef SHOWBLOCK +fprintf(stdout, "\nci = '%d'\n", ci); + for (i=0; i < 64; i++) { + fprintf(stdout, "%d ", data[RTjpeg_ZZ[i]]); + } +fprintf(stdout, "\n\n"); +#endif + + return ci; +} + +#else + +int RTjpeg_b2s(__s16 *data, __s8 *strm, __u8 bt8) +{ + register int ci, co=1, tmp; + register __s16 ZZvalue; + +#ifdef SHOWBLOCK + + int ii; + for (ii=0; ii < 64; ii++) { + fprintf(stdout, "%d ", data[RTjpeg_ZZ[ii]]); + } + fprintf(stdout, "\n\n"); + +#endif + + (__u8)strm[0]=(__u8)(data[RTjpeg_ZZ[0]]>254) ? 254:((data[RTjpeg_ZZ[0]]<0)?0:data[RTjpeg_ZZ[0]]); + + for(ci=1; ci<=bt8; ci++) + { + ZZvalue = data[RTjpeg_ZZ[ci]]; + + if(ZZvalue>0) + { + strm[co++]=(__s8)(ZZvalue>127)?127:ZZvalue; + } + else + { + strm[co++]=(__s8)(ZZvalue<-128)?-128:ZZvalue; + } + } + + for(; ci<64; ci++) + { + ZZvalue = data[RTjpeg_ZZ[ci]]; + + if(ZZvalue>0) + { + strm[co++]=(__s8)(ZZvalue>63)?63:ZZvalue; + } + else if(ZZvalue<0) + { + strm[co++]=(__s8)(ZZvalue<-64)?-64:ZZvalue; + } + else /* compress zeros */ + { + tmp=ci; + do + { + ci++; + } + while((ci<64)&&(data[RTjpeg_ZZ[ci]]==0)); + + strm[co++]=(__s8)(63+(ci-tmp)); + ci--; + } + } + return (int)co; +} + +int RTjpeg_s2b(__s16 *data, __s8 *strm, __u8 bt8, __u32 *qtbl) +{ + int ci=1, co=1, tmp; + register int i; + + i=RTjpeg_ZZ[0]; + data[i]=((__u8)strm[0])*qtbl[i]; + + for(co=1; co<=bt8; co++) + { + i=RTjpeg_ZZ[co]; + data[i]=strm[ci++]*qtbl[i]; + } + + for(; co<64; co++) + { + if(strm[ci]>63) + { + tmp=co+strm[ci]-63; + for(; co>16); +} +#endif + +/* + * Perform the forward DCT on one block of samples. + */ +#ifdef MMX +static mmx_t RTjpeg_C4 =(mmx_t)(long long)0x2D412D412D412D41LL; +static mmx_t RTjpeg_C6 =(mmx_t)(long long)0x187E187E187E187ELL; +static mmx_t RTjpeg_C2mC6=(mmx_t)(long long)0x22A322A322A322A3LL; +static mmx_t RTjpeg_C2pC6=(mmx_t)(long long)0x539F539F539F539FLL; +static mmx_t RTjpeg_zero =(mmx_t)(long long)0x0000000000000000LL; + +#else + +#define FIX_0_382683433 ((__s32) 98) /* FIX(0.382683433) */ +#define FIX_0_541196100 ((__s32) 139) /* FIX(0.541196100) */ +#define FIX_0_707106781 ((__s32) 181) /* FIX(0.707106781) */ +#define FIX_1_306562965 ((__s32) 334) /* FIX(1.306562965) */ + +#define DESCALE10(x) (__s16)( ((x)+128) >> 8) +#define DESCALE20(x) (__s16)(((x)+32768) >> 16) +#define D_MULTIPLY(var,const) ((__s32) ((var) * (const))) +#endif + +void RTjpeg_dct_init(void) +{ + int i; + + for(i=0; i<64; i++) + { + RTjpeg_lqt[i]=(((__u64)RTjpeg_lqt[i]<<32)/RTjpeg_aan_tab[i]); + RTjpeg_cqt[i]=(((__u64)RTjpeg_cqt[i]<<32)/RTjpeg_aan_tab[i]); + } +} + +void RTjpeg_dctY(__u8 *idata, __s16 *odata, int rskip) +{ +#ifndef MMX + __s32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + __s32 tmp10, tmp11, tmp12, tmp13; + __s32 z1, z2, z3, z4, z5, z11, z13; + __u8 *idataptr; + __s16 *odataptr; + __s32 *wsptr; + int ctr; + + idataptr = idata; + wsptr = RTjpeg_ws; + for (ctr = 7; ctr >= 0; ctr--) { + tmp0 = idataptr[0] + idataptr[7]; + tmp7 = idataptr[0] - idataptr[7]; + tmp1 = idataptr[1] + idataptr[6]; + tmp6 = idataptr[1] - idataptr[6]; + tmp2 = idataptr[2] + idataptr[5]; + tmp5 = idataptr[2] - idataptr[5]; + tmp3 = idataptr[3] + idataptr[4]; + tmp4 = idataptr[3] - idataptr[4]; + + tmp10 = (tmp0 + tmp3); /* phase 2 */ + tmp13 = tmp0 - tmp3; + tmp11 = (tmp1 + tmp2); + tmp12 = tmp1 - tmp2; + + wsptr[0] = (tmp10 + tmp11)<<8; /* phase 3 */ + wsptr[4] = (tmp10 - tmp11)<<8; + + z1 = D_MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */ + wsptr[2] = (tmp13<<8) + z1; /* phase 5 */ + wsptr[6] = (tmp13<<8) - z1; + + tmp10 = tmp4 + tmp5; /* phase 2 */ + tmp11 = tmp5 + tmp6; + tmp12 = tmp6 + tmp7; + + z5 = D_MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */ + z2 = D_MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */ + z4 = D_MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ + z3 = D_MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ + + z11 = (tmp7<<8) + z3; /* phase 5 */ + z13 = (tmp7<<8) - z3; + + wsptr[5] = z13 + z2; /* phase 6 */ + wsptr[3] = z13 - z2; + wsptr[1] = z11 + z4; + wsptr[7] = z11 - z4; + + idataptr += rskip<<3; /* advance pointer to next row */ + wsptr += 8; + } + + wsptr = RTjpeg_ws; + odataptr=odata; + for (ctr = 7; ctr >= 0; ctr--) { + tmp0 = wsptr[0] + wsptr[56]; + tmp7 = wsptr[0] - wsptr[56]; + tmp1 = wsptr[8] + wsptr[48]; + tmp6 = wsptr[8] - wsptr[48]; + tmp2 = wsptr[16] + wsptr[40]; + tmp5 = wsptr[16] - wsptr[40]; + tmp3 = wsptr[24] + wsptr[32]; + tmp4 = wsptr[24] - wsptr[32]; + + tmp10 = tmp0 + tmp3; /* phase 2 */ + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + + odataptr[0] = DESCALE10(tmp10 + tmp11); /* phase 3 */ + odataptr[32] = DESCALE10(tmp10 - tmp11); + + z1 = D_MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */ + odataptr[16] = DESCALE20((tmp13<<8) + z1); /* phase 5 */ + odataptr[48] = DESCALE20((tmp13<<8) - z1); + + tmp10 = tmp4 + tmp5; /* phase 2 */ + tmp11 = tmp5 + tmp6; + tmp12 = tmp6 + tmp7; + + z5 = D_MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */ + z2 = D_MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */ + z4 = D_MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ + z3 = D_MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ + + z11 = (tmp7<<8) + z3; /* phase 5 */ + z13 = (tmp7<<8) - z3; + + odataptr[40] = DESCALE20(z13 + z2); /* phase 6 */ + odataptr[24] = DESCALE20(z13 - z2); + odataptr[8] = DESCALE20(z11 + z4); + odataptr[56] = DESCALE20(z11 - z4); + + odataptr++; /* advance pointer to next column */ + wsptr++; + } +#else + volatile mmx_t tmp6, tmp7; + register mmx_t *dataptr = (mmx_t *)odata; + mmx_t *idata2 = (mmx_t *)idata; + + // first copy the input 8 bit to the destination 16 bits + + movq_m2r(RTjpeg_zero, mm2); + + + movq_m2r(*idata2, mm0); + movq_r2r(mm0, mm1); + + punpcklbw_r2r(mm2, mm0); + movq_r2m(mm0, *(dataptr)); + + punpckhbw_r2r(mm2, mm1); + movq_r2m(mm1, *(dataptr+1)); + + idata2 += rskip; + + movq_m2r(*idata2, mm0); + movq_r2r(mm0, mm1); + + punpcklbw_r2r(mm2, mm0); + movq_r2m(mm0, *(dataptr+2)); + + punpckhbw_r2r(mm2, mm1); + movq_r2m(mm1, *(dataptr+3)); + + idata2 += rskip; + + movq_m2r(*idata2, mm0); + movq_r2r(mm0, mm1); + + punpcklbw_r2r(mm2, mm0); + movq_r2m(mm0, *(dataptr+4)); + + punpckhbw_r2r(mm2, mm1); + movq_r2m(mm1, *(dataptr+5)); + + idata2 += rskip; + + movq_m2r(*idata2, mm0); + movq_r2r(mm0, mm1); + + punpcklbw_r2r(mm2, mm0); + movq_r2m(mm0, *(dataptr+6)); + + punpckhbw_r2r(mm2, mm1); + movq_r2m(mm1, *(dataptr+7)); + + idata2 += rskip; + + movq_m2r(*idata2, mm0); + movq_r2r(mm0, mm1); + + punpcklbw_r2r(mm2, mm0); + movq_r2m(mm0, *(dataptr+8)); + + punpckhbw_r2r(mm2, mm1); + movq_r2m(mm1, *(dataptr+9)); + + idata2 += rskip; + + movq_m2r(*idata2, mm0); + movq_r2r(mm0, mm1); + + punpcklbw_r2r(mm2, mm0); + movq_r2m(mm0, *(dataptr+10)); + + punpckhbw_r2r(mm2, mm1); + movq_r2m(mm1, *(dataptr+11)); + + idata2 += rskip; + + movq_m2r(*idata2, mm0); + movq_r2r(mm0, mm1); + + punpcklbw_r2r(mm2, mm0); + movq_r2m(mm0, *(dataptr+12)); + + punpckhbw_r2r(mm2, mm1); + movq_r2m(mm1, *(dataptr+13)); + + idata2 += rskip; + + movq_m2r(*idata2, mm0); + movq_r2r(mm0, mm1); + + punpcklbw_r2r(mm2, mm0); + movq_r2m(mm0, *(dataptr+14)); + + punpckhbw_r2r(mm2, mm1); + movq_r2m(mm1, *(dataptr+15)); + +/* Start Transpose to do calculations on rows */ + + movq_m2r(*(dataptr+9), mm7); // m03:m02|m01:m00 - first line (line 4)and copy into m5 + + movq_m2r(*(dataptr+13), mm6); // m23:m22|m21:m20 - third line (line 6)and copy into m2 + movq_r2r(mm7, mm5); + + punpcklwd_m2r(*(dataptr+11), mm7); // m11:m01|m10:m00 - interleave first and second lines + movq_r2r(mm6, mm2); + + punpcklwd_m2r(*(dataptr+15), mm6); // m31:m21|m30:m20 - interleave third and fourth lines + movq_r2r(mm7, mm1); + + movq_m2r(*(dataptr+11), mm3); // m13:m13|m11:m10 - second line + punpckldq_r2r(mm6, mm7); // m30:m20|m10:m00 - interleave to produce result 1 + + movq_m2r(*(dataptr+15), mm0); // m13:m13|m11:m10 - fourth line + punpckhdq_r2r(mm6, mm1); // m31:m21|m11:m01 - interleave to produce result 2 + + movq_r2m(mm7,*(dataptr+9)); // write result 1 + punpckhwd_r2r(mm3, mm5); // m13:m03|m12:m02 - interleave first and second lines + + movq_r2m(mm1,*(dataptr+11)); // write result 2 + punpckhwd_r2r(mm0, mm2); // m33:m23|m32:m22 - interleave third and fourth lines + + movq_r2r(mm5, mm1); + punpckldq_r2r(mm2, mm5); // m32:m22|m12:m02 - interleave to produce result 3 + + movq_m2r(*(dataptr+1), mm0); // m03:m02|m01:m00 - first line, 4x4 + punpckhdq_r2r(mm2, mm1); // m33:m23|m13:m03 - interleave to produce result 4 + + movq_r2m(mm5,*(dataptr+13)); // write result 3 + + // last 4x4 done + + movq_r2m(mm1, *(dataptr+15)); // write result 4, last 4x4 + + movq_m2r(*(dataptr+5), mm2); // m23:m22|m21:m20 - third line + movq_r2r(mm0, mm6); + + punpcklwd_m2r(*(dataptr+3), mm0); // m11:m01|m10:m00 - interleave first and second lines + movq_r2r(mm2, mm7); + + punpcklwd_m2r(*(dataptr+7), mm2); // m31:m21|m30:m20 - interleave third and fourth lines + movq_r2r(mm0, mm4); + + // + movq_m2r(*(dataptr+8), mm1); // n03:n02|n01:n00 - first line + punpckldq_r2r(mm2, mm0); // m30:m20|m10:m00 - interleave to produce first result + + movq_m2r(*(dataptr+12), mm3); // n23:n22|n21:n20 - third line + punpckhdq_r2r(mm2, mm4); // m31:m21|m11:m01 - interleave to produce second result + + punpckhwd_m2r(*(dataptr+3), mm6); // m13:m03|m12:m02 - interleave first and second lines + movq_r2r(mm1, mm2); // copy first line + + punpckhwd_m2r(*(dataptr+7), mm7); // m33:m23|m32:m22 - interleave third and fourth lines + movq_r2r(mm6, mm5); // copy first intermediate result + + movq_r2m(mm0, *(dataptr+8)); // write result 1 + punpckhdq_r2r(mm7, mm5); // m33:m23|m13:m03 - produce third result + + punpcklwd_m2r(*(dataptr+10), mm1); // n11:n01|n10:n00 - interleave first and second lines + movq_r2r(mm3, mm0); // copy third line + + punpckhwd_m2r(*(dataptr+10), mm2); // n13:n03|n12:n02 - interleave first and second lines + + movq_r2m(mm4, *(dataptr+10)); // write result 2 out + punpckldq_r2r(mm7, mm6); // m32:m22|m12:m02 - produce fourth result + + punpcklwd_m2r(*(dataptr+14), mm3); // n31:n21|n30:n20 - interleave third and fourth lines + movq_r2r(mm1, mm4); + + movq_r2m(mm6, *(dataptr+12)); // write result 3 out + punpckldq_r2r(mm3, mm1); // n30:n20|n10:n00 - produce first result + + punpckhwd_m2r(*(dataptr+14), mm0); // n33:n23|n32:n22 - interleave third and fourth lines + movq_r2r(mm2, mm6); + + movq_r2m(mm5, *(dataptr+14)); // write result 4 out + punpckhdq_r2r(mm3, mm4); // n31:n21|n11:n01- produce second result + + movq_r2m(mm1, *(dataptr+1)); // write result 5 out - (first result for other 4 x 4 block) + punpckldq_r2r(mm0, mm2); // n32:n22|n12:n02- produce third result + + movq_r2m(mm4, *(dataptr+3)); // write result 6 out + punpckhdq_r2r(mm0, mm6); // n33:n23|n13:n03 - produce fourth result + + movq_r2m(mm2, *(dataptr+5)); // write result 7 out + + movq_m2r(*dataptr, mm0); // m03:m02|m01:m00 - first line, first 4x4 + + movq_r2m(mm6, *(dataptr+7)); // write result 8 out + + +// Do first 4x4 quadrant, which is used in the beginning of the DCT: + + movq_m2r(*(dataptr+4), mm7); // m23:m22|m21:m20 - third line + movq_r2r(mm0, mm2); + + punpcklwd_m2r(*(dataptr+2), mm0); // m11:m01|m10:m00 - interleave first and second lines + movq_r2r(mm7, mm4); + + punpcklwd_m2r(*(dataptr+6), mm7); // m31:m21|m30:m20 - interleave third and fourth lines + movq_r2r(mm0, mm1); + + movq_m2r(*(dataptr+2), mm6); // m13:m12|m11:m10 - second line + punpckldq_r2r(mm7, mm0); // m30:m20|m10:m00 - interleave to produce result 1 + + movq_m2r(*(dataptr+6), mm5); // m33:m32|m31:m30 - fourth line + punpckhdq_r2r(mm7, mm1); // m31:m21|m11:m01 - interleave to produce result 2 + + movq_r2r(mm0, mm7); // write result 1 + punpckhwd_r2r(mm6, mm2); // m13:m03|m12:m02 - interleave first and second lines + + psubw_m2r(*(dataptr+14), mm7); // tmp07=x0-x7 /* Stage 1 */ + movq_r2r(mm1, mm6); // write result 2 + + paddw_m2r(*(dataptr+14), mm0); // tmp00=x0+x7 /* Stage 1 */ + punpckhwd_r2r(mm5, mm4); // m33:m23|m32:m22 - interleave third and fourth lines + + paddw_m2r(*(dataptr+12), mm1); // tmp01=x1+x6 /* Stage 1 */ + movq_r2r(mm2, mm3); // copy first intermediate result + + psubw_m2r(*(dataptr+12), mm6); // tmp06=x1-x6 /* Stage 1 */ + punpckldq_r2r(mm4, mm2); // m32:m22|m12:m02 - interleave to produce result 3 + + movq_r2m(mm7, tmp7); + movq_r2r(mm2, mm5); // write result 3 + + movq_r2m(mm6, tmp6); + punpckhdq_r2r(mm4, mm3); // m33:m23|m13:m03 - interleave to produce result 4 + + paddw_m2r(*(dataptr+10), mm2); // tmp02=x2+5 /* Stage 1 */ + movq_r2r(mm3, mm4); // write result 4 + +/************************************************************************************************ + End of Transpose +************************************************************************************************/ + + + paddw_m2r(*(dataptr+8), mm3); // tmp03=x3+x4 /* stage 1*/ + movq_r2r(mm0, mm7); + + psubw_m2r(*(dataptr+8), mm4); // tmp04=x3-x4 /* stage 1*/ + movq_r2r(mm1, mm6); + + paddw_r2r(mm3, mm0); // tmp10 = tmp00 + tmp03 /* even 2 */ + psubw_r2r(mm3, mm7); // tmp13 = tmp00 - tmp03 /* even 2 */ + + psubw_r2r(mm2, mm6); // tmp12 = tmp01 - tmp02 /* even 2 */ + paddw_r2r(mm2, mm1); // tmp11 = tmp01 + tmp02 /* even 2 */ + + psubw_m2r(*(dataptr+10), mm5); // tmp05=x2-x5 /* stage 1*/ + paddw_r2r(mm7, mm6); // tmp12 + tmp13 + + /* stage 3 */ + + movq_m2r(tmp6, mm2); + movq_r2r(mm0, mm3); + + psllw_i2r(2, mm6); // m8 * 2^2 + paddw_r2r(mm1, mm0); + + pmulhw_m2r(RTjpeg_C4, mm6); // z1 + psubw_r2r(mm1, mm3); + + movq_r2m(mm0, *dataptr); + movq_r2r(mm7, mm0); + + /* Odd part */ + movq_r2m(mm3, *(dataptr+8)); + paddw_r2r(mm5, mm4); // tmp10 + + movq_m2r(tmp7, mm3); + paddw_r2r(mm6, mm0); // tmp32 + + paddw_r2r(mm2, mm5); // tmp11 + psubw_r2r(mm6, mm7); // tmp33 + + movq_r2m(mm0, *(dataptr+4)); + paddw_r2r(mm3, mm2); // tmp12 + + /* stage 4 */ + + movq_r2m(mm7, *(dataptr+12)); + movq_r2r(mm4, mm1); // copy of tmp10 + + psubw_r2r(mm2, mm1); // tmp10 - tmp12 + psllw_i2r(2, mm4); // m8 * 2^2 + + movq_m2r(RTjpeg_C2mC6, mm0); + psllw_i2r(2, mm1); + + pmulhw_m2r(RTjpeg_C6, mm1); // z5 + psllw_i2r(2, mm2); + + pmulhw_r2r(mm0, mm4); // z5 + + /* stage 5 */ + + pmulhw_m2r(RTjpeg_C2pC6, mm2); + psllw_i2r(2, mm5); + + pmulhw_m2r(RTjpeg_C4, mm5); // z3 + movq_r2r(mm3, mm0); // copy tmp7 + + movq_m2r(*(dataptr+1), mm7); + paddw_r2r(mm1, mm4); // z2 + + paddw_r2r(mm1, mm2); // z4 + + paddw_r2r(mm5, mm0); // z11 + psubw_r2r(mm5, mm3); // z13 + + /* stage 6 */ + + movq_r2r(mm3, mm5); // copy z13 + psubw_r2r(mm4, mm3); // y3=z13 - z2 + + paddw_r2r(mm4, mm5); // y5=z13 + z2 + movq_r2r(mm0, mm6); // copy z11 + + movq_r2m(mm3, *(dataptr+6)); //save y3 + psubw_r2r(mm2, mm0); // y7=z11 - z4 + + movq_r2m(mm5, *(dataptr+10)); //save y5 + paddw_r2r(mm2, mm6); // y1=z11 + z4 + + movq_r2m(mm0, *(dataptr+14)); //save y7 + + /************************************************ + * End of 1st 4 rows + ************************************************/ + + movq_m2r(*(dataptr+3), mm1); // load x1 /* stage 1 */ + movq_r2r(mm7, mm0); // copy x0 + + movq_r2m(mm6, *(dataptr+2)); //save y1 + + movq_m2r(*(dataptr+5), mm2); // load x2 /* stage 1 */ + movq_r2r(mm1, mm6); // copy x1 + + paddw_m2r(*(dataptr+15), mm0); // tmp00 = x0 + x7 + + movq_m2r(*(dataptr+7), mm3); // load x3 /* stage 1 */ + movq_r2r(mm2, mm5); // copy x2 + + psubw_m2r(*(dataptr+15), mm7); // tmp07 = x0 - x7 + movq_r2r(mm3, mm4); // copy x3 + + paddw_m2r(*(dataptr+13), mm1); // tmp01 = x1 + x6 + + movq_r2m(mm7, tmp7); // save tmp07 + movq_r2r(mm0, mm7); // copy tmp00 + + psubw_m2r(*(dataptr+13), mm6); // tmp06 = x1 - x6 + + /* stage 2, Even Part */ + + paddw_m2r(*(dataptr+9), mm3); // tmp03 = x3 + x4 + + movq_r2m(mm6, tmp6); // save tmp07 + movq_r2r(mm1, mm6); // copy tmp01 + + paddw_m2r(*(dataptr+11), mm2); // tmp02 = x2 + x5 + paddw_r2r(mm3, mm0); // tmp10 = tmp00 + tmp03 + + psubw_r2r(mm3, mm7); // tmp13 = tmp00 - tmp03 + + psubw_m2r(*(dataptr+9), mm4); // tmp04 = x3 - x4 + psubw_r2r(mm2, mm6); // tmp12 = tmp01 - tmp02 + + paddw_r2r(mm2, mm1); // tmp11 = tmp01 + tmp02 + + psubw_m2r(*(dataptr+11), mm5); // tmp05 = x2 - x5 + paddw_r2r(mm7, mm6); // tmp12 + tmp13 + + /* stage 3, Even and stage 4 & 5 even */ + + movq_m2r(tmp6, mm2); // load tmp6 + movq_r2r(mm0, mm3); // copy tmp10 + + psllw_i2r(2, mm6); // shift z1 + paddw_r2r(mm1, mm0); // y0=tmp10 + tmp11 + + pmulhw_m2r(RTjpeg_C4, mm6); // z1 + psubw_r2r(mm1, mm3); // y4=tmp10 - tmp11 + + movq_r2m(mm0, *(dataptr+1)); //save y0 + movq_r2r(mm7, mm0); // copy tmp13 + + /* odd part */ + + movq_r2m(mm3, *(dataptr+9)); //save y4 + paddw_r2r(mm5, mm4); // tmp10 = tmp4 + tmp5 + + movq_m2r(tmp7, mm3); // load tmp7 + paddw_r2r(mm6, mm0); // tmp32 = tmp13 + z1 + + paddw_r2r(mm2, mm5); // tmp11 = tmp5 + tmp6 + psubw_r2r(mm6, mm7); // tmp33 = tmp13 - z1 + + movq_r2m(mm0, *(dataptr+5)); //save y2 + paddw_r2r(mm3, mm2); // tmp12 = tmp6 + tmp7 + + /* stage 4 */ + + movq_r2m(mm7, *(dataptr+13)); //save y6 + movq_r2r(mm4, mm1); // copy tmp10 + + psubw_r2r(mm2, mm1); // tmp10 - tmp12 + psllw_i2r(2, mm4); // shift tmp10 + + movq_m2r(RTjpeg_C2mC6, mm0); // load C2mC6 + psllw_i2r(2, mm1); // shift (tmp10-tmp12) + + pmulhw_m2r(RTjpeg_C6, mm1); // z5 + psllw_i2r(2, mm5); // prepare for multiply + + pmulhw_r2r(mm0, mm4); // multiply by converted real + + /* stage 5 */ + + pmulhw_m2r(RTjpeg_C4, mm5); // z3 + psllw_i2r(2, mm2); // prepare for multiply + + pmulhw_m2r(RTjpeg_C2pC6, mm2); // multiply + movq_r2r(mm3, mm0); // copy tmp7 + + movq_m2r(*(dataptr+9), mm7); // m03:m02|m01:m00 - first line (line 4)and copy into mm7 + paddw_r2r(mm1, mm4); // z2 + + paddw_r2r(mm5, mm0); // z11 + psubw_r2r(mm5, mm3); // z13 + + /* stage 6 */ + + movq_r2r(mm3, mm5); // copy z13 + paddw_r2r(mm1, mm2); // z4 + + movq_r2r(mm0, mm6); // copy z11 + psubw_r2r(mm4, mm5); // y3 + + paddw_r2r(mm2, mm6); // y1 + paddw_r2r(mm4, mm3); // y5 + + movq_r2m(mm5, *(dataptr+7)); //save y3 + + movq_r2m(mm6, *(dataptr+3)); //save y1 + psubw_r2r(mm2, mm0); // y7 + +/************************************************************************************************ + Start of Transpose +************************************************************************************************/ + + movq_m2r(*(dataptr+13), mm6); // m23:m22|m21:m20 - third line (line 6)and copy into m2 + movq_r2r(mm7, mm5); // copy first line + + punpcklwd_r2r(mm3, mm7); // m11:m01|m10:m00 - interleave first and second lines + movq_r2r(mm6, mm2); // copy third line + + punpcklwd_r2r(mm0, mm6); // m31:m21|m30:m20 - interleave third and fourth lines + movq_r2r(mm7, mm1); // copy first intermediate result + + punpckldq_r2r(mm6, mm7); // m30:m20|m10:m00 - interleave to produce result 1 + + punpckhdq_r2r(mm6, mm1); // m31:m21|m11:m01 - interleave to produce result 2 + + movq_r2m(mm7, *(dataptr+9)); // write result 1 + punpckhwd_r2r(mm3, mm5); // m13:m03|m12:m02 - interleave first and second lines + + movq_r2m(mm1, *(dataptr+11)); // write result 2 + punpckhwd_r2r(mm0, mm2); // m33:m23|m32:m22 - interleave third and fourth lines + + movq_r2r(mm5, mm1); // copy first intermediate result + punpckldq_r2r(mm2, mm5); // m32:m22|m12:m02 - interleave to produce result 3 + + movq_m2r(*(dataptr+1), mm0); // m03:m02|m01:m00 - first line, 4x4 + punpckhdq_r2r(mm2, mm1); // m33:m23|m13:m03 - interleave to produce result 4 + + movq_r2m(mm5, *(dataptr+13)); // write result 3 + + /****** last 4x4 done */ + + movq_r2m(mm1, *(dataptr+15)); // write result 4, last 4x4 + + movq_m2r(*(dataptr+5), mm2); // m23:m22|m21:m20 - third line + movq_r2r(mm0, mm6); // copy first line + + punpcklwd_m2r(*(dataptr+3), mm0); // m11:m01|m10:m00 - interleave first and second lines + movq_r2r(mm2, mm7); // copy third line + + punpcklwd_m2r(*(dataptr+7), mm2); // m31:m21|m30:m20 - interleave third and fourth lines + movq_r2r(mm0, mm4); // copy first intermediate result + + + + movq_m2r(*(dataptr+8), mm1); // n03:n02|n01:n00 - first line + punpckldq_r2r(mm2, mm0); // m30:m20|m10:m00 - interleave to produce first result + + movq_m2r(*(dataptr+12), mm3); // n23:n22|n21:n20 - third line + punpckhdq_r2r(mm2, mm4); // m31:m21|m11:m01 - interleave to produce second result + + punpckhwd_m2r(*(dataptr+3), mm6); // m13:m03|m12:m02 - interleave first and second lines + movq_r2r(mm1, mm2); // copy first line + + punpckhwd_m2r(*(dataptr+7), mm7); // m33:m23|m32:m22 - interleave third and fourth lines + movq_r2r(mm6, mm5); // copy first intermediate result + + movq_r2m(mm0, *(dataptr+8)); // write result 1 + punpckhdq_r2r(mm7, mm5); // m33:m23|m13:m03 - produce third result + + punpcklwd_m2r(*(dataptr+10), mm1); // n11:n01|n10:n00 - interleave first and second lines + movq_r2r(mm3, mm0); // copy third line + + punpckhwd_m2r(*(dataptr+10), mm2); // n13:n03|n12:n02 - interleave first and second lines + + movq_r2m(mm4, *(dataptr+10)); // write result 2 out + punpckldq_r2r(mm7, mm6); // m32:m22|m12:m02 - produce fourth result + + punpcklwd_m2r(*(dataptr+14), mm3); // n33:n23|n32:n22 - interleave third and fourth lines + movq_r2r(mm1, mm4); // copy second intermediate result + + movq_r2m(mm6, *(dataptr+12)); // write result 3 out + punpckldq_r2r(mm3, mm1); // + + punpckhwd_m2r(*(dataptr+14), mm0); // n33:n23|n32:n22 - interleave third and fourth lines + movq_r2r(mm2, mm6); // copy second intermediate result + + movq_r2m(mm5, *(dataptr+14)); // write result 4 out + punpckhdq_r2r(mm3, mm4); // n31:n21|n11:n01- produce second result + + movq_r2m(mm1, *(dataptr+1)); // write result 5 out - (first result for other 4 x 4 block) + punpckldq_r2r(mm0, mm2); // n32:n22|n12:n02- produce third result + + movq_r2m(mm4, *(dataptr+3)); // write result 6 out + punpckhdq_r2r(mm0, mm6); // n33:n23|n13:n03 - produce fourth result + + movq_r2m(mm2, *(dataptr+5)); // write result 7 out + + movq_m2r(*dataptr, mm0); // m03:m02|m01:m00 - first line, first 4x4 + + movq_r2m(mm6, *(dataptr+7)); // write result 8 out + +// Do first 4x4 quadrant, which is used in the beginning of the DCT: + + movq_m2r(*(dataptr+4), mm7); // m23:m22|m21:m20 - third line + movq_r2r(mm0, mm2); // copy first line + + punpcklwd_m2r(*(dataptr+2), mm0); // m11:m01|m10:m00 - interleave first and second lines + movq_r2r(mm7, mm4); // copy third line + + punpcklwd_m2r(*(dataptr+6), mm7); // m31:m21|m30:m20 - interleave third and fourth lines + movq_r2r(mm0, mm1); // copy first intermediate result + + movq_m2r(*(dataptr+2), mm6); // m13:m12|m11:m10 - second line + punpckldq_r2r(mm7, mm0); // m30:m20|m10:m00 - interleave to produce result 1 + + movq_m2r(*(dataptr+6), mm5); // m33:m32|m31:m30 - fourth line + punpckhdq_r2r(mm7, mm1); // m31:m21|m11:m01 - interleave to produce result 2 + + movq_r2r(mm0, mm7); // write result 1 + punpckhwd_r2r(mm6, mm2); // m13:m03|m12:m02 - interleave first and second lines + + psubw_m2r(*(dataptr+14), mm7); // tmp07=x0-x7 /* Stage 1 */ + movq_r2r(mm1, mm6); // write result 2 + + paddw_m2r(*(dataptr+14), mm0); // tmp00=x0+x7 /* Stage 1 */ + punpckhwd_r2r(mm5, mm4); // m33:m23|m32:m22 - interleave third and fourth lines + + paddw_m2r(*(dataptr+12), mm1); // tmp01=x1+x6 /* Stage 1 */ + movq_r2r(mm2, mm3); // copy first intermediate result + + psubw_m2r(*(dataptr+12), mm6); // tmp06=x1-x6 /* Stage 1 */ + punpckldq_r2r(mm4, mm2); // m32:m22|m12:m02 - interleave to produce result 3 + + movq_r2m(mm7, tmp7); // save tmp07 + movq_r2r(mm2, mm5); // write result 3 + + movq_r2m(mm6, tmp6); // save tmp06 + + punpckhdq_r2r(mm4, mm3); // m33:m23|m13:m03 - interleave to produce result 4 + + paddw_m2r(*(dataptr+10), mm2); // tmp02=x2+x5 /* stage 1 */ + movq_r2r(mm3, mm4); // write result 4 + +/************************************************************************************************ + End of Transpose 2 +************************************************************************************************/ + + paddw_m2r(*(dataptr+8), mm3); // tmp03=x3+x4 /* stage 1*/ + movq_r2r(mm0, mm7); + + psubw_m2r(*(dataptr+8), mm4); // tmp04=x3-x4 /* stage 1*/ + movq_r2r(mm1, mm6); + + paddw_r2r(mm3, mm0); // tmp10 = tmp00 + tmp03 /* even 2 */ + psubw_r2r(mm3, mm7); // tmp13 = tmp00 - tmp03 /* even 2 */ + + psubw_r2r(mm2, mm6); // tmp12 = tmp01 - tmp02 /* even 2 */ + paddw_r2r(mm2, mm1); // tmp11 = tmp01 + tmp02 /* even 2 */ + + psubw_m2r(*(dataptr+10), mm5); // tmp05=x2-x5 /* stage 1*/ + paddw_r2r(mm7, mm6); // tmp12 + tmp13 + + /* stage 3 */ + + movq_m2r(tmp6, mm2); + movq_r2r(mm0, mm3); + + psllw_i2r(2, mm6); // m8 * 2^2 + paddw_r2r(mm1, mm0); + + pmulhw_m2r(RTjpeg_C4, mm6); // z1 + psubw_r2r(mm1, mm3); + + movq_r2m(mm0, *dataptr); + movq_r2r(mm7, mm0); + + /* Odd part */ + movq_r2m(mm3, *(dataptr+8)); + paddw_r2r(mm5, mm4); // tmp10 + + movq_m2r(tmp7, mm3); + paddw_r2r(mm6, mm0); // tmp32 + + paddw_r2r(mm2, mm5); // tmp11 + psubw_r2r(mm6, mm7); // tmp33 + + movq_r2m(mm0, *(dataptr+4)); + paddw_r2r(mm3, mm2); // tmp12 + + /* stage 4 */ + movq_r2m(mm7, *(dataptr+12)); + movq_r2r(mm4, mm1); // copy of tmp10 + + psubw_r2r(mm2, mm1); // tmp10 - tmp12 + psllw_i2r(2, mm4); // m8 * 2^2 + + movq_m2r(RTjpeg_C2mC6, mm0); + psllw_i2r(2, mm1); + + pmulhw_m2r(RTjpeg_C6, mm1); // z5 + psllw_i2r(2, mm2); + + pmulhw_r2r(mm0, mm4); // z5 + + /* stage 5 */ + + pmulhw_m2r(RTjpeg_C2pC6, mm2); + psllw_i2r(2, mm5); + + pmulhw_m2r(RTjpeg_C4, mm5); // z3 + movq_r2r(mm3, mm0); // copy tmp7 + + movq_m2r(*(dataptr+1), mm7); + paddw_r2r(mm1, mm4); // z2 + + paddw_r2r(mm1, mm2); // z4 + + paddw_r2r(mm5, mm0); // z11 + psubw_r2r(mm5, mm3); // z13 + + /* stage 6 */ + + movq_r2r(mm3, mm5); // copy z13 + psubw_r2r(mm4, mm3); // y3=z13 - z2 + + paddw_r2r(mm4, mm5); // y5=z13 + z2 + movq_r2r(mm0, mm6); // copy z11 + + movq_r2m(mm3, *(dataptr+6)); //save y3 + psubw_r2r(mm2, mm0); // y7=z11 - z4 + + movq_r2m(mm5, *(dataptr+10)); //save y5 + paddw_r2r(mm2, mm6); // y1=z11 + z4 + + movq_r2m(mm0, *(dataptr+14)); //save y7 + + /************************************************ + * End of 1st 4 rows + ************************************************/ + + movq_m2r(*(dataptr+3), mm1); // load x1 /* stage 1 */ + movq_r2r(mm7, mm0); // copy x0 + + movq_r2m(mm6, *(dataptr+2)); //save y1 + + movq_m2r(*(dataptr+5), mm2); // load x2 /* stage 1 */ + movq_r2r(mm1, mm6); // copy x1 + + paddw_m2r(*(dataptr+15), mm0); // tmp00 = x0 + x7 + + movq_m2r(*(dataptr+7), mm3); // load x3 /* stage 1 */ + movq_r2r(mm2, mm5); // copy x2 + + psubw_m2r(*(dataptr+15), mm7); // tmp07 = x0 - x7 + movq_r2r(mm3, mm4); // copy x3 + + paddw_m2r(*(dataptr+13), mm1); // tmp01 = x1 + x6 + + movq_r2m(mm7, tmp7); // save tmp07 + movq_r2r(mm0, mm7); // copy tmp00 + + psubw_m2r(*(dataptr+13), mm6); // tmp06 = x1 - x6 + + /* stage 2, Even Part */ + + paddw_m2r(*(dataptr+9), mm3); // tmp03 = x3 + x4 + + movq_r2m(mm6, tmp6); // save tmp07 + movq_r2r(mm1, mm6); // copy tmp01 + + paddw_m2r(*(dataptr+11), mm2); // tmp02 = x2 + x5 + paddw_r2r(mm3, mm0); // tmp10 = tmp00 + tmp03 + + psubw_r2r(mm3, mm7); // tmp13 = tmp00 - tmp03 + + psubw_m2r(*(dataptr+9), mm4); // tmp04 = x3 - x4 + psubw_r2r(mm2, mm6); // tmp12 = tmp01 - tmp02 + + paddw_r2r(mm2, mm1); // tmp11 = tmp01 + tmp02 + + psubw_m2r(*(dataptr+11), mm5); // tmp05 = x2 - x5 + paddw_r2r(mm7, mm6); // tmp12 + tmp13 + + /* stage 3, Even and stage 4 & 5 even */ + + movq_m2r(tmp6, mm2); // load tmp6 + movq_r2r(mm0, mm3); // copy tmp10 + + psllw_i2r(2, mm6); // shift z1 + paddw_r2r(mm1, mm0); // y0=tmp10 + tmp11 + + pmulhw_m2r(RTjpeg_C4, mm6); // z1 + psubw_r2r(mm1, mm3); // y4=tmp10 - tmp11 + + movq_r2m(mm0, *(dataptr+1)); //save y0 + movq_r2r(mm7, mm0); // copy tmp13 + + /* odd part */ + + movq_r2m(mm3, *(dataptr+9)); //save y4 + paddw_r2r(mm5, mm4); // tmp10 = tmp4 + tmp5 + + movq_m2r(tmp7, mm3); // load tmp7 + paddw_r2r(mm6, mm0); // tmp32 = tmp13 + z1 + + paddw_r2r(mm2, mm5); // tmp11 = tmp5 + tmp6 + psubw_r2r(mm6, mm7); // tmp33 = tmp13 - z1 + + movq_r2m(mm0, *(dataptr+5)); //save y2 + paddw_r2r(mm3, mm2); // tmp12 = tmp6 + tmp7 + + /* stage 4 */ + + movq_r2m(mm7, *(dataptr+13)); //save y6 + movq_r2r(mm4, mm1); // copy tmp10 + + psubw_r2r(mm2, mm1); // tmp10 - tmp12 + psllw_i2r(2, mm4); // shift tmp10 + + movq_m2r(RTjpeg_C2mC6, mm0); // load C2mC6 + psllw_i2r(2, mm1); // shift (tmp10-tmp12) + + pmulhw_m2r(RTjpeg_C6, mm1); // z5 + psllw_i2r(2, mm5); // prepare for multiply + + pmulhw_r2r(mm0, mm4); // multiply by converted real + + /* stage 5 */ + + pmulhw_m2r(RTjpeg_C4, mm5); // z3 + psllw_i2r(2, mm2); // prepare for multiply + + pmulhw_m2r(RTjpeg_C2pC6, mm2); // multiply + movq_r2r(mm3, mm0); // copy tmp7 + + movq_m2r(*(dataptr+9), mm7); // m03:m02|m01:m00 - first line (line 4)and copy into mm7 + paddw_r2r(mm1, mm4); // z2 + + paddw_r2r(mm5, mm0); // z11 + psubw_r2r(mm5, mm3); // z13 + + /* stage 6 */ + + movq_r2r(mm3, mm5); // copy z13 + paddw_r2r(mm1, mm2); // z4 + + movq_r2r(mm0, mm6); // copy z11 + psubw_r2r(mm4, mm5); // y3 + + paddw_r2r(mm2, mm6); // y1 + paddw_r2r(mm4, mm3); // y5 + + movq_r2m(mm5, *(dataptr+7)); //save y3 + psubw_r2r(mm2, mm0); // yè=z11 - z4 + + movq_r2m(mm3, *(dataptr+11)); //save y5 + + movq_r2m(mm6, *(dataptr+3)); //save y1 + + movq_r2m(mm0, *(dataptr+15)); //save y7 + + +#endif +} + +#define FIX_1_082392200 ((__s32) 277) /* FIX(1.082392200) */ +#define FIX_1_414213562 ((__s32) 362) /* FIX(1.414213562) */ +#define FIX_1_847759065 ((__s32) 473) /* FIX(1.847759065) */ +#define FIX_2_613125930 ((__s32) 669) /* FIX(2.613125930) */ + +#define DESCALE(x) (__s16)( ((x)+4) >> 3) + +/* clip yuv to 16..235 (should be 16..240 for cr/cb but ... */ + +#define RL(x) ((x)>235) ? 235 : (((x)<16) ? 16 : (x)) +#define MULTIPLY(var,const) (((__s32) ((var) * (const)) + 128)>>8) + +void RTjpeg_idct_init(void) +{ + int i; + + for(i=0; i<64; i++) + { + RTjpeg_liqt[i]=((__u64)RTjpeg_liqt[i]*RTjpeg_aan_tab[i])>>32; + RTjpeg_ciqt[i]=((__u64)RTjpeg_ciqt[i]*RTjpeg_aan_tab[i])>>32; + } +} + +void RTjpeg_idct(__u8 *odata, __s16 *data, int rskip) +{ +#ifdef MMX + +static mmx_t fix_141 = (mmx_t)(long long)0x5a825a825a825a82LL; +static mmx_t fix_184n261 = (mmx_t)(long long)0xcf04cf04cf04cf04LL; +static mmx_t fix_184 = (mmx_t)(long long)0x7641764176417641LL; +static mmx_t fix_n184 = (mmx_t)(long long)0x896f896f896f896fLL; +static mmx_t fix_108n184 = (mmx_t)(long long)0xcf04cf04cf04cf04LL; + + mmx_t workspace[64]; + mmx_t *wsptr = workspace; + register mmx_t *dataptr = (mmx_t *)odata; + mmx_t *idata = (mmx_t *)data; + + rskip = rskip>>3; +/* + * Perform inverse DCT on one block of coefficients. + */ + + /* Odd part */ + + movq_m2r(*(idata+10), mm1); // load idata[DCTSIZE*5] + + movq_m2r(*(idata+6), mm0); // load idata[DCTSIZE*3] + + movq_m2r(*(idata+2), mm3); // load idata[DCTSIZE*1] + + movq_r2r(mm1, mm2); // copy tmp6 /* phase 6 */ + + movq_m2r(*(idata+14), mm4); // load idata[DCTSIZE*7] + + paddw_r2r(mm0, mm1); // z13 = tmp6 + tmp5; + + psubw_r2r(mm0, mm2); // z10 = tmp6 - tmp5 + + psllw_i2r(2, mm2); // shift z10 + movq_r2r(mm2, mm0); // copy z10 + + pmulhw_m2r(fix_184n261, mm2); // MULTIPLY( z12, FIX_1_847759065); /* 2*c2 */ + movq_r2r(mm3, mm5); // copy tmp4 + + pmulhw_m2r(fix_n184, mm0); // MULTIPLY(z10, -FIX_1_847759065); /* 2*c2 */ + paddw_r2r(mm4, mm3); // z11 = tmp4 + tmp7; + + movq_r2r(mm3, mm6); // copy z11 /* phase 5 */ + psubw_r2r(mm4, mm5); // z12 = tmp4 - tmp7; + + psubw_r2r(mm1, mm6); // z11-z13 + psllw_i2r(2, mm5); // shift z12 + + movq_m2r(*(idata+12), mm4); // load idata[DCTSIZE*6], even part + movq_r2r(mm5, mm7); // copy z12 + + pmulhw_m2r(fix_108n184, mm5); // MULT(z12, (FIX_1_08-FIX_1_84)) //- z5; /* 2*(c2-c6) */ even part + paddw_r2r(mm1, mm3); // tmp7 = z11 + z13; + + //ok + + /* Even part */ + pmulhw_m2r(fix_184, mm7); // MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) //+ z5; /* -2*(c2+c6) */ + psllw_i2r(2, mm6); + + movq_m2r(*(idata+4), mm1); // load idata[DCTSIZE*2] + + paddw_r2r(mm5, mm0); // tmp10 + + paddw_r2r(mm7, mm2); // tmp12 + + pmulhw_m2r(fix_141, mm6); // tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */ + psubw_r2r(mm3, mm2); // tmp6 = tmp12 - tmp7 + + movq_r2r(mm1, mm5); // copy tmp1 + paddw_r2r(mm4, mm1); // tmp13= tmp1 + tmp3; /* phases 5-3 */ + + psubw_r2r(mm4, mm5); // tmp1-tmp3 + psubw_r2r(mm2, mm6); // tmp5 = tmp11 - tmp6; + + movq_r2m(mm1, *(wsptr)); // save tmp13 in workspace + psllw_i2r(2, mm5); // shift tmp1-tmp3 + + movq_m2r(*(idata), mm7); // load idata[DCTSIZE*0] + + pmulhw_m2r(fix_141, mm5); // MULTIPLY(tmp1 - tmp3, FIX_1_414213562) + paddw_r2r(mm6, mm0); // tmp4 = tmp10 + tmp5; + + movq_m2r(*(idata+8), mm4); // load idata[DCTSIZE*4] + + psubw_r2r(mm1, mm5); // tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */ + + movq_r2m(mm0, *(wsptr+4)); // save tmp4 in workspace + movq_r2r(mm7, mm1); // copy tmp0 /* phase 3 */ + + movq_r2m(mm5, *(wsptr+2)); // save tmp12 in workspace + psubw_r2r(mm4, mm1); // tmp11 = tmp0 - tmp2; + + paddw_r2r(mm4, mm7); // tmp10 = tmp0 + tmp2; + movq_r2r(mm1, mm5); // copy tmp11 + + paddw_m2r(*(wsptr+2), mm1); // tmp1 = tmp11 + tmp12; + movq_r2r(mm7, mm4); // copy tmp10 /* phase 2 */ + + paddw_m2r(*(wsptr), mm7); // tmp0 = tmp10 + tmp13; + + psubw_m2r(*(wsptr), mm4); // tmp3 = tmp10 - tmp13; + movq_r2r(mm7, mm0); // copy tmp0 + + psubw_m2r(*(wsptr+2), mm5); // tmp2 = tmp11 - tmp12; + paddw_r2r(mm3, mm7); // wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7); + + psubw_r2r(mm3, mm0); // wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7); + + movq_r2m(mm7, *(wsptr)); // wsptr[DCTSIZE*0] + movq_r2r(mm1, mm3); // copy tmp1 + + movq_r2m(mm0, *(wsptr+14)); // wsptr[DCTSIZE*7] + paddw_r2r(mm2, mm1); // wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6); + + psubw_r2r(mm2, mm3); // wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6); + + movq_r2m(mm1, *(wsptr+2)); // wsptr[DCTSIZE*1] + movq_r2r(mm4, mm1); // copy tmp3 + + movq_r2m(mm3, *(wsptr+12)); // wsptr[DCTSIZE*6] + + paddw_m2r(*(wsptr+4), mm4); // wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4); + + psubw_m2r(*(wsptr+4), mm1); // wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4); + + movq_r2m(mm4, *(wsptr+8)); + movq_r2r(mm5, mm7); // copy tmp2 + + paddw_r2r(mm6, mm5); // wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5) + + movq_r2m(mm1, *(wsptr+6)); + psubw_r2r(mm6, mm7); // wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5); + + movq_r2m(mm5, *(wsptr+4)); + + movq_r2m(mm7, *(wsptr+10)); + + //ok + + +/*****************************************************************/ + + idata++; + wsptr++; + +/*****************************************************************/ + + movq_m2r(*(idata+10), mm1); // load idata[DCTSIZE*5] + + movq_m2r(*(idata+6), mm0); // load idata[DCTSIZE*3] + + movq_m2r(*(idata+2), mm3); // load idata[DCTSIZE*1] + movq_r2r(mm1, mm2); // copy tmp6 /* phase 6 */ + + movq_m2r(*(idata+14), mm4); // load idata[DCTSIZE*7] + paddw_r2r(mm0, mm1); // z13 = tmp6 + tmp5; + + psubw_r2r(mm0, mm2); // z10 = tmp6 - tmp5 + + psllw_i2r(2, mm2); // shift z10 + movq_r2r(mm2, mm0); // copy z10 + + pmulhw_m2r(fix_184n261, mm2); // MULTIPLY( z12, FIX_1_847759065); /* 2*c2 */ + movq_r2r(mm3, mm5); // copy tmp4 + + pmulhw_m2r(fix_n184, mm0); // MULTIPLY(z10, -FIX_1_847759065); /* 2*c2 */ + paddw_r2r(mm4, mm3); // z11 = tmp4 + tmp7; + + movq_r2r(mm3, mm6); // copy z11 /* phase 5 */ + psubw_r2r(mm4, mm5); // z12 = tmp4 - tmp7; + + psubw_r2r(mm1, mm6); // z11-z13 + psllw_i2r(2, mm5); // shift z12 + + movq_m2r(*(idata+12), mm4); // load idata[DCTSIZE*6], even part + movq_r2r(mm5, mm7); // copy z12 + + pmulhw_m2r(fix_108n184, mm5); // MULT(z12, (FIX_1_08-FIX_1_84)) //- z5; /* 2*(c2-c6) */ even part + paddw_r2r(mm1, mm3); // tmp7 = z11 + z13; + + //ok + + /* Even part */ + pmulhw_m2r(fix_184, mm7); // MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) //+ z5; /* -2*(c2+c6) */ + psllw_i2r(2, mm6); + + movq_m2r(*(idata+4), mm1); // load idata[DCTSIZE*2] + + paddw_r2r(mm5, mm0); // tmp10 + + paddw_r2r(mm7, mm2); // tmp12 + + pmulhw_m2r(fix_141, mm6); // tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */ + psubw_r2r(mm3, mm2); // tmp6 = tmp12 - tmp7 + + movq_r2r(mm1, mm5); // copy tmp1 + paddw_r2r(mm4, mm1); // tmp13= tmp1 + tmp3; /* phases 5-3 */ + + psubw_r2r(mm4, mm5); // tmp1-tmp3 + psubw_r2r(mm2, mm6); // tmp5 = tmp11 - tmp6; + + movq_r2m(mm1, *(wsptr)); // save tmp13 in workspace + psllw_i2r(2, mm5); // shift tmp1-tmp3 + + movq_m2r(*(idata), mm7); // load idata[DCTSIZE*0] + paddw_r2r(mm6, mm0); // tmp4 = tmp10 + tmp5; + + pmulhw_m2r(fix_141, mm5); // MULTIPLY(tmp1 - tmp3, FIX_1_414213562) + + movq_m2r(*(idata+8), mm4); // load idata[DCTSIZE*4] + + psubw_r2r(mm1, mm5); // tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */ + + movq_r2m(mm0, *(wsptr+4)); // save tmp4 in workspace + movq_r2r(mm7, mm1); // copy tmp0 /* phase 3 */ + + movq_r2m(mm5, *(wsptr+2)); // save tmp12 in workspace + psubw_r2r(mm4, mm1); // tmp11 = tmp0 - tmp2; + + paddw_r2r(mm4, mm7); // tmp10 = tmp0 + tmp2; + movq_r2r(mm1, mm5); // copy tmp11 + + paddw_m2r(*(wsptr+2), mm1); // tmp1 = tmp11 + tmp12; + movq_r2r(mm7, mm4); // copy tmp10 /* phase 2 */ + + paddw_m2r(*(wsptr), mm7); // tmp0 = tmp10 + tmp13; + + psubw_m2r(*(wsptr), mm4); // tmp3 = tmp10 - tmp13; + movq_r2r(mm7, mm0); // copy tmp0 + + psubw_m2r(*(wsptr+2), mm5); // tmp2 = tmp11 - tmp12; + paddw_r2r(mm3, mm7); // wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7); + + psubw_r2r(mm3, mm0); // wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7); + + movq_r2m(mm7, *(wsptr)); // wsptr[DCTSIZE*0] + movq_r2r(mm1, mm3); // copy tmp1 + + movq_r2m(mm0, *(wsptr+14)); // wsptr[DCTSIZE*7] + paddw_r2r(mm2, mm1); // wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6); + + psubw_r2r(mm2, mm3); // wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6); + + movq_r2m(mm1, *(wsptr+2)); // wsptr[DCTSIZE*1] + movq_r2r(mm4, mm1); // copy tmp3 + + movq_r2m(mm3, *(wsptr+12)); // wsptr[DCTSIZE*6] + + paddw_m2r(*(wsptr+4), mm4); // wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4); + + psubw_m2r(*(wsptr+4), mm1); // wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4); + + movq_r2m(mm4, *(wsptr+8)); + movq_r2r(mm5, mm7); // copy tmp2 + + paddw_r2r(mm6, mm5); // wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5) + + movq_r2m(mm1, *(wsptr+6)); + psubw_r2r(mm6, mm7); // wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5); + + movq_r2m(mm5, *(wsptr+4)); + + movq_r2m(mm7, *(wsptr+10)); + +/*****************************************************************/ + + /* Pass 2: process rows from work array, store into output array. */ + /* Note that we must descale the results by a factor of 8 == 2**3, */ + /* and also undo the PASS1_BITS scaling. */ + +/*****************************************************************/ + /* Even part */ + + wsptr--; + +// tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]); +// tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]); +// tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]); +// tmp14 = ((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6]); + movq_m2r(*(wsptr), mm0); // wsptr[0,0],[0,1],[0,2],[0,3] + + movq_m2r(*(wsptr+1), mm1); // wsptr[0,4],[0,5],[0,6],[0,7] + movq_r2r(mm0, mm2); + + movq_m2r(*(wsptr+2), mm3); // wsptr[1,0],[1,1],[1,2],[1,3] + paddw_r2r(mm1, mm0); // wsptr[0,tmp10],[xxx],[0,tmp13],[xxx] + + movq_m2r(*(wsptr+3), mm4); // wsptr[1,4],[1,5],[1,6],[1,7] + psubw_r2r(mm1, mm2); // wsptr[0,tmp11],[xxx],[0,tmp14],[xxx] + + movq_r2r(mm0, mm6); + movq_r2r(mm3, mm5); + + paddw_r2r(mm4, mm3); // wsptr[1,tmp10],[xxx],[1,tmp13],[xxx] + movq_r2r(mm2, mm1); + + psubw_r2r(mm4, mm5); // wsptr[1,tmp11],[xxx],[1,tmp14],[xxx] + punpcklwd_r2r(mm3, mm0); // wsptr[0,tmp10],[1,tmp10],[xxx],[xxx] + + movq_m2r(*(wsptr+7), mm7); // wsptr[3,4],[3,5],[3,6],[3,7] + punpckhwd_r2r(mm3, mm6); // wsptr[0,tmp13],[1,tmp13],[xxx],[xxx] + + movq_m2r(*(wsptr+4), mm3); // wsptr[2,0],[2,1],[2,2],[2,3] + punpckldq_r2r(mm6, mm0); // wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13] + + punpcklwd_r2r(mm5, mm1); // wsptr[0,tmp11],[1,tmp11],[xxx],[xxx] + movq_r2r(mm3, mm4); + + movq_m2r(*(wsptr+6), mm6); // wsptr[3,0],[3,1],[3,2],[3,3] + punpckhwd_r2r(mm5, mm2); // wsptr[0,tmp14],[1,tmp14],[xxx],[xxx] + + movq_m2r(*(wsptr+5), mm5); // wsptr[2,4],[2,5],[2,6],[2,7] + punpckldq_r2r(mm2, mm1); // wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] + + + paddw_r2r(mm5, mm3); // wsptr[2,tmp10],[xxx],[2,tmp13],[xxx] + movq_r2r(mm6, mm2); + + psubw_r2r(mm5, mm4); // wsptr[2,tmp11],[xxx],[2,tmp14],[xxx] + paddw_r2r(mm7, mm6); // wsptr[3,tmp10],[xxx],[3,tmp13],[xxx] + + movq_r2r(mm3, mm5); + punpcklwd_r2r(mm6, mm3); // wsptr[2,tmp10],[3,tmp10],[xxx],[xxx] + + psubw_r2r(mm7, mm2); // wsptr[3,tmp11],[xxx],[3,tmp14],[xxx] + punpckhwd_r2r(mm6, mm5); // wsptr[2,tmp13],[3,tmp13],[xxx],[xxx] + + movq_r2r(mm4, mm7); + punpckldq_r2r(mm5, mm3); // wsptr[2,tmp10],[3,tmp10],[2,tmp13],[3,tmp13] + + punpcklwd_r2r(mm2, mm4); // wsptr[2,tmp11],[3,tmp11],[xxx],[xxx] + + punpckhwd_r2r(mm2, mm7); // wsptr[2,tmp14],[3,tmp14],[xxx],[xxx] + + punpckldq_r2r(mm7, mm4); // wsptr[2,tmp11],[3,tmp11],[2,tmp14],[3,tmp14] + movq_r2r(mm1, mm6); + + //ok + +// mm0 = ;wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13] +// mm1 = ;wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] + + + movq_r2r(mm0, mm2); + punpckhdq_r2r(mm4, mm6); // wsptr[0,tmp14],[1,tmp14],[2,tmp14],[3,tmp14] + + punpckldq_r2r(mm4, mm1); // wsptr[0,tmp11],[1,tmp11],[2,tmp11],[3,tmp11] + psllw_i2r(2, mm6); + + pmulhw_m2r(fix_141, mm6); + punpckldq_r2r(mm3, mm0); // wsptr[0,tmp10],[1,tmp10],[2,tmp10],[3,tmp10] + + punpckhdq_r2r(mm3, mm2); // wsptr[0,tmp13],[1,tmp13],[2,tmp13],[3,tmp13] + movq_r2r(mm0, mm7); + +// tmp0 = tmp10 + tmp13; +// tmp3 = tmp10 - tmp13; + paddw_r2r(mm2, mm0); // [0,tmp0],[1,tmp0],[2,tmp0],[3,tmp0] + psubw_r2r(mm2, mm7); // [0,tmp3],[1,tmp3],[2,tmp3],[3,tmp3] + +// tmp12 = MULTIPLY(tmp14, FIX_1_414213562) - tmp13; + psubw_r2r(mm2, mm6); // wsptr[0,tmp12],[1,tmp12],[2,tmp12],[3,tmp12] +// tmp1 = tmp11 + tmp12; +// tmp2 = tmp11 - tmp12; + movq_r2r(mm1, mm5); + + //OK + + /* Odd part */ + +// z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3]; +// z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3]; +// z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7]; +// z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7]; + movq_m2r(*(wsptr), mm3); // wsptr[0,0],[0,1],[0,2],[0,3] + paddw_r2r(mm6, mm1); // [0,tmp1],[1,tmp1],[2,tmp1],[3,tmp1] + + movq_m2r(*(wsptr+1), mm4); // wsptr[0,4],[0,5],[0,6],[0,7] + psubw_r2r(mm6, mm5); // [0,tmp2],[1,tmp2],[2,tmp2],[3,tmp2] + + movq_r2r(mm3, mm6); + punpckldq_r2r(mm4, mm3); // wsptr[0,0],[0,1],[0,4],[0,5] + + punpckhdq_r2r(mm6, mm4); // wsptr[0,6],[0,7],[0,2],[0,3] + movq_r2r(mm3, mm2); + +//Save tmp0 and tmp1 in wsptr + movq_r2m(mm0, *(wsptr)); // save tmp0 + paddw_r2r(mm4, mm2); // wsptr[xxx],[0,z11],[xxx],[0,z13] + + +//Continue with z10 --- z13 + movq_m2r(*(wsptr+2), mm6); // wsptr[1,0],[1,1],[1,2],[1,3] + psubw_r2r(mm4, mm3); // wsptr[xxx],[0,z12],[xxx],[0,z10] + + movq_m2r(*(wsptr+3), mm0); // wsptr[1,4],[1,5],[1,6],[1,7] + movq_r2r(mm6, mm4); + + movq_r2m(mm1, *(wsptr+1)); // save tmp1 + punpckldq_r2r(mm0, mm6); // wsptr[1,0],[1,1],[1,4],[1,5] + + punpckhdq_r2r(mm4, mm0); // wsptr[1,6],[1,7],[1,2],[1,3] + movq_r2r(mm6, mm1); + +//Save tmp2 and tmp3 in wsptr + paddw_r2r(mm0, mm6); // wsptr[xxx],[1,z11],[xxx],[1,z13] + movq_r2r(mm2, mm4); + +//Continue with z10 --- z13 + movq_r2m(mm5, *(wsptr+2)); // save tmp2 + punpcklwd_r2r(mm6, mm2); // wsptr[xxx],[xxx],[0,z11],[1,z11] + + psubw_r2r(mm0, mm1); // wsptr[xxx],[1,z12],[xxx],[1,z10] + punpckhwd_r2r(mm6, mm4); // wsptr[xxx],[xxx],[0,z13],[1,z13] + + movq_r2r(mm3, mm0); + punpcklwd_r2r(mm1, mm3); // wsptr[xxx],[xxx],[0,z12],[1,z12] + + movq_r2m(mm7, *(wsptr+3)); // save tmp3 + punpckhwd_r2r(mm1, mm0); // wsptr[xxx],[xxx],[0,z10],[1,z10] + + movq_m2r(*(wsptr+4), mm6); // wsptr[2,0],[2,1],[2,2],[2,3] + punpckhdq_r2r(mm2, mm0); // wsptr[0,z10],[1,z10],[0,z11],[1,z11] + + movq_m2r(*(wsptr+5), mm7); // wsptr[2,4],[2,5],[2,6],[2,7] + punpckhdq_r2r(mm4, mm3); // wsptr[0,z12],[1,z12],[0,z13],[1,z13] + + movq_m2r(*(wsptr+6), mm1); // wsptr[3,0],[3,1],[3,2],[3,3] + movq_r2r(mm6, mm4); + + punpckldq_r2r(mm7, mm6); // wsptr[2,0],[2,1],[2,4],[2,5] + movq_r2r(mm1, mm5); + + punpckhdq_r2r(mm4, mm7); // wsptr[2,6],[2,7],[2,2],[2,3] + movq_r2r(mm6, mm2); + + movq_m2r(*(wsptr+7), mm4); // wsptr[3,4],[3,5],[3,6],[3,7] + paddw_r2r(mm7, mm6); // wsptr[xxx],[2,z11],[xxx],[2,z13] + + psubw_r2r(mm7, mm2); // wsptr[xxx],[2,z12],[xxx],[2,z10] + punpckldq_r2r(mm4, mm1); // wsptr[3,0],[3,1],[3,4],[3,5] + + punpckhdq_r2r(mm5, mm4); // wsptr[3,6],[3,7],[3,2],[3,3] + movq_r2r(mm1, mm7); + + paddw_r2r(mm4, mm1); // wsptr[xxx],[3,z11],[xxx],[3,z13] + psubw_r2r(mm4, mm7); // wsptr[xxx],[3,z12],[xxx],[3,z10] + + movq_r2r(mm6, mm5); + punpcklwd_r2r(mm1, mm6); // wsptr[xxx],[xxx],[2,z11],[3,z11] + + punpckhwd_r2r(mm1, mm5); // wsptr[xxx],[xxx],[2,z13],[3,z13] + movq_r2r(mm2, mm4); + + punpcklwd_r2r(mm7, mm2); // wsptr[xxx],[xxx],[2,z12],[3,z12] + + punpckhwd_r2r(mm7, mm4); // wsptr[xxx],[xxx],[2,z10],[3,z10] + + punpckhdq_r2r(mm6, mm4); /// wsptr[2,z10],[3,z10],[2,z11],[3,z11] + + punpckhdq_r2r(mm5, mm2); // wsptr[2,z12],[3,z12],[2,z13],[3,z13] + movq_r2r(mm0, mm5); + + punpckldq_r2r(mm4, mm0); // wsptr[0,z10],[1,z10],[2,z10],[3,z10] + + punpckhdq_r2r(mm4, mm5); // wsptr[0,z11],[1,z11],[2,z11],[3,z11] + movq_r2r(mm3, mm4); + + punpckhdq_r2r(mm2, mm4); // wsptr[0,z13],[1,z13],[2,z13],[3,z13] + movq_r2r(mm5, mm1); + + punpckldq_r2r(mm2, mm3); // wsptr[0,z12],[1,z12],[2,z12],[3,z12] +// tmp7 = z11 + z13; /* phase 5 */ +// tmp8 = z11 - z13; /* phase 5 */ + psubw_r2r(mm4, mm1); // tmp8 + + paddw_r2r(mm4, mm5); // tmp7 +// tmp21 = MULTIPLY(tmp8, FIX_1_414213562); /* 2*c4 */ + psllw_i2r(2, mm1); + + psllw_i2r(2, mm0); + + pmulhw_m2r(fix_141, mm1); // tmp21 +// tmp20 = MULTIPLY(z12, (FIX_1_082392200- FIX_1_847759065)) /* 2*(c2-c6) */ +// + MULTIPLY(z10, - FIX_1_847759065); /* 2*c2 */ + psllw_i2r(2, mm3); + movq_r2r(mm0, mm7); + + pmulhw_m2r(fix_n184, mm7); + movq_r2r(mm3, mm6); + + movq_m2r(*(wsptr), mm2); // tmp0,final1 + + pmulhw_m2r(fix_108n184, mm6); +// tmp22 = MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) /* -2*(c2+c6) */ +// + MULTIPLY(z12, FIX_1_847759065); /* 2*c2 */ + movq_r2r(mm2, mm4); // final1 + + pmulhw_m2r(fix_184n261, mm0); + paddw_r2r(mm5, mm2); // tmp0+tmp7,final1 + + pmulhw_m2r(fix_184, mm3); + psubw_r2r(mm5, mm4); // tmp0-tmp7,final1 + +// tmp6 = tmp22 - tmp7; /* phase 2 */ + psraw_i2r(3, mm2); // outptr[0,0],[1,0],[2,0],[3,0],final1 + + paddw_r2r(mm6, mm7); // tmp20 + psraw_i2r(3, mm4); // outptr[0,7],[1,7],[2,7],[3,7],final1 + + paddw_r2r(mm0, mm3); // tmp22 + +// tmp5 = tmp21 - tmp6; + psubw_r2r(mm5, mm3); // tmp6 + +// tmp4 = tmp20 + tmp5; + movq_m2r(*(wsptr+1), mm0); // tmp1,final2 + psubw_r2r(mm3, mm1); // tmp5 + + movq_r2r(mm0, mm6); // final2 + paddw_r2r(mm3, mm0); // tmp1+tmp6,final2 + + /* Final output stage: scale down by a factor of 8 and range-limit */ + + +// outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3) +// & RANGE_MASK]; +// outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3) +// & RANGE_MASK]; final1 + + +// outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3) +// & RANGE_MASK]; +// outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3) +// & RANGE_MASK]; final2 + psubw_r2r(mm3, mm6); // tmp1-tmp6,final2 + psraw_i2r(3, mm0); // outptr[0,1],[1,1],[2,1],[3,1] + + psraw_i2r(3, mm6); // outptr[0,6],[1,6],[2,6],[3,6] + + packuswb_r2r(mm4, mm0); // out[0,1],[1,1],[2,1],[3,1],[0,7],[1,7],[2,7],[3,7] + + movq_m2r(*(wsptr+2), mm5); // tmp2,final3 + packuswb_r2r(mm6, mm2); // out[0,0],[1,0],[2,0],[3,0],[0,6],[1,6],[2,6],[3,6] + +// outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3) +// & RANGE_MASK]; +// outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3) +// & RANGE_MASK]; final3 + paddw_r2r(mm1, mm7); // tmp4 + movq_r2r(mm5, mm3); + + paddw_r2r(mm1, mm5); // tmp2+tmp5 + psubw_r2r(mm1, mm3); // tmp2-tmp5 + + psraw_i2r(3, mm5); // outptr[0,2],[1,2],[2,2],[3,2] + + movq_m2r(*(wsptr+3), mm4); // tmp3,final4 + psraw_i2r(3, mm3); // outptr[0,5],[1,5],[2,5],[3,5] + + + +// outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3) +// & RANGE_MASK]; +// outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3) +// & RANGE_MASK]; final4 + movq_r2r(mm4, mm6); + paddw_r2r(mm7, mm4); // tmp3+tmp4 + + psubw_r2r(mm7, mm6); // tmp3-tmp4 + psraw_i2r(3, mm4); // outptr[0,4],[1,4],[2,4],[3,4] + + // mov ecx, [dataptr] + + psraw_i2r(3, mm6); // outptr[0,3],[1,3],[2,3],[3,3] + + packuswb_r2r(mm4, mm5); // out[0,2],[1,2],[2,2],[3,2],[0,4],[1,4],[2,4],[3,4] + + packuswb_r2r(mm3, mm6); // out[0,3],[1,3],[2,3],[3,3],[0,5],[1,5],[2,5],[3,5] + movq_r2r(mm2, mm4); + + movq_r2r(mm5, mm7); + punpcklbw_r2r(mm0, mm2); // out[0,0],[0,1],[1,0],[1,1],[2,0],[2,1],[3,0],[3,1] + + punpckhbw_r2r(mm0, mm4); // out[0,6],[0,7],[1,6],[1,7],[2,6],[2,7],[3,6],[3,7] + movq_r2r(mm2, mm1); + + punpcklbw_r2r(mm6, mm5); // out[0,2],[0,3],[1,2],[1,3],[2,2],[2,3],[3,2],[3,3] + + // add dataptr, 4 + + punpckhbw_r2r(mm6, mm7); // out[0,4],[0,5],[1,4],[1,5],[2,4],[2,5],[3,4],[3,5] + + punpcklwd_r2r(mm5, mm2); // out[0,0],[0,1],[0,2],[0,3],[1,0],[1,1],[1,2],[1,3] + + // add ecx, output_col + + movq_r2r(mm7, mm6); + punpckhwd_r2r(mm5, mm1); // out[2,0],[2,1],[2,2],[2,3],[3,0],[3,1],[3,2],[3,3] + + movq_r2r(mm2, mm0); + punpcklwd_r2r(mm4, mm6); // out[0,4],[0,5],[0,6],[0,7],[1,4],[1,5],[1,6],[1,7] + + // mov idata, [dataptr] + + punpckldq_r2r(mm6, mm2); // out[0,0],[0,1],[0,2],[0,3],[0,4],[0,5],[0,6],[0,7] + + // add dataptr, 4 + + movq_r2r(mm1, mm3); + + // add idata, output_col + + punpckhwd_r2r(mm4, mm7); // out[2,4],[2,5],[2,6],[2,7],[3,4],[3,5],[3,6],[3,7] + + movq_r2m(mm2, *(dataptr)); + + punpckhdq_r2r(mm6, mm0); // out[1,0],[1,1],[1,2],[1,3],[1,4],[1,5],[1,6],[1,7] + + dataptr += rskip; + movq_r2m(mm0, *(dataptr)); + + punpckldq_r2r(mm7, mm1); // out[2,0],[2,1],[2,2],[2,3],[2,4],[2,5],[2,6],[2,7] + punpckhdq_r2r(mm7, mm3); // out[3,0],[3,1],[3,2],[3,3],[3,4],[3,5],[3,6],[3,7] + + dataptr += rskip; + movq_r2m(mm1, *(dataptr)); + + dataptr += rskip; + movq_r2m(mm3, *(dataptr)); + +/*******************************************************************/ + + wsptr += 8; + +/*******************************************************************/ + +// tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]); +// tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]); +// tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]); +// tmp14 = ((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6]); + movq_m2r(*(wsptr), mm0); // wsptr[0,0],[0,1],[0,2],[0,3] + + movq_m2r(*(wsptr+1), mm1); // wsptr[0,4],[0,5],[0,6],[0,7] + movq_r2r(mm0, mm2); + + movq_m2r(*(wsptr+2), mm3); // wsptr[1,0],[1,1],[1,2],[1,3] + paddw_r2r(mm1, mm0); // wsptr[0,tmp10],[xxx],[0,tmp13],[xxx] + + movq_m2r(*(wsptr+3), mm4); // wsptr[1,4],[1,5],[1,6],[1,7] + psubw_r2r(mm1, mm2); // wsptr[0,tmp11],[xxx],[0,tmp14],[xxx] + + movq_r2r(mm0, mm6); + movq_r2r(mm3, mm5); + + paddw_r2r(mm4, mm3); // wsptr[1,tmp10],[xxx],[1,tmp13],[xxx] + movq_r2r(mm2, mm1); + + psubw_r2r(mm4, mm5); // wsptr[1,tmp11],[xxx],[1,tmp14],[xxx] + punpcklwd_r2r(mm3, mm0); // wsptr[0,tmp10],[1,tmp10],[xxx],[xxx] + + movq_m2r(*(wsptr+7), mm7); // wsptr[3,4],[3,5],[3,6],[3,7] + punpckhwd_r2r(mm3, mm6); // wsptr[0,tmp13],[1,tmp13],[xxx],[xxx] + + movq_m2r(*(wsptr+4), mm3); // wsptr[2,0],[2,1],[2,2],[2,3] + punpckldq_r2r(mm6, mm0); // wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13] + + punpcklwd_r2r(mm5, mm1); // wsptr[0,tmp11],[1,tmp11],[xxx],[xxx] + movq_r2r(mm3, mm4); + + movq_m2r(*(wsptr+6), mm6); // wsptr[3,0],[3,1],[3,2],[3,3] + punpckhwd_r2r(mm5, mm2); // wsptr[0,tmp14],[1,tmp14],[xxx],[xxx] + + movq_m2r(*(wsptr+5), mm5); // wsptr[2,4],[2,5],[2,6],[2,7] + punpckldq_r2r(mm2, mm1); // wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] + + paddw_r2r(mm5, mm3); // wsptr[2,tmp10],[xxx],[2,tmp13],[xxx] + movq_r2r(mm6, mm2); + + psubw_r2r(mm5, mm4); // wsptr[2,tmp11],[xxx],[2,tmp14],[xxx] + paddw_r2r(mm7, mm6); // wsptr[3,tmp10],[xxx],[3,tmp13],[xxx] + + movq_r2r(mm3, mm5); + punpcklwd_r2r(mm6, mm3); // wsptr[2,tmp10],[3,tmp10],[xxx],[xxx] + + psubw_r2r(mm7, mm2); // wsptr[3,tmp11],[xxx],[3,tmp14],[xxx] + punpckhwd_r2r(mm6, mm5); // wsptr[2,tmp13],[3,tmp13],[xxx],[xxx] + + movq_r2r(mm4, mm7); + punpckldq_r2r(mm5, mm3); // wsptr[2,tmp10],[3,tmp10],[2,tmp13],[3,tmp13] + + punpcklwd_r2r(mm2, mm4); // wsptr[2,tmp11],[3,tmp11],[xxx],[xxx] + + punpckhwd_r2r(mm2, mm7); // wsptr[2,tmp14],[3,tmp14],[xxx],[xxx] + + punpckldq_r2r(mm7, mm4); // wsptr[2,tmp11],[3,tmp11],[2,tmp14],[3,tmp14] + movq_r2r(mm1, mm6); + + //OK + +// mm0 = ;wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13] +// mm1 = ;wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] + + movq_r2r(mm0, mm2); + punpckhdq_r2r(mm4, mm6); // wsptr[0,tmp14],[1,tmp14],[2,tmp14],[3,tmp14] + + punpckldq_r2r(mm4, mm1); // wsptr[0,tmp11],[1,tmp11],[2,tmp11],[3,tmp11] + psllw_i2r(2, mm6); + + pmulhw_m2r(fix_141, mm6); + punpckldq_r2r(mm3, mm0); // wsptr[0,tmp10],[1,tmp10],[2,tmp10],[3,tmp10] + + punpckhdq_r2r(mm3, mm2); // wsptr[0,tmp13],[1,tmp13],[2,tmp13],[3,tmp13] + movq_r2r(mm0, mm7); + +// tmp0 = tmp10 + tmp13; +// tmp3 = tmp10 - tmp13; + paddw_r2r(mm2, mm0); // [0,tmp0],[1,tmp0],[2,tmp0],[3,tmp0] + psubw_r2r(mm2, mm7); // [0,tmp3],[1,tmp3],[2,tmp3],[3,tmp3] + +// tmp12 = MULTIPLY(tmp14, FIX_1_414213562) - tmp13; + psubw_r2r(mm2, mm6); // wsptr[0,tmp12],[1,tmp12],[2,tmp12],[3,tmp12] +// tmp1 = tmp11 + tmp12; +// tmp2 = tmp11 - tmp12; + movq_r2r(mm1, mm5); + + //OK + + + /* Odd part */ + +// z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3]; +// z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3]; +// z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7]; +// z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7]; + movq_m2r(*(wsptr), mm3); // wsptr[0,0],[0,1],[0,2],[0,3] + paddw_r2r(mm6, mm1); // [0,tmp1],[1,tmp1],[2,tmp1],[3,tmp1] + + movq_m2r(*(wsptr+1), mm4); // wsptr[0,4],[0,5],[0,6],[0,7] + psubw_r2r(mm6, mm5); // [0,tmp2],[1,tmp2],[2,tmp2],[3,tmp2] + + movq_r2r(mm3, mm6); + punpckldq_r2r(mm4, mm3); // wsptr[0,0],[0,1],[0,4],[0,5] + + punpckhdq_r2r(mm6, mm4); // wsptr[0,6],[0,7],[0,2],[0,3] + movq_r2r(mm3, mm2); + +//Save tmp0 and tmp1 in wsptr + movq_r2m(mm0, *(wsptr)); // save tmp0 + paddw_r2r(mm4, mm2); // wsptr[xxx],[0,z11],[xxx],[0,z13] + + +//Continue with z10 --- z13 + movq_m2r(*(wsptr+2), mm6); // wsptr[1,0],[1,1],[1,2],[1,3] + psubw_r2r(mm4, mm3); // wsptr[xxx],[0,z12],[xxx],[0,z10] + + movq_m2r(*(wsptr+3), mm0); // wsptr[1,4],[1,5],[1,6],[1,7] + movq_r2r(mm6, mm4); + + movq_r2m(mm1, *(wsptr+1)); // save tmp1 + punpckldq_r2r(mm0, mm6); // wsptr[1,0],[1,1],[1,4],[1,5] + + punpckhdq_r2r(mm4, mm0); // wsptr[1,6],[1,7],[1,2],[1,3] + movq_r2r(mm6, mm1); + +//Save tmp2 and tmp3 in wsptr + paddw_r2r(mm0, mm6); // wsptr[xxx],[1,z11],[xxx],[1,z13] + movq_r2r(mm2, mm4); + +//Continue with z10 --- z13 + movq_r2m(mm5, *(wsptr+2)); // save tmp2 + punpcklwd_r2r(mm6, mm2); // wsptr[xxx],[xxx],[0,z11],[1,z11] + + psubw_r2r(mm0, mm1); // wsptr[xxx],[1,z12],[xxx],[1,z10] + punpckhwd_r2r(mm6, mm4); // wsptr[xxx],[xxx],[0,z13],[1,z13] + + movq_r2r(mm3, mm0); + punpcklwd_r2r(mm1, mm3); // wsptr[xxx],[xxx],[0,z12],[1,z12] + + movq_r2m(mm7, *(wsptr+3)); // save tmp3 + punpckhwd_r2r(mm1, mm0); // wsptr[xxx],[xxx],[0,z10],[1,z10] + + movq_m2r(*(wsptr+4), mm6); // wsptr[2,0],[2,1],[2,2],[2,3] + punpckhdq_r2r(mm2, mm0); // wsptr[0,z10],[1,z10],[0,z11],[1,z11] + + movq_m2r(*(wsptr+5), mm7); // wsptr[2,4],[2,5],[2,6],[2,7] + punpckhdq_r2r(mm4, mm3); // wsptr[0,z12],[1,z12],[0,z13],[1,z13] + + movq_m2r(*(wsptr+6), mm1); // wsptr[3,0],[3,1],[3,2],[3,3] + movq_r2r(mm6, mm4); + + punpckldq_r2r(mm7, mm6); // wsptr[2,0],[2,1],[2,4],[2,5] + movq_r2r(mm1, mm5); + + punpckhdq_r2r(mm4, mm7); // wsptr[2,6],[2,7],[2,2],[2,3] + movq_r2r(mm6, mm2); + + movq_m2r(*(wsptr+7), mm4); // wsptr[3,4],[3,5],[3,6],[3,7] + paddw_r2r(mm7, mm6); // wsptr[xxx],[2,z11],[xxx],[2,z13] + + psubw_r2r(mm7, mm2); // wsptr[xxx],[2,z12],[xxx],[2,z10] + punpckldq_r2r(mm4, mm1); // wsptr[3,0],[3,1],[3,4],[3,5] + + punpckhdq_r2r(mm5, mm4); // wsptr[3,6],[3,7],[3,2],[3,3] + movq_r2r(mm1, mm7); + + paddw_r2r(mm4, mm1); // wsptr[xxx],[3,z11],[xxx],[3,z13] + psubw_r2r(mm4, mm7); // wsptr[xxx],[3,z12],[xxx],[3,z10] + + movq_r2r(mm6, mm5); + punpcklwd_r2r(mm1, mm6); // wsptr[xxx],[xxx],[2,z11],[3,z11] + + punpckhwd_r2r(mm1, mm5); // wsptr[xxx],[xxx],[2,z13],[3,z13] + movq_r2r(mm2, mm4); + + punpcklwd_r2r(mm7, mm2); // wsptr[xxx],[xxx],[2,z12],[3,z12] + + punpckhwd_r2r(mm7, mm4); // wsptr[xxx],[xxx],[2,z10],[3,z10] + + punpckhdq_r2r(mm6, mm4); // wsptr[2,z10],[3,z10],[2,z11],[3,z11] + + punpckhdq_r2r(mm5, mm2); // wsptr[2,z12],[3,z12],[2,z13],[3,z13] + movq_r2r(mm0, mm5); + + punpckldq_r2r(mm4, mm0); // wsptr[0,z10],[1,z10],[2,z10],[3,z10] + + punpckhdq_r2r(mm4, mm5); // wsptr[0,z11],[1,z11],[2,z11],[3,z11] + movq_r2r(mm3, mm4); + + punpckhdq_r2r(mm2, mm4); // wsptr[0,z13],[1,z13],[2,z13],[3,z13] + movq_r2r(mm5, mm1); + + punpckldq_r2r(mm2, mm3); // wsptr[0,z12],[1,z12],[2,z12],[3,z12] +// tmp7 = z11 + z13; /* phase 5 */ +// tmp8 = z11 - z13; /* phase 5 */ + psubw_r2r(mm4, mm1); // tmp8 + + paddw_r2r(mm4, mm5); // tmp7 +// tmp21 = MULTIPLY(tmp8, FIX_1_414213562); /* 2*c4 */ + psllw_i2r(2, mm1); + + psllw_i2r(2, mm0); + + pmulhw_m2r(fix_141, mm1); // tmp21 +// tmp20 = MULTIPLY(z12, (FIX_1_082392200- FIX_1_847759065)) /* 2*(c2-c6) */ +// + MULTIPLY(z10, - FIX_1_847759065); /* 2*c2 */ + psllw_i2r(2, mm3); + movq_r2r(mm0, mm7); + + pmulhw_m2r(fix_n184, mm7); + movq_r2r(mm3, mm6); + + movq_m2r(*(wsptr), mm2); // tmp0,final1 + + pmulhw_m2r(fix_108n184, mm6); +// tmp22 = MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) /* -2*(c2+c6) */ +// + MULTIPLY(z12, FIX_1_847759065); /* 2*c2 */ + movq_r2r(mm2, mm4); // final1 + + pmulhw_m2r(fix_184n261, mm0); + paddw_r2r(mm5, mm2); // tmp0+tmp7,final1 + + pmulhw_m2r(fix_184, mm3); + psubw_r2r(mm5, mm4); // tmp0-tmp7,final1 + +// tmp6 = tmp22 - tmp7; /* phase 2 */ + psraw_i2r(3, mm2); // outptr[0,0],[1,0],[2,0],[3,0],final1 + + paddw_r2r(mm6, mm7); // tmp20 + psraw_i2r(3, mm4); // outptr[0,7],[1,7],[2,7],[3,7],final1 + + paddw_r2r(mm0, mm3); // tmp22 + +// tmp5 = tmp21 - tmp6; + psubw_r2r(mm5, mm3); // tmp6 + +// tmp4 = tmp20 + tmp5; + movq_m2r(*(wsptr+1), mm0); // tmp1,final2 + psubw_r2r(mm3, mm1); // tmp5 + + movq_r2r(mm0, mm6); // final2 + paddw_r2r(mm3, mm0); // tmp1+tmp6,final2 + + /* Final output stage: scale down by a factor of 8 and range-limit */ + +// outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3) +// & RANGE_MASK]; +// outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3) +// & RANGE_MASK]; final1 + + +// outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3) +// & RANGE_MASK]; +// outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3) +// & RANGE_MASK]; final2 + psubw_r2r(mm3, mm6); // tmp1-tmp6,final2 + psraw_i2r(3, mm0); // outptr[0,1],[1,1],[2,1],[3,1] + + psraw_i2r(3, mm6); // outptr[0,6],[1,6],[2,6],[3,6] + + packuswb_r2r(mm4, mm0); // out[0,1],[1,1],[2,1],[3,1],[0,7],[1,7],[2,7],[3,7] + + movq_m2r(*(wsptr+2), mm5); // tmp2,final3 + packuswb_r2r(mm6, mm2); // out[0,0],[1,0],[2,0],[3,0],[0,6],[1,6],[2,6],[3,6] + +// outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3) +// & RANGE_MASK]; +// outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3) +// & RANGE_MASK]; final3 + paddw_r2r(mm1, mm7); // tmp4 + movq_r2r(mm5, mm3); + + paddw_r2r(mm1, mm5); // tmp2+tmp5 + psubw_r2r(mm1, mm3); // tmp2-tmp5 + + psraw_i2r(3, mm5); // outptr[0,2],[1,2],[2,2],[3,2] + + movq_m2r(*(wsptr+3), mm4); // tmp3,final4 + psraw_i2r(3, mm3); // outptr[0,5],[1,5],[2,5],[3,5] + + + +// outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3) +// & RANGE_MASK]; +// outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3) +// & RANGE_MASK]; final4 + movq_r2r(mm4, mm6); + paddw_r2r(mm7, mm4); // tmp3+tmp4 + + psubw_r2r(mm7, mm6); // tmp3-tmp4 + psraw_i2r(3, mm4); // outptr[0,4],[1,4],[2,4],[3,4] + + psraw_i2r(3, mm6); // outptr[0,3],[1,3],[2,3],[3,3] + + /* + movq_r2m(mm4, *dummy); + fprintf(stderr, "3-4 %016llx\n", dummy); + movq_r2m(mm4, *dummy); + fprintf(stderr, "3+4 %016llx\n", dummy); + */ + + + packuswb_r2r(mm4, mm5); // out[0,2],[1,2],[2,2],[3,2],[0,4],[1,4],[2,4],[3,4] + + packuswb_r2r(mm3, mm6); // out[0,3],[1,3],[2,3],[3,3],[0,5],[1,5],[2,5],[3,5] + movq_r2r(mm2, mm4); + + movq_r2r(mm5, mm7); + punpcklbw_r2r(mm0, mm2); // out[0,0],[0,1],[1,0],[1,1],[2,0],[2,1],[3,0],[3,1] + + punpckhbw_r2r(mm0, mm4); // out[0,6],[0,7],[1,6],[1,7],[2,6],[2,7],[3,6],[3,7] + movq_r2r(mm2, mm1); + + punpcklbw_r2r(mm6, mm5); // out[0,2],[0,3],[1,2],[1,3],[2,2],[2,3],[3,2],[3,3] + + punpckhbw_r2r(mm6, mm7); // out[0,4],[0,5],[1,4],[1,5],[2,4],[2,5],[3,4],[3,5] + + punpcklwd_r2r(mm5, mm2); // out[0,0],[0,1],[0,2],[0,3],[1,0],[1,1],[1,2],[1,3] + + movq_r2r(mm7, mm6); + punpckhwd_r2r(mm5, mm1); // out[2,0],[2,1],[2,2],[2,3],[3,0],[3,1],[3,2],[3,3] + + movq_r2r(mm2, mm0); + punpcklwd_r2r(mm4, mm6); // out[0,4],[0,5],[0,6],[0,7],[1,4],[1,5],[1,6],[1,7] + + punpckldq_r2r(mm6, mm2); // out[0,0],[0,1],[0,2],[0,3],[0,4],[0,5],[0,6],[0,7] + + movq_r2r(mm1, mm3); + + punpckhwd_r2r(mm4, mm7); // out[2,4],[2,5],[2,6],[2,7],[3,4],[3,5],[3,6],[3,7] + + dataptr += rskip; + movq_r2m(mm2, *(dataptr)); + + punpckhdq_r2r(mm6, mm0); // out[1,0],[1,1],[1,2],[1,3],[1,4],[1,5],[1,6],[1,7] + + dataptr += rskip; + movq_r2m(mm0, *(dataptr)); + + punpckldq_r2r(mm7, mm1); // out[2,0],[2,1],[2,2],[2,3],[2,4],[2,5],[2,6],[2,7] + + punpckhdq_r2r(mm7, mm3); // out[3,0],[3,1],[3,2],[3,3],[3,4],[3,5],[3,6],[3,7] + + dataptr += rskip; + movq_r2m(mm1, *(dataptr)); + + dataptr += rskip; + movq_r2m(mm3, *(dataptr)); + +#else + __s32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + __s32 tmp10, tmp11, tmp12, tmp13; + __s32 z5, z10, z11, z12, z13; + __s16 *inptr; + __s32 *wsptr; + __u8 *outptr; + int ctr; + __s32 dcval; + __s32 workspace[64]; + + inptr = data; + wsptr = workspace; + for (ctr = 8; ctr > 0; ctr--) { + + if ((inptr[8] | inptr[16] | inptr[24] | + inptr[32] | inptr[40] | inptr[48] | inptr[56]) == 0) { + dcval = inptr[0]; + wsptr[0] = dcval; + wsptr[8] = dcval; + wsptr[16] = dcval; + wsptr[24] = dcval; + wsptr[32] = dcval; + wsptr[40] = dcval; + wsptr[48] = dcval; + wsptr[56] = dcval; + + inptr++; + wsptr++; + continue; + } + + tmp0 = inptr[0]; + tmp1 = inptr[16]; + tmp2 = inptr[32]; + tmp3 = inptr[48]; + + tmp10 = tmp0 + tmp2; + tmp11 = tmp0 - tmp2; + + tmp13 = tmp1 + tmp3; + tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; + + tmp0 = tmp10 + tmp13; + tmp3 = tmp10 - tmp13; + tmp1 = tmp11 + tmp12; + tmp2 = tmp11 - tmp12; + + tmp4 = inptr[8]; + tmp5 = inptr[24]; + tmp6 = inptr[40]; + tmp7 = inptr[56]; + + z13 = tmp6 + tmp5; + z10 = tmp6 - tmp5; + z11 = tmp4 + tmp7; + z12 = tmp4 - tmp7; + + tmp7 = z11 + z13; + tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); + + z5 = MULTIPLY(z10 + z12, FIX_1_847759065); + tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; + tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; + + tmp6 = tmp12 - tmp7; + tmp5 = tmp11 - tmp6; + tmp4 = tmp10 + tmp5; + + wsptr[0] = (__s32) (tmp0 + tmp7); + wsptr[56] = (__s32) (tmp0 - tmp7); + wsptr[8] = (__s32) (tmp1 + tmp6); + wsptr[48] = (__s32) (tmp1 - tmp6); + wsptr[16] = (__s32) (tmp2 + tmp5); + wsptr[40] = (__s32) (tmp2 - tmp5); + wsptr[32] = (__s32) (tmp3 + tmp4); + wsptr[24] = (__s32) (tmp3 - tmp4); + + inptr++; + wsptr++; + } + + wsptr = workspace; + for (ctr = 0; ctr < 8; ctr++) { + outptr = &(odata[ctr*rskip]); + + tmp10 = wsptr[0] + wsptr[4]; + tmp11 = wsptr[0] - wsptr[4]; + + tmp13 = wsptr[2] + wsptr[6]; + tmp12 = MULTIPLY(wsptr[2] - wsptr[6], FIX_1_414213562) - tmp13; + + tmp0 = tmp10 + tmp13; + tmp3 = tmp10 - tmp13; + tmp1 = tmp11 + tmp12; + tmp2 = tmp11 - tmp12; + + z13 = wsptr[5] + wsptr[3]; + z10 = wsptr[5] - wsptr[3]; + z11 = wsptr[1] + wsptr[7]; + z12 = wsptr[1] - wsptr[7]; + + tmp7 = z11 + z13; + tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); + + z5 = MULTIPLY(z10 + z12, FIX_1_847759065); + tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; + tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; + + tmp6 = tmp12 - tmp7; + tmp5 = tmp11 - tmp6; + tmp4 = tmp10 + tmp5; + + outptr[0] = RL(DESCALE(tmp0 + tmp7)); + outptr[7] = RL(DESCALE(tmp0 - tmp7)); + outptr[1] = RL(DESCALE(tmp1 + tmp6)); + outptr[6] = RL(DESCALE(tmp1 - tmp6)); + outptr[2] = RL(DESCALE(tmp2 + tmp5)); + outptr[5] = RL(DESCALE(tmp2 - tmp5)); + outptr[4] = RL(DESCALE(tmp3 + tmp4)); + outptr[3] = RL(DESCALE(tmp3 - tmp4)); + + wsptr += 8; + } +#endif +} +/* + +Main Routines + +This file contains most of the initialisation and control functions + +(C) Justin Schoeman 1998 + +*/ + +/* + +Private function + +Initialise all the cache-aliged data blocks + +*/ + +void RTjpeg_init_data(void) +{ + unsigned long dptr; + + dptr=(unsigned long)&(RTjpeg_alldata[0]); + dptr+=32; + dptr=dptr>>5; + dptr=dptr<<5; /* cache align data */ + + RTjpeg_block=(__s16 *)dptr; + dptr+=sizeof(__s16)*64; + RTjpeg_lqt=(__s32 *)dptr; + dptr+=sizeof(__s32)*64; + RTjpeg_cqt=(__s32 *)dptr; + dptr+=sizeof(__s32)*64; + RTjpeg_liqt=(__u32 *)dptr; + dptr+=sizeof(__u32)*64; + RTjpeg_ciqt=(__u32 *)dptr; +} + +/* + +External Function + +Re-set quality factor + +Input: buf -> pointer to 128 ints for quant values store to pass back to + init_decompress. + Q -> quality factor (192=best, 32=worst) +*/ + +void RTjpeg_init_Q(__u8 Q) +{ + int i; + __u64 qual; + + qual=(__u64)Q<<(32-7); /* 32 bit FP, 255=2, 0=0 */ + + for(i=0; i<64; i++) + { + RTjpeg_lqt[i]=(__s32)((qual/((__u64)RTjpeg_lum_quant_tbl[i]<<16))>>3); + if(RTjpeg_lqt[i]==0)RTjpeg_lqt[i]=1; + RTjpeg_cqt[i]=(__s32)((qual/((__u64)RTjpeg_chrom_quant_tbl[i]<<16))>>3); + if(RTjpeg_cqt[i]==0)RTjpeg_cqt[i]=1; + RTjpeg_liqt[i]=(1<<16)/(RTjpeg_lqt[i]<<3); + RTjpeg_ciqt[i]=(1<<16)/(RTjpeg_cqt[i]<<3); + RTjpeg_lqt[i]=((1<<16)/RTjpeg_liqt[i])>>3; + RTjpeg_cqt[i]=((1<<16)/RTjpeg_ciqt[i])>>3; + } + + RTjpeg_lb8=0; + while(RTjpeg_liqt[RTjpeg_ZZ[++RTjpeg_lb8]]<=8); + RTjpeg_lb8--; + RTjpeg_cb8=0; + while(RTjpeg_ciqt[RTjpeg_ZZ[++RTjpeg_cb8]]<=8); + RTjpeg_cb8--; + + RTjpeg_dct_init(); + RTjpeg_idct_init(); + RTjpeg_quant_init(); +} + +/* + +External Function + +Initialise compression. + +Input: buf -> pointer to 128 ints for quant values store to pass back to + init_decompress. + width -> width of image + height -> height of image + Q -> quality factor (192=best, 32=worst) + +*/ + +void RTjpeg_init_compress(__u32 *buf, int width, int height, __u8 Q) +{ + int i; + __u64 qual; + + RTjpeg_init_data(); + + RTjpeg_width=width; + RTjpeg_height=height; + RTjpeg_Ywidth = RTjpeg_width>>3; + RTjpeg_Ysize=width * height; + RTjpeg_Cwidth = RTjpeg_width>>4; + RTjpeg_Csize= (width>>1) * height; + + qual=(__u64)Q<<(32-7); /* 32 bit FP, 255=2, 0=0 */ + + for(i=0; i<64; i++) + { + RTjpeg_lqt[i]=(__s32)((qual/((__u64)RTjpeg_lum_quant_tbl[i]<<16))>>3); + if(RTjpeg_lqt[i]==0)RTjpeg_lqt[i]=1; + RTjpeg_cqt[i]=(__s32)((qual/((__u64)RTjpeg_chrom_quant_tbl[i]<<16))>>3); + if(RTjpeg_cqt[i]==0)RTjpeg_cqt[i]=1; + RTjpeg_liqt[i]=(1<<16)/(RTjpeg_lqt[i]<<3); + RTjpeg_ciqt[i]=(1<<16)/(RTjpeg_cqt[i]<<3); + RTjpeg_lqt[i]=((1<<16)/RTjpeg_liqt[i])>>3; + RTjpeg_cqt[i]=((1<<16)/RTjpeg_ciqt[i])>>3; + } + + RTjpeg_lb8=0; + while(RTjpeg_liqt[RTjpeg_ZZ[++RTjpeg_lb8]]<=8); + RTjpeg_lb8--; + RTjpeg_cb8=0; + while(RTjpeg_ciqt[RTjpeg_ZZ[++RTjpeg_cb8]]<=8); + RTjpeg_cb8--; + + RTjpeg_dct_init(); + RTjpeg_quant_init(); + + for(i=0; i<64; i++) + buf[i]=RTjpeg_liqt[i]; + for(i=0; i<64; i++) + buf[64+i]=RTjpeg_ciqt[i]; +} + +void RTjpeg_init_decompress(__u32 *buf, int width, int height) +{ + int i; + + RTjpeg_init_data(); + + RTjpeg_width=width; + RTjpeg_height=height; + RTjpeg_Ywidth = RTjpeg_width>>3; + RTjpeg_Ysize=width * height; + RTjpeg_Cwidth = RTjpeg_width>>4; + RTjpeg_Csize= (width>>1) * height; + + for(i=0; i<64; i++) + { + RTjpeg_liqt[i]=buf[i]; + RTjpeg_ciqt[i]=buf[i+64]; + } + + RTjpeg_lb8=0; + while(RTjpeg_liqt[RTjpeg_ZZ[++RTjpeg_lb8]]<=8); + RTjpeg_lb8--; + RTjpeg_cb8=0; + while(RTjpeg_ciqt[RTjpeg_ZZ[++RTjpeg_cb8]]<=8); + RTjpeg_cb8--; + + RTjpeg_idct_init(); + +// RTjpeg_color_init(); +} + +int RTjpeg_compressYUV420(__s8 *sp, unsigned char *bp) +{ + __s8 * sb; + register __s8 * bp1 = bp + (RTjpeg_width<<3); + register __s8 * bp2 = bp + RTjpeg_Ysize; + register __s8 * bp3 = bp2 + (RTjpeg_Csize>>1); + register int i, j, k; + +#ifdef MMX + emms(); +#endif + sb=sp; +/* Y */ + for(i=RTjpeg_height>>1; i; i-=8) + { + for(j=0, k=0; j>1); + } + if(*sp==-1)sp++; + else + { + sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt); + RTjpeg_idct(bp3+k, RTjpeg_block, RTjpeg_width>>1); + } + } + bp+=RTjpeg_width<<3; + bp2+=RTjpeg_width<<2; + bp3+=RTjpeg_width<<2; + } +#ifdef MMX + emms(); +#endif +} + +void RTjpeg_decompressYUV420(__s8 *sp, __u8 *bp) +{ + register __s8 * bp1 = bp + (RTjpeg_width<<3); + register __s8 * bp2 = bp + RTjpeg_Ysize; + register __s8 * bp3 = bp2 + (RTjpeg_Csize>>1); + int i, j,k; + +#ifdef MMX + emms(); +#endif + +/* Y */ + for(i=RTjpeg_height>>1; i; i-=8) + { + for(k=0, j=0; j>1); + } + if(*sp==-1)sp++; + else + { + sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt); + RTjpeg_idct(bp3+k, RTjpeg_block, RTjpeg_width>>1); + } + } + bp+=RTjpeg_width<<4; + bp1+=RTjpeg_width<<4; + bp2+=RTjpeg_width<<2; + bp3+=RTjpeg_width<<2; + } +#ifdef MMX + emms(); +#endif +} + +void RTjpeg_decompress8(__s8 *sp, __u8 *bp) +{ + int i, j; + +#ifdef MMX + emms(); +#endif + +/* Y */ + for(i=0; i>5; + RTjpeg_old=(__s16 *)(tmp<<5); + } + if (!RTjpeg_old) + { + fprintf(stderr, "RTjpeg: Could not allocate memory\n"); + exit(-1); + } + bzero(RTjpeg_old, ((4*RTjpeg_width*RTjpeg_height))); +} + +#ifdef MMX + +int RTjpeg_bcomp(__s16 *old, mmx_t *mask) +{ + int i; + mmx_t *mold=(mmx_t *)old; + mmx_t *mblock=(mmx_t *)RTjpeg_block; + volatile mmx_t result; + static mmx_t neg=(mmx_t)(unsigned long long)0xffffffffffffffffULL; + + movq_m2r(*mask, mm7); + movq_m2r(neg, mm6); + pxor_r2r(mm5, mm5); + + for(i=0; i<8; i++) + { + movq_m2r(*(mblock++), mm0); + movq_m2r(*(mblock++), mm2); + movq_m2r(*(mold++), mm1); + movq_m2r(*(mold++), mm3); + psubsw_r2r(mm1, mm0); + psubsw_r2r(mm3, mm2); + movq_r2r(mm0, mm1); + movq_r2r(mm2, mm3); + pcmpgtw_r2r(mm7, mm0); + pcmpgtw_r2r(mm7, mm2); + pxor_r2r(mm6, mm1); + pxor_r2r(mm6, mm3); + pcmpgtw_r2r(mm7, mm1); + pcmpgtw_r2r(mm7, mm3); + por_r2r(mm0, mm5); + por_r2r(mm2, mm5); + por_r2r(mm1, mm5); + por_r2r(mm3, mm5); + } + movq_r2m(mm5, result); + + if(result.q) + { +// if(!RTjpeg_mtest) +// for(i=0; i<16; i++)((__u64 *)old)[i]=((__u64 *)RTjpeg_block)[i]; + return 0; + } +// printf("."); + return 1; +} + +#else +int RTjpeg_bcomp(__s16 *old, __u16 *mask) +{ + int i; + + for(i=0; i<64; i++) + if(abs(old[i]-RTjpeg_block[i])>*mask) + { + if(!RTjpeg_mtest) + for(i=0; i<16; i++)((__u64 *)old)[i]=((__u64 *)RTjpeg_block)[i]; + return 0; + } + return 1; +} +#endif + +void RTjpeg_set_test(int i) +{ + RTjpeg_mtest=i; +} + +int RTjpeg_mcompressYUV420(__s8 *sp, unsigned char *bp, __u16 lmask, __u16 cmask) +{ + __s8 * sb; +//rh __s16 *block; + register __s8 * bp1 = bp + (RTjpeg_width<<3); + register __s8 * bp2 = bp + RTjpeg_Ysize; + register __s8 * bp3 = bp2 + (RTjpeg_Csize>>1); + register int i, j, k; + +#ifdef MMX + emms(); + RTjpeg_lmask=(mmx_t)(((__u64)lmask<<48)|((__u64)lmask<<32)|((__u64)lmask<<16)|lmask); + RTjpeg_cmask=(mmx_t)(((__u64)cmask<<48)|((__u64)cmask<<32)|((__u64)cmask<<16)|cmask); +#else + RTjpeg_lmask=lmask; + RTjpeg_cmask=cmask; +#endif + + sb=sp; + block=RTjpeg_old; +/* Y */ + for(i=RTjpeg_height>>1; i; i-=8) + { + for(j=0, k=0; j>16; + *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y-crG-cbG)>>16; + *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y+cbB)>>16; + *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); + + y=(bufy[j+1]-16)*Ky; + + tmp=(y+crR)>>16; + *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y-crG-cbG)>>16; + *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y+cbB)>>16; + *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); + + } + bufy+=yskip; + } +} + + +void RTjpeg_yuv420rgb(__u8 *buf, __u8 *rgb, int stride) +{ + int tmp; + int i, j; + __s32 y, crR, crG, cbG, cbB; + __u8 *bufcr, *bufcb, *bufy, *bufoute, *bufouto; + int oskip, yskip; + + if(stride==0) + oskip=RTjpeg_width*3; + else + oskip=2*stride-RTjpeg_width*3; + + yskip=RTjpeg_width; + + bufcb=&buf[RTjpeg_width*RTjpeg_height]; + bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/4]; + bufy=&buf[0]; + bufoute=rgb; + bufouto=rgb+RTjpeg_width*3; + + for(i=0; i<(RTjpeg_height>>1); i++) + { + for(j=0; j>16; + *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y-crG-cbG)>>16; + *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y+cbB)>>16; + *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); + + y=(bufy[j+1]-16)*Ky; + + tmp=(y+crR)>>16; + *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y-crG-cbG)>>16; + *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y+cbB)>>16; + *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); + + y=(bufy[j+yskip]-16)*Ky; + + tmp=(y+crR)>>16; + *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y-crG-cbG)>>16; + *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y+cbB)>>16; + *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); + + y=(bufy[j+1+yskip]-16)*Ky; + + tmp=(y+crR)>>16; + *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y-crG-cbG)>>16; + *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y+cbB)>>16; + *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); + + } + bufoute+=oskip; + bufouto+=oskip; + bufy+=yskip<<1; + } +} + + +void RTjpeg_yuvrgb32(__u8 *buf, __u8 *rgb, int stride) +{ + int tmp; + int i, j; + __s32 y, crR, crG, cbG, cbB; + __u8 *bufcr, *bufcb, *bufy, *bufoute, *bufouto; + int oskip, yskip; + + if(stride==0) + oskip=RTjpeg_width*4; + else + oskip = 2*stride-RTjpeg_width*4; + yskip=RTjpeg_width; + + bufcb=&buf[RTjpeg_width*RTjpeg_height]; + bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/2]; + bufy=&buf[0]; + bufoute=rgb; + bufouto=rgb+RTjpeg_width*4; + + for(i=0; i<(RTjpeg_height>>1); i++) + { + for(j=0; j>16; + *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y-crG-cbG)>>16; + *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y+crR)>>16; + *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); + bufoute++; + + y=(bufy[j+1]-16)*Ky; + + tmp=(y+cbB)>>16; + *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y-crG-cbG)>>16; + *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y+crR)>>16; + *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); + bufoute++; + + y=(bufy[j+yskip]-16)*Ky; + + tmp=(y+cbB)>>16; + *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y-crG-cbG)>>16; + *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y+crR)>>16; + *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); + bufouto++; + + y=(bufy[j+1+yskip]-16)*Ky; + + tmp=(y+cbB)>>16; + *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y-crG-cbG)>>16; + *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y+crR)>>16; + *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); + bufouto++; + + } + bufoute+=oskip; + bufouto+=oskip; + bufy+=yskip<<1; + } +} + +void RTjpeg_yuvrgb24(__u8 *buf, __u8 *rgb, int stride) +{ + int tmp; + int i, j; + __s32 y, crR, crG, cbG, cbB; + __u8 *bufcr, *bufcb, *bufy, *bufoute, *bufouto; + int oskip, yskip; + + if(stride==0) + oskip=RTjpeg_width*3; + else + oskip=2*stride - RTjpeg_width*3; + + yskip=RTjpeg_width; + + bufcb=&buf[RTjpeg_width*RTjpeg_height]; + bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/4]; + bufy=&buf[0]; + bufoute=rgb; + bufouto=rgb+RTjpeg_width*3; + + for(i=0; i<(RTjpeg_height>>1); i++) + { + for(j=0; j>16; + *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y-crG-cbG)>>16; + *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y+crR)>>16; + *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); + + y=(bufy[j+1]-16)*Ky; + + tmp=(y+cbB)>>16; + *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y-crG-cbG)>>16; + *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y+crR)>>16; + *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp); + + y=(bufy[j+yskip]-16)*Ky; + + tmp=(y+cbB)>>16; + *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y-crG-cbG)>>16; + *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y+crR)>>16; + *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); + + y=(bufy[j+1+yskip]-16)*Ky; + + tmp=(y+cbB)>>16; + *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y-crG-cbG)>>16; + *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y+crR)>>16; + *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp); + + } + bufoute+=oskip; + bufouto+=oskip; + bufy+=yskip<<1; + } +} + +void RTjpeg_yuvrgb16(__u8 *buf, __u8 *rgb, int stride) +{ + int tmp; + int i, j; + __s32 y, crR, crG, cbG, cbB; + __u8 *bufcr, *bufcb, *bufy, *bufoute, *bufouto; + int oskip, yskip; + unsigned char r, g, b; + + if(stride==0) + oskip=RTjpeg_width*2; + else + oskip=2*stride-RTjpeg_width*2; + + yskip=RTjpeg_width; + + bufcb=&buf[RTjpeg_width*RTjpeg_height]; + bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/4]; + bufy=&buf[0]; + bufoute=rgb; + bufouto=rgb+RTjpeg_width*2; + + for(i=0; i<(RTjpeg_height>>1); i++) + { + for(j=0; j>16; + b=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y-crG-cbG)>>16; + g=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y+crR)>>16; + r=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(int)((int)b >> 3); + tmp|=(int)(((int)g >> 2) << 5); + tmp|=(int)(((int)r >> 3) << 11); + *(bufoute++)=tmp&0xff; + *(bufoute++)=tmp>>8; + + + y=(bufy[j+1]-16)*Ky; + + tmp=(y+cbB)>>16; + b=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y-crG-cbG)>>16; + g=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y+crR)>>16; + r=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(int)((int)b >> 3); + tmp|=(int)(((int)g >> 2) << 5); + tmp|=(int)(((int)r >> 3) << 11); + *(bufoute++)=tmp&0xff; + *(bufoute++)=tmp>>8; + + y=(bufy[j+yskip]-16)*Ky; + + tmp=(y+cbB)>>16; + b=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y-crG-cbG)>>16; + g=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y+crR)>>16; + r=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(int)((int)b >> 3); + tmp|=(int)(((int)g >> 2) << 5); + tmp|=(int)(((int)r >> 3) << 11); + *(bufouto++)=tmp&0xff; + *(bufouto++)=tmp>>8; + + y=(bufy[j+1+yskip]-16)*Ky; + + tmp=(y+cbB)>>16; + b=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y-crG-cbG)>>16; + g=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(y+crR)>>16; + r=(tmp>255)?255:((tmp<0)?0:tmp); + tmp=(int)((int)b >> 3); + tmp|=(int)(((int)g >> 2) << 5); + tmp|=(int)(((int)r >> 3) << 11); + *(bufouto++)=tmp&0xff; + *(bufouto++)=tmp>>8; + + } + bufoute+=oskip; + bufouto+=oskip; + bufy+=yskip<<1; + } +} + +/* fix stride */ + +void RTjpeg_yuvrgb8(__u8 *buf, __u8 *rgb, int stride) +{ + bcopy(buf, rgb, RTjpeg_width*RTjpeg_height); +} + diff -r fd85802f755b -r 628c85c15c7b libmpcodecs/native/RTjpegN.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libmpcodecs/native/RTjpegN.h Sat Apr 13 18:03:02 2002 +0000 @@ -0,0 +1,58 @@ +/* + RTjpeg (C) Justin Schoeman 1998 (justin@suntiger.ee.up.ac.za) + + With modifications by: + (c) 1998, 1999 by Joerg Walter + and + (c) 1999 by Wim Taymans + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +*/ + +#ifndef _I386_TYPES_H +typedef unsigned char __u8; +typedef unsigned short __u16; +typedef unsigned long __u32; +typedef unsigned long long __u64; +typedef signed char __s8; +typedef signed short __s16; +typedef signed long __s32; +#endif + +extern void RTjpeg_init_Q(__u8 Q); +extern void RTjpeg_init_compress(long unsigned int *buf, int width, int height, __u8 Q); +extern void RTjpeg_init_decompress(long unsigned int *buf, int width, int height); +extern int RTjpeg_compressYUV420(__s8 *sp, unsigned char *bp); +extern int RTjpeg_compressYUV422(__s8 *sp, unsigned char *bp); +extern void RTjpeg_decompressYUV420(__s8 *sp, __u8 *bp); +extern void RTjpeg_decompressYUV422(__s8 *sp, __u8 *bp); +extern int RTjpeg_compress8(__s8 *sp, unsigned char *bp); +extern void RTjpeg_decompress8(__s8 *sp, __u8 *bp); + +extern void RTjpeg_init_mcompress(void); +extern int RTjpeg_mcompressYUV420(__s8 *sp, unsigned char *bp, __u16 lmask, __u16 cmask); +extern int RTjpeg_mcompressYUV422(__s8 *sp, unsigned char *bp, __u16 lmask, __u16 cmask); +extern int RTjpeg_mcompress8(__s8 *sp, unsigned char *bp, __u16 lmask); +extern void RTjpeg_set_test(int i); + +extern void RTjpeg_yuv420rgb(__u8 *buf, __u8 *rgb, int stride); +extern void RTjpeg_yuv422rgb(__u8 *buf, __u8 *rgb, int stride); +extern void RTjpeg_yuvrgb8(__u8 *buf, __u8 *rgb, int stride); +extern void RTjpeg_yuvrgb16(__u8 *buf, __u8 *rgb, int stride); +extern void RTjpeg_yuvrgb24(__u8 *buf, __u8 *rgb, int stride); +extern void RTjpeg_yuvrgb32(__u8 *buf, __u8 *rgb, int stride); + + diff -r fd85802f755b -r 628c85c15c7b libmpcodecs/native/alaw.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libmpcodecs/native/alaw.h Sat Apr 13 18:03:02 2002 +0000 @@ -0,0 +1,71 @@ +// Generated by TOOLS/alaw-gen.c + +short alaw2short[]={ + -5504, -5248, -6016, -5760, -4480, -4224, -4992, -4736, + -7552, -7296, -8064, -7808, -6528, -6272, -7040, -6784, + -2752, -2624, -3008, -2880, -2240, -2112, -2496, -2368, + -3776, -3648, -4032, -3904, -3264, -3136, -3520, -3392, +-22016,-20992,-24064,-23040,-17920,-16896,-19968,-18944, +-30208,-29184,-32256,-31232,-26112,-25088,-28160,-27136, +-11008,-10496,-12032,-11520, -8960, -8448, -9984, -9472, +-15104,-14592,-16128,-15616,-13056,-12544,-14080,-13568, + -344, -328, -376, -360, -280, -264, -312, -296, + -472, -456, -504, -488, -408, -392, -440, -424, + -88, -72, -120, -104, -24, -8, -56, -40, + -216, -200, -248, -232, -152, -136, -184, -168, + -1376, -1312, -1504, -1440, -1120, -1056, -1248, -1184, + -1888, -1824, -2016, -1952, -1632, -1568, -1760, -1696, + -688, -656, -752, -720, -560, -528, -624, -592, + -944, -912, -1008, -976, -816, -784, -880, -848, + 5504, 5248, 6016, 5760, 4480, 4224, 4992, 4736, + 7552, 7296, 8064, 7808, 6528, 6272, 7040, 6784, + 2752, 2624, 3008, 2880, 2240, 2112, 2496, 2368, + 3776, 3648, 4032, 3904, 3264, 3136, 3520, 3392, + 22016, 20992, 24064, 23040, 17920, 16896, 19968, 18944, + 30208, 29184, 32256, 31232, 26112, 25088, 28160, 27136, + 11008, 10496, 12032, 11520, 8960, 8448, 9984, 9472, + 15104, 14592, 16128, 15616, 13056, 12544, 14080, 13568, + 344, 328, 376, 360, 280, 264, 312, 296, + 472, 456, 504, 488, 408, 392, 440, 424, + 88, 72, 120, 104, 24, 8, 56, 40, + 216, 200, 248, 232, 152, 136, 184, 168, + 1376, 1312, 1504, 1440, 1120, 1056, 1248, 1184, + 1888, 1824, 2016, 1952, 1632, 1568, 1760, 1696, + 688, 656, 752, 720, 560, 528, 624, 592, + 944, 912, 1008, 976, 816, 784, 880, 848 +}; + +short ulaw2short[]={ +-32124,-31100,-30076,-29052,-28028,-27004,-25980,-24956, +-23932,-22908,-21884,-20860,-19836,-18812,-17788,-16764, +-15996,-15484,-14972,-14460,-13948,-13436,-12924,-12412, +-11900,-11388,-10876,-10364, -9852, -9340, -8828, -8316, + -7932, -7676, -7420, -7164, -6908, -6652, -6396, -6140, + -5884, -5628, -5372, -5116, -4860, -4604, -4348, -4092, + -3900, -3772, -3644, -3516, -3388, -3260, -3132, -3004, + -2876, -2748, -2620, -2492, -2364, -2236, -2108, -1980, + -1884, -1820, -1756, -1692, -1628, -1564, -1500, -1436, + -1372, -1308, -1244, -1180, -1116, -1052, -988, -924, + -876, -844, -812, -780, -748, -716, -684, -652, + -620, -588, -556, -524, -492, -460, -428, -396, + -372, -356, -340, -324, -308, -292, -276, -260, + -244, -228, -212, -196, -180, -164, -148, -132, + -120, -112, -104, -96, -88, -80, -72, -64, + -56, -48, -40, -32, -24, -16, -8, 0, + 32124, 31100, 30076, 29052, 28028, 27004, 25980, 24956, + 23932, 22908, 21884, 20860, 19836, 18812, 17788, 16764, + 15996, 15484, 14972, 14460, 13948, 13436, 12924, 12412, + 11900, 11388, 10876, 10364, 9852, 9340, 8828, 8316, + 7932, 7676, 7420, 7164, 6908, 6652, 6396, 6140, + 5884, 5628, 5372, 5116, 4860, 4604, 4348, 4092, + 3900, 3772, 3644, 3516, 3388, 3260, 3132, 3004, + 2876, 2748, 2620, 2492, 2364, 2236, 2108, 1980, + 1884, 1820, 1756, 1692, 1628, 1564, 1500, 1436, + 1372, 1308, 1244, 1180, 1116, 1052, 988, 924, + 876, 844, 812, 780, 748, 716, 684, 652, + 620, 588, 556, 524, 492, 460, 428, 396, + 372, 356, 340, 324, 308, 292, 276, 260, + 244, 228, 212, 196, 180, 164, 148, 132, + 120, 112, 104, 96, 88, 80, 72, 64, + 56, 48, 40, 32, 24, 16, 8, 0 +}; diff -r fd85802f755b -r 628c85c15c7b libmpcodecs/native/cinepak.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libmpcodecs/native/cinepak.c Sat Apr 13 18:03:02 2002 +0000 @@ -0,0 +1,886 @@ +/* ------------------------------------------------------------------------ + * Radius Cinepak Video Decoder + * + * Dr. Tim Ferguson, 2001. + * For more details on the algorithm: + * http://www.csse.monash.edu.au/~timf/videocodec.html + * + * This is basically a vector quantiser with adaptive vector density. The + * frame is segmented into 4x4 pixel blocks, and each block is coded using + * either 1 or 4 vectors. + * + * There are still some issues with this code yet to be resolved. In + * particular with decoding in the strip boundaries. + * ------------------------------------------------------------------------ */ +#include +#include +#include +#include +#include +#include + +#include "config.h" +#include "mp_msg.h" +#include "bswap.h" + +#include "libvo/img_format.h" +#include "mp_image.h" + +#define DBUG 0 +#define MAX_STRIPS 32 + +/* ------------------------------------------------------------------------ */ +typedef struct +{ + unsigned char y0, y1, y2, y3; + char u, v; + + // These variables are for YV12 output: The v1 vars are for + // when the vector is doublesized and used by itself to paint a + // 4x4 block. + // This quad (y0 y0 y1 y1) is used on the 2 upper rows. + unsigned long yv12_v1_u; + // This quad (y2 y2 y3 y3) is used on the 2 lower rows. + unsigned long yv12_v1_l; + // The v4 vars are for when the vector is used as 1 of 4 vectors + // to paint a 4x4 block. + // Upper pair (y0 y1): + unsigned short yv12_v4_u; + // Lower pair (y2 y3): + unsigned short yv12_v4_l; + + // These longs are for YUY2 output: The v1 vars are for when the + // vector is doublesized and used by itself to paint a 4x4 block. + // The names stand for the upper-left, upper-right, + // lower-left, and lower-right YUY2 pixel pairs. + unsigned long yuy2_v1_ul, yuy2_v1_ur; + unsigned long yuy2_v1_ll, yuy2_v1_lr; + // The v4 vars are for when the vector is used as 1 of 4 vectors + // to paint a 4x4 block. The names stand for upper and lower + // YUY2 pixel pairs. + unsigned long yuy2_v4_u, yuy2_v4_l; + + // These longs are for BGR32 output + unsigned long rgb0, rgb1, rgb2, rgb3; + + // These char arrays are for BGR24 output + unsigned char r[4], g[4], b[4]; +} cvid_codebook; + +typedef struct { + cvid_codebook *v4_codebook[MAX_STRIPS]; + cvid_codebook *v1_codebook[MAX_STRIPS]; + unsigned long strip_num; +} cinepak_info; + + +/* ------------------------------------------------------------------------ */ +static unsigned char *in_buffer, uiclip[1024], *uiclp = NULL; + +#define SCALEBITS 16 +#define ONE_HALF ((long) 1 << (SCALEBITS-1)) +#define FIX(x) ((long) ((x) * (1L<y1)<<24)|(cb->y1<<16)|(((unsigned char)cb->y0)<<8)|(cb->y0)) +#define PACK_YV12_V4_Y(cb,y0,y1) le2me_16((((unsigned char)cb->y1)<<8)|(cb->y0)) + +static inline void read_codebook_yv12(cvid_codebook *c, int mode) +{ +unsigned char y0, y1, y2, y3, u, v; +int y_uv; + + if(mode) /* black and white */ + { + c->y0 = get_byte(); + c->y1 = get_byte(); + c->y2 = get_byte(); + c->y3 = get_byte(); + c->u = c->v = 128; + } + else /* colour */ + { + y0 = get_byte(); /* luma */ + y1 = get_byte(); + y2 = get_byte(); + y3 = get_byte(); + u = 128+get_byte(); /* chroma */ + v = 128+get_byte(); + + /* YUV * inv(CinYUV) + * | Y | | 1 -0.0655 0.0110 | | CY | + * | Cb | = | 0 1.1656 -0.0062 | | CU | + * | Cr | | 0 0.0467 1.4187 | | CV | + */ + y_uv = (int)((CU_Y_tab[u] + CV_Y_tab[v]) >> SCALEBITS); + c->y0 = uiclp[y0 + y_uv]; + c->y1 = uiclp[y1 + y_uv]; + c->y2 = uiclp[y2 + y_uv]; + c->y3 = uiclp[y3 + y_uv]; + c->u = uiclp[(int)((CU_Cb_tab[u] + CV_Cb_tab[v]) >> SCALEBITS)]; + c->v = uiclp[(int)((CU_Cr_tab[u] + CV_Cr_tab[v]) >> SCALEBITS)]; + + c->yv12_v1_u = PACK_YV12_V1_Y(c, y0, y1); + c->yv12_v1_l = PACK_YV12_V1_Y(c, y2, y3); + c->yv12_v4_u = PACK_YV12_V4_Y(c, y0, y1); + c->yv12_v4_l = PACK_YV12_V4_Y(c, y2, y3); + } +} + +/* ---------------------------------------------------------------------- */ + +inline void cvid_v1_yv12(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb) +{ +unsigned char *p; +int stride; + + if(y+3>=(unsigned int)mpi->height) return; // avoid sig11 + + // take care of the luminance + stride = mpi->stride[0]; + p = mpi->planes[0]+y*stride+x; + *((unsigned int*)p)=cb->yv12_v1_u; + *((unsigned int*)(p+stride))=cb->yv12_v1_u; + *((unsigned int*)(p+stride*2))=cb->yv12_v1_l; + *((unsigned int*)(p+stride*3))=cb->yv12_v1_l; + + // now for the chrominance + x/=2; y/=2; + + stride = mpi->stride[1]; + p = mpi->planes[1]+y*stride+x; + p[0]=p[1]=p[stride]=p[stride+1]=cb->u; + + stride = mpi->stride[2]; + p = mpi->planes[2]+y*stride+x; + p[0]=p[1]=p[stride]=p[stride+1]=cb->v; + +} + +/* ---------------------------------------------------------------------- */ +inline void cvid_v4_yv12(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb0, + cvid_codebook *cb1, cvid_codebook *cb2, cvid_codebook *cb3) +{ +unsigned char *p; +int stride; + + if(y+3>=(unsigned int)mpi->height) return; // avoid sig11 + + // take care of the luminance + stride = mpi->stride[0]; + p = mpi->planes[0]+y*stride+x; + ((unsigned short*)p)[0]=cb0->yv12_v4_u; + ((unsigned short*)p)[1]=cb1->yv12_v4_u; + ((unsigned short*)(p+stride))[0]=cb0->yv12_v4_l; + ((unsigned short*)(p+stride))[1]=cb1->yv12_v4_l; + ((unsigned short*)(p+stride*2))[0]=cb2->yv12_v4_u; + ((unsigned short*)(p+stride*2))[1]=cb3->yv12_v4_u; + ((unsigned short*)(p+stride*3))[0]=cb2->yv12_v4_l; + ((unsigned short*)(p+stride*3))[1]=cb3->yv12_v4_l; + + // now for the chrominance + x/=2; y/=2; + + stride = mpi->stride[1]; + p = mpi->planes[1]+y*stride+x; + p[0]=cb0->u; p[1]=cb1->u; + p[stride]=cb2->u; p[stride+1]=cb3->u; + + stride = mpi->stride[2]; + p = mpi->planes[2]+y*stride+x; + p[0]=cb0->v; p[1]=cb1->v; + p[stride]=cb2->v; p[stride+1]=cb3->v; + +} + +/* ---------------------------------------------------------------------- */ + +#define PACK_YUY2(cb,y0,y1,u,v) le2me_32(((((unsigned char)cb->v)<<24)|(cb->y1<<16)|(((unsigned char)cb->u)<<8)|(cb->y0))) + +static inline void read_codebook_yuy2(cvid_codebook *c, int mode) +{ +unsigned char y0, y1, y2, y3, u, v; +int y_uv; + + if(mode) /* black and white */ + { + c->y0 = get_byte(); + c->y1 = get_byte(); + c->y2 = get_byte(); + c->y3 = get_byte(); + c->u = c->v = 128; + } + else /* colour */ + { + y0 = get_byte(); /* luma */ + y1 = get_byte(); + y2 = get_byte(); + y3 = get_byte(); + u = 128+get_byte(); /* chroma */ + v = 128+get_byte(); + + /* YUV * inv(CinYUV) + * | Y | | 1 -0.0655 0.0110 | | CY | + * | Cb | = | 0 1.1656 -0.0062 | | CU | + * | Cr | | 0 0.0467 1.4187 | | CV | + */ + y_uv = (int)((CU_Y_tab[u] + CV_Y_tab[v]) >> SCALEBITS); + c->y0 = uiclp[y0 + y_uv]; + c->y1 = uiclp[y1 + y_uv]; + c->y2 = uiclp[y2 + y_uv]; + c->y3 = uiclp[y3 + y_uv]; + c->u = uiclp[(int)((CU_Cb_tab[u] + CV_Cb_tab[v]) >> SCALEBITS)]; + c->v = uiclp[(int)((CU_Cr_tab[u] + CV_Cr_tab[v]) >> SCALEBITS)]; + + c->yuy2_v4_u = PACK_YUY2(c, y0, y1, u, v); + c->yuy2_v4_l = PACK_YUY2(c, y2, y3, u, v); + c->yuy2_v1_ul = PACK_YUY2(c, y0, y0, u, v); + c->yuy2_v1_ur = PACK_YUY2(c, y1, y1, u, v); + c->yuy2_v1_ll = PACK_YUY2(c, y2, y2, u, v); + c->yuy2_v1_lr = PACK_YUY2(c, y3, y3, u, v); + } +} + +/* ------------------------------------------------------------------------ */ +inline void cvid_v1_yuy2(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb) +{ +int stride = mpi->stride[0] / 2; +unsigned long *vptr = (unsigned long *)mpi->planes[0]; + + if(y+3>=(unsigned int)mpi->height) return; // avoid sig11 + + vptr += (y * mpi->stride[0] + x) / 2; + + vptr[0] = cb->yuy2_v1_ul; + vptr[1] = cb->yuy2_v1_ur; + + vptr += stride; + vptr[0] = cb->yuy2_v1_ul; + vptr[1] = cb->yuy2_v1_ur; + + vptr += stride; + vptr[0] = cb->yuy2_v1_ll; + vptr[1] = cb->yuy2_v1_lr; + + vptr += stride; + vptr[0] = cb->yuy2_v1_ll; + vptr[1] = cb->yuy2_v1_lr; +} + + +/* ------------------------------------------------------------------------ */ +inline void cvid_v4_yuy2(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb0, + cvid_codebook *cb1, cvid_codebook *cb2, cvid_codebook *cb3) +{ +int stride = mpi->stride[0] / 2; +unsigned long *vptr = (unsigned long *)mpi->planes[0]; + + if(y+3>=(unsigned int)mpi->height) return; // avoid sig11 + + vptr += (y * mpi->stride[0] + x) / 2; + + vptr[0] = cb0->yuy2_v4_u; + vptr[1] = cb1->yuy2_v4_u; + + vptr += stride; + vptr[0] = cb0->yuy2_v4_l; + vptr[1] = cb1->yuy2_v4_l; + + vptr += stride; + vptr[0] = cb2->yuy2_v4_u; + vptr[1] = cb3->yuy2_v4_u; + + vptr += stride; + vptr[0] = cb2->yuy2_v4_l; + vptr[1] = cb3->yuy2_v4_l; +} + +/* ---------------------------------------------------------------------- */ +static inline void read_codebook_32(cvid_codebook *c, int mode) +{ +int uvr, uvg, uvb; + + if(mode) /* black and white */ + { + c->y0 = get_byte(); + c->y1 = get_byte(); + c->y2 = get_byte(); + c->y3 = get_byte(); + c->u = c->v = 0; + + c->rgb0 = (c->y0 << 16) | (c->y0 << 8) | c->y0; + c->rgb1 = (c->y1 << 16) | (c->y1 << 8) | c->y1; + c->rgb2 = (c->y2 << 16) | (c->y2 << 8) | c->y2; + c->rgb3 = (c->y3 << 16) | (c->y3 << 8) | c->y3; + } + else /* colour */ + { + c->y0 = get_byte(); /* luma */ + c->y1 = get_byte(); + c->y2 = get_byte(); + c->y3 = get_byte(); + c->u = get_byte(); /* chroma */ + c->v = get_byte(); + + uvr = c->v << 1; + uvg = -((c->u+1) >> 1) - c->v; + uvb = c->u << 1; + + c->rgb0 = le2me_32((uiclp[c->y0 + uvr] << 16) | (uiclp[c->y0 + uvg] << 8) | uiclp[c->y0 + uvb]); + c->rgb1 = le2me_32((uiclp[c->y1 + uvr] << 16) | (uiclp[c->y1 + uvg] << 8) | uiclp[c->y1 + uvb]); + c->rgb2 = le2me_32((uiclp[c->y2 + uvr] << 16) | (uiclp[c->y2 + uvg] << 8) | uiclp[c->y2 + uvb]); + c->rgb3 = le2me_32((uiclp[c->y3 + uvr] << 16) | (uiclp[c->y3 + uvg] << 8) | uiclp[c->y3 + uvb]); + } +} + + +/* ------------------------------------------------------------------------ */ +inline void cvid_v1_32(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb) +{ +int stride = mpi->stride[0]; +unsigned long *vptr = (unsigned long *)mpi->planes[0]; +unsigned long rgb; + + if(y+3>=(unsigned int)mpi->height) return; // avoid sig11 + + vptr += (y * stride + x); + + vptr[0] = rgb = cb->rgb0; vptr[1] = rgb; + vptr[2] = rgb = cb->rgb1; vptr[3] = rgb; + vptr += stride; + vptr[0] = rgb = cb->rgb0; vptr[1] = rgb; + vptr[2] = rgb = cb->rgb1; vptr[3] = rgb; + vptr += stride; + vptr[0] = rgb = cb->rgb2; vptr[1] = rgb; + vptr[2] = rgb = cb->rgb3; vptr[3] = rgb; + vptr += stride; + vptr[0] = rgb = cb->rgb2; vptr[1] = rgb; + vptr[2] = rgb = cb->rgb3; vptr[3] = rgb; +} + + +/* ------------------------------------------------------------------------ */ +inline void cvid_v4_32(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb0, + cvid_codebook *cb1, cvid_codebook *cb2, cvid_codebook *cb3) +{ +int stride = mpi->stride[0]; +unsigned long *vptr = (unsigned long *)mpi->planes[0]; + + if(y+3>=(unsigned int)mpi->height) return; // avoid sig11 + + vptr += (y * stride + x); + + vptr[0] = cb0->rgb0; + vptr[1] = cb0->rgb1; + vptr[2] = cb1->rgb0; + vptr[3] = cb1->rgb1; + vptr += stride; + vptr[0] = cb0->rgb2; + vptr[1] = cb0->rgb3; + vptr[2] = cb1->rgb2; + vptr[3] = cb1->rgb3; + vptr += stride; + vptr[0] = cb2->rgb0; + vptr[1] = cb2->rgb1; + vptr[2] = cb3->rgb0; + vptr[3] = cb3->rgb1; + vptr += stride; + vptr[0] = cb2->rgb2; + vptr[1] = cb2->rgb3; + vptr[2] = cb3->rgb2; + vptr[3] = cb3->rgb3; +} + + +/* ---------------------------------------------------------------------- */ +static inline void read_codebook_24(cvid_codebook *c, int mode) +{ +int uvr, uvg, uvb; + + if(mode) /* black and white */ + { + c->y0 = get_byte(); + c->y1 = get_byte(); + c->y2 = get_byte(); + c->y3 = get_byte(); + c->u = c->v = 0; + + c->r[0] = c->g[0] = c->b[0] = c->y0; + c->r[1] = c->g[1] = c->b[1] = c->y1; + c->r[2] = c->g[2] = c->b[2] = c->y2; + c->r[3] = c->g[3] = c->b[3] = c->y3; + } + else /* colour */ + { + c->y0 = get_byte(); /* luma */ + c->y1 = get_byte(); + c->y2 = get_byte(); + c->y3 = get_byte(); + c->u = get_byte(); /* chroma */ + c->v = get_byte(); + + uvr = c->v << 1; + uvg = -((c->u+1) >> 1) - c->v; + uvb = c->u << 1; + + c->r[0] = uiclp[c->y0 + uvr]; c->g[0] = uiclp[c->y0 + uvg]; c->b[0] = uiclp[c->y0 + uvb]; + c->r[1] = uiclp[c->y1 + uvr]; c->g[1] = uiclp[c->y1 + uvg]; c->b[1] = uiclp[c->y1 + uvb]; + c->r[2] = uiclp[c->y2 + uvr]; c->g[2] = uiclp[c->y2 + uvg]; c->b[2] = uiclp[c->y2 + uvb]; + c->r[3] = uiclp[c->y3 + uvr]; c->g[3] = uiclp[c->y3 + uvg]; c->b[3] = uiclp[c->y3 + uvb]; + } +} + + +/* ------------------------------------------------------------------------ */ +inline void cvid_v1_24(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb) +{ +unsigned char r, g, b; +int stride = (mpi->stride[0]-4)*3; +unsigned char *vptr = mpi->planes[0] + (y * mpi->stride[0] + x) * 3; + + if(y+3>=(unsigned int)mpi->height) return; // avoid sig11 + + *vptr++ = b = cb->b[0]; *vptr++ = g = cb->g[0]; *vptr++ = r = cb->r[0]; + *vptr++ = b; *vptr++ = g; *vptr++ = r; + *vptr++ = b = cb->b[1]; *vptr++ = g = cb->g[1]; *vptr++ = r = cb->r[1]; + *vptr++ = b; *vptr++ = g; *vptr++ = r; + vptr += stride; + *vptr++ = b = cb->b[0]; *vptr++ = g = cb->g[0]; *vptr++ = r = cb->r[0]; + *vptr++ = b; *vptr++ = g; *vptr++ = r; + *vptr++ = b = cb->b[1]; *vptr++ = g = cb->g[1]; *vptr++ = r = cb->r[1]; + *vptr++ = b; *vptr++ = g; *vptr++ = r; + vptr += stride; + *vptr++ = b = cb->b[2]; *vptr++ = g = cb->g[2]; *vptr++ = r = cb->r[2]; + *vptr++ = b; *vptr++ = g; *vptr++ = r; + *vptr++ = b = cb->b[3]; *vptr++ = g = cb->g[3]; *vptr++ = r = cb->r[3]; + *vptr++ = b; *vptr++ = g; *vptr++ = r; + vptr += stride; + *vptr++ = b = cb->b[2]; *vptr++ = g = cb->g[2]; *vptr++ = r = cb->r[2]; + *vptr++ = b; *vptr++ = g; *vptr++ = r; + *vptr++ = b = cb->b[3]; *vptr++ = g = cb->g[3]; *vptr++ = r = cb->r[3]; + *vptr++ = b; *vptr++ = g; *vptr++ = r; +} + + +/* ------------------------------------------------------------------------ */ +inline void cvid_v4_24(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb0, + cvid_codebook *cb1, cvid_codebook *cb2, cvid_codebook *cb3) +{ +int stride = (mpi->stride[0]-4)*3; +unsigned char *vptr = mpi->planes[0] + (y * mpi->stride[0] + x) * 3; + + if(y+3>=(unsigned int)mpi->height) return; // avoid sig11 + + *vptr++ = cb0->b[0]; *vptr++ = cb0->g[0]; *vptr++ = cb0->r[0]; + *vptr++ = cb0->b[1]; *vptr++ = cb0->g[1]; *vptr++ = cb0->r[1]; + *vptr++ = cb1->b[0]; *vptr++ = cb1->g[0]; *vptr++ = cb1->r[0]; + *vptr++ = cb1->b[1]; *vptr++ = cb1->g[1]; *vptr++ = cb1->r[1]; + vptr += stride; + *vptr++ = cb0->b[2]; *vptr++ = cb0->g[2]; *vptr++ = cb0->r[2]; + *vptr++ = cb0->b[3]; *vptr++ = cb0->g[3]; *vptr++ = cb0->r[3]; + *vptr++ = cb1->b[2]; *vptr++ = cb1->g[2]; *vptr++ = cb1->r[2]; + *vptr++ = cb1->b[3]; *vptr++ = cb1->g[3]; *vptr++ = cb1->r[3]; + vptr += stride; + *vptr++ = cb2->b[0]; *vptr++ = cb2->g[0]; *vptr++ = cb2->r[0]; + *vptr++ = cb2->b[1]; *vptr++ = cb2->g[1]; *vptr++ = cb2->r[1]; + *vptr++ = cb3->b[0]; *vptr++ = cb3->g[0]; *vptr++ = cb3->r[0]; + *vptr++ = cb3->b[1]; *vptr++ = cb3->g[1]; *vptr++ = cb3->r[1]; + vptr += stride; + *vptr++ = cb2->b[2]; *vptr++ = cb2->g[2]; *vptr++ = cb2->r[2]; + *vptr++ = cb2->b[3]; *vptr++ = cb2->g[3]; *vptr++ = cb2->r[3]; + *vptr++ = cb3->b[2]; *vptr++ = cb3->g[2]; *vptr++ = cb3->r[2]; + *vptr++ = cb3->b[3]; *vptr++ = cb3->g[3]; *vptr++ = cb3->r[3]; +} + +/* ------------------------------------------------------------------------ + * Call this function once at the start of the sequence and save the + * returned context for calls to decode_cinepak(). + */ +void *decode_cinepak_init(void) +{ +cinepak_info *cvinfo; +int i, x; + + if((cvinfo = calloc(sizeof(cinepak_info), 1)) == NULL) return NULL; + cvinfo->strip_num = 0; + + if(uiclp == NULL) + { + uiclp = uiclip+512; + for(i = -512; i < 512; i++) + uiclp[i] = (i < 0 ? 0 : (i > 255 ? 255 : i)); + } + + for(i = 0, x = -128; i < 256; i++, x++) + { + CU_Y_tab[i] = (-FIX(0.0655)) * x; + CV_Y_tab[i] = (FIX(0.0110)) * x + ONE_HALF; + CU_Cb_tab[i] = (FIX(1.1656)) * x; + CV_Cb_tab[i] = (-FIX(0.0062)) * x + ONE_HALF + FIX(128); + CU_Cr_tab[i] = (FIX(0.0467)) * x; + CV_Cr_tab[i] = (FIX(1.4187)) * x + ONE_HALF + FIX(128); + } + + return (void *)cvinfo; +} + + +/* ------------------------------------------------------------------------ + * This function decodes a buffer containing a Cinepak encoded frame. + * + * context - the context created by decode_cinepak_init(). + * buf - the input buffer to be decoded + * size - the size of the input buffer + * frame - the output frame buffer + * width - the width of the output frame + * height - the height of the output frame + * bit_per_pixel - the number of bits per pixel allocated to the output + * frame; depths support: + * 32: BGR32 + * 24: BGR24 + * 16: YUY2 + * 12: YV12 + */ +void decode_cinepak(void *context, unsigned char *buf, int size, mp_image_t *mpi) +{ +cinepak_info *cvinfo = (cinepak_info *)context; +cvid_codebook *v4_codebook, *v1_codebook, *codebook = NULL; +unsigned long x, y, y_bottom, frame_flags, strips, cv_width, cv_height, cnum, + strip_id, chunk_id, x0, y0, x1, y1, ci, flag, mask; +long len, top_size, chunk_size; +unsigned int i, cur_strip, d0, d1, d2, d3; +int modulo; +void (*read_codebook)(cvid_codebook *c, int mode) = NULL; +void (*cvid_v1)(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb) = NULL; +void (*cvid_v4)(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb0, + cvid_codebook *cb1, cvid_codebook *cb2, cvid_codebook *cb3) = NULL; + + x = y = 0; + y_bottom = 0; + in_buffer = buf; + + frame_flags = get_byte(); + len = get_byte() << 16; + len |= get_byte() << 8; + len |= get_byte(); + + switch(mpi->imgfmt) + { + case IMGFMT_I420: + case IMGFMT_IYUV: + case IMGFMT_YV12: // YV12 + read_codebook = read_codebook_yv12; + cvid_v1 = cvid_v1_yv12; + cvid_v4 = cvid_v4_yv12; + break; + case IMGFMT_YUY2: // YUY2 + read_codebook = read_codebook_yuy2; + cvid_v1 = cvid_v1_yuy2; + cvid_v4 = cvid_v4_yuy2; + break; + case IMGFMT_BGR24: // BGR24 + read_codebook = read_codebook_24; + cvid_v1 = cvid_v1_24; + cvid_v4 = cvid_v4_24; + break; + case IMGFMT_BGR32: // BGR32 + read_codebook = read_codebook_32; + cvid_v1 = cvid_v1_32; + cvid_v4 = cvid_v4_32; + break; + } + + if(len != size) + { + if(len & 0x01) len++; /* AVIs tend to have a size mismatch */ + if(len != size) + { + mp_msg(MSGT_DECVIDEO, MSGL_WARN, "CVID: corruption %d (QT/AVI) != %ld (CV)\n", size, len); + // return; + } + } + + cv_width = get_word(); + cv_height = get_word(); + strips = get_word(); + + if(strips > cvinfo->strip_num) + { + if(strips >= MAX_STRIPS) + { + mp_msg(MSGT_DECVIDEO, MSGL_WARN, "CVID: strip overflow (more than %d)\n", MAX_STRIPS); + return; + } + + for(i = cvinfo->strip_num; i < strips; i++) + { + if((cvinfo->v4_codebook[i] = (cvid_codebook *)calloc(sizeof(cvid_codebook), 260)) == NULL) + { + mp_msg(MSGT_DECVIDEO, MSGL_WARN, "CVID: codebook v4 alloc err\n"); + return; + } + + if((cvinfo->v1_codebook[i] = (cvid_codebook *)calloc(sizeof(cvid_codebook), 260)) == NULL) + { + mp_msg(MSGT_DECVIDEO, MSGL_WARN, "CVID: codebook v1 alloc err\n"); + return; + } + } + } + cvinfo->strip_num = strips; + +#if DBUG + mp_msg(MSGT_DECVIDEO, MSGL_WARN, "CVID: <%ld,%ld> strips %ld\n", cv_width, cv_height, strips); +#endif + + for(cur_strip = 0; cur_strip < strips; cur_strip++) + { + v4_codebook = cvinfo->v4_codebook[cur_strip]; + v1_codebook = cvinfo->v1_codebook[cur_strip]; + + if((cur_strip > 0) && (!(frame_flags & 0x01))) + { + memcpy(cvinfo->v4_codebook[cur_strip], cvinfo->v4_codebook[cur_strip-1], 260 * sizeof(cvid_codebook)); + memcpy(cvinfo->v1_codebook[cur_strip], cvinfo->v1_codebook[cur_strip-1], 260 * sizeof(cvid_codebook)); + } + + strip_id = get_word(); /* 1000 = key strip, 1100 = iter strip */ + top_size = get_word(); + y0 = get_word(); /* FIXME: most of these are ignored at the moment */ + x0 = get_word(); + y1 = get_word(); + x1 = get_word(); + + y_bottom += y1; + top_size -= 12; + x = 0; +// if(x1 != (unsigned int)mpi->width) +// mp_msg(MSGT_DECVIDEO, MSGL_WARN, "CVID: Warning x1 (%ld) != width (%d)\n", x1, mpi->width); + +//x1 = mpi->width; +#if DBUG + mp_msg(MSGT_DECVIDEO, MSGL_WARN, " %d) %04lx %04ld <%ld,%ld> <%ld,%ld> yt %ld %d\n", + cur_strip, strip_id, top_size, x0, y0, x1, y1, y_bottom); +#endif + + while(top_size > 0) + { + chunk_id = get_word(); + chunk_size = get_word(); + +#if DBUG + mp_msg(MSGT_DECVIDEO, MSGL_WARN, " %04lx %04ld\n", chunk_id, chunk_size); +#endif + top_size -= chunk_size; + chunk_size -= 4; + + switch(chunk_id) + { + /* -------------------- Codebook Entries -------------------- */ + case 0x2000: + case 0x2200: + modulo = chunk_size % 6; + codebook = (chunk_id == 0x2200 ? v1_codebook : v4_codebook); + cnum = (chunk_size - modulo) / 6; + for(i = 0; i < cnum; i++) read_codebook(codebook+i, 0); + while (modulo--) + in_buffer++; + break; + + case 0x2400: + case 0x2600: /* 8 bit per pixel */ + codebook = (chunk_id == 0x2600 ? v1_codebook : v4_codebook); + cnum = chunk_size/4; + for(i = 0; i < cnum; i++) read_codebook(codebook+i, 1); + break; + + case 0x2100: + case 0x2300: + codebook = (chunk_id == 0x2300 ? v1_codebook : v4_codebook); + + ci = 0; + while(chunk_size > 3) + { + flag = get_long(); + chunk_size -= 4; + + for(i = 0; i < 32; i++) + { + if(flag & 0x80000000) + { + chunk_size -= 6; + read_codebook(codebook+ci, 0); + } + + ci++; + flag <<= 1; + } + } + while(chunk_size > 0) { skip_byte(); chunk_size--; } + break; + + case 0x2500: + case 0x2700: /* 8 bit per pixel */ + codebook = (chunk_id == 0x2700 ? v1_codebook : v4_codebook); + + ci = 0; + while(chunk_size > 0) + { + flag = get_long(); + chunk_size -= 4; + + for(i = 0; i < 32; i++) + { + if(flag & 0x80000000) + { + chunk_size -= 4; + read_codebook(codebook+ci, 1); + } + + ci++; + flag <<= 1; + } + } + while(chunk_size > 0) { skip_byte(); chunk_size--; } + break; + + /* -------------------- Frame -------------------- */ + case 0x3000: + while((chunk_size > 0) && (y < y_bottom)) + { + flag = get_long(); + chunk_size -= 4; + + for(i = 0; i < 32; i++) + { + if(y >= y_bottom) break; + if(flag & 0x80000000) /* 4 bytes per block */ + { + d0 = get_byte(); + d1 = get_byte(); + d2 = get_byte(); + d3 = get_byte(); + chunk_size -= 4; + cvid_v4(mpi, x, y, v4_codebook+d0, v4_codebook+d1, v4_codebook+d2, v4_codebook+d3); + } + else /* 1 byte per block */ + { + cvid_v1(mpi, x, y, v1_codebook + get_byte()); + chunk_size--; + } + + x += 4; + if(x >= (unsigned int)x1) + { + x = 0; + y += 4; + } + flag <<= 1; + } + } + while(chunk_size > 0) { skip_byte(); chunk_size--; } + break; + + case 0x3100: + while((chunk_size > 0) && (y < y_bottom)) + { + /* ---- flag bits: 0 = SKIP, 10 = V1, 11 = V4 ---- */ + flag = (unsigned long)get_long(); + chunk_size -= 4; + mask = 0x80000000; + + while((mask) && (y < y_bottom)) + { + if(flag & mask) + { + if(mask == 1) + { + if(chunk_size < 0) break; + flag = (unsigned long)get_long(); + chunk_size -= 4; + mask = 0x80000000; + } + else mask >>= 1; + + if(flag & mask) /* V4 */ + { + d0 = get_byte(); + d1 = get_byte(); + d2 = get_byte(); + d3 = get_byte(); + chunk_size -= 4; + cvid_v4(mpi, x, y, v4_codebook+d0, v4_codebook+d1, v4_codebook+d2, v4_codebook+d3); + } + else /* V1 */ + { + chunk_size--; + cvid_v1(mpi, x, y, v1_codebook + get_byte()); + } + } /* else SKIP */ + + mask >>= 1; + x += 4; + if(x >= (unsigned int)x1) + { + x = 0; + y += 4; + } + } + } + + while(chunk_size > 0) { skip_byte(); chunk_size--; } + break; + + case 0x3200: /* each byte is a V1 codebook */ + while((chunk_size > 0) && (y < y_bottom)) + { + cvid_v1(mpi, x, y, v1_codebook + get_byte()); + chunk_size--; + x += 4; + if(x >= (unsigned int)x1) + { + x = 0; + y += 4; + } + } + while(chunk_size > 0) { skip_byte(); chunk_size--; } + break; + + default: + mp_msg(MSGT_DECVIDEO, MSGL_WARN, "CVID: unknown chunk_id %08lx\n", chunk_id); + while(chunk_size > 0) { skip_byte(); chunk_size--; } + break; + } + } + } + + if(len != size) + { + if(len & 0x01) len++; /* AVIs tend to have a size mismatch */ + if(len != size) + { + long xlen; + skip_byte(); + xlen = get_byte() << 16; + xlen |= get_byte() << 8; + xlen |= get_byte(); /* Read Len */ + mp_msg(MSGT_DECVIDEO, MSGL_WARN, "CVID: END INFO chunk size %d cvid size1 %ld cvid size2 %ld\n", size, len, xlen); + } + } +} + diff -r fd85802f755b -r 628c85c15c7b libmpcodecs/native/cyuv.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libmpcodecs/native/cyuv.c Sat Apr 13 18:03:02 2002 +0000 @@ -0,0 +1,135 @@ +/* ------------------------------------------------------------------------ + * Creative YUV Video Decoder + * + * Dr. Tim Ferguson, 2001. + * For more details on the algorithm: + * http://www.csse.monash.edu.au/~timf/videocodec.html + * + * This is a very simple predictive coder. A video frame is coded in YUV411 + * format. The first pixel of each scanline is coded using the upper four + * bits of its absolute value. Subsequent pixels for the scanline are coded + * using the difference between the last pixel and the current pixel (DPCM). + * The DPCM values are coded using a 16 entry table found at the start of the + * frame. Thus four bits per component are used and are as follows: + * UY VY YY UY VY YY UY VY... + * This code assumes the frame width will be a multiple of four pixels. This + * should probably be fixed. + * ------------------------------------------------------------------------ */ +#include +#include +#include +#include +#include + +#include "loader/wine/avifmt.h" // for mmioFOURCC macro + +/* ------------------------------------------------------------------------ + * This function decodes a buffer containing a CYUV encoded frame. + * + * buf - the input buffer to be decoded + * size - the size of the input buffer + * frame - the output frame buffer (UYVY format) + * width - the width of the output frame + * height - the height of the output frame + * format - the requested output format + */ +void decode_cyuv(unsigned char *buf, int size, unsigned char *frame, int width, int height, int format) +{ +int i, xpos, ypos, cur_Y = 0, cur_U = 0, cur_V = 0; +char *delta_y_tbl, *delta_c_tbl, *ptr; + + delta_y_tbl = buf + 16; + delta_c_tbl = buf + 32; + ptr = buf + (16 * 3); + + for(ypos = 0; ypos < height; ypos++) + for(xpos = 0; xpos < width; xpos += 4) + { + if(xpos == 0) /* first pixels in scanline */ + { + cur_U = *(ptr++); + cur_Y = (cur_U & 0x0f) << 4; + cur_U = cur_U & 0xf0; + if (format == mmioFOURCC('Y','U','Y','2')) + { + *frame++ = cur_Y; + *frame++ = cur_U; + } + else + { + *frame++ = cur_U; + *frame++ = cur_Y; + } + + cur_V = *(ptr++); + cur_Y = (cur_Y + delta_y_tbl[cur_V & 0x0f]) & 0xff; + cur_V = cur_V & 0xf0; + if (format == mmioFOURCC('Y','U','Y','2')) + { + *frame++ = cur_Y; + *frame++ = cur_V; + } + else + { + *frame++ = cur_V; + *frame++ = cur_Y; + } + } + else /* subsequent pixels in scanline */ + { + i = *(ptr++); + cur_U = (cur_U + delta_c_tbl[i >> 4]) & 0xff; + cur_Y = (cur_Y + delta_y_tbl[i & 0x0f]) & 0xff; + if (format == mmioFOURCC('Y','U','Y','2')) + { + *frame++ = cur_Y; + *frame++ = cur_U; + } + else + { + *frame++ = cur_U; + *frame++ = cur_Y; + } + + i = *(ptr++); + cur_V = (cur_V + delta_c_tbl[i >> 4]) & 0xff; + cur_Y = (cur_Y + delta_y_tbl[i & 0x0f]) & 0xff; + if (format == mmioFOURCC('Y','U','Y','2')) + { + *frame++ = cur_Y; + *frame++ = cur_V; + } + else + { + *frame++ = cur_V; + *frame++ = cur_Y; + } + } + + i = *(ptr++); + cur_Y = (cur_Y + delta_y_tbl[i & 0x0f]) & 0xff; + if (format == mmioFOURCC('Y','U','Y','2')) + { + *frame++ = cur_Y; + *frame++ = cur_U; + } + else + { + *frame++ = cur_U; + *frame++ = cur_Y; + } + + cur_Y = (cur_Y + delta_y_tbl[i >> 4]) & 0xff; + if (format == mmioFOURCC('Y','U','Y','2')) + { + *frame++ = cur_Y; + *frame++ = cur_V; + } + else + { + *frame++ = cur_V; + *frame++ = cur_Y; + } + } +} + diff -r fd85802f755b -r 628c85c15c7b libmpcodecs/native/fli.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libmpcodecs/native/fli.c Sat Apr 13 18:03:02 2002 +0000 @@ -0,0 +1,379 @@ +/* + FLI Decoder for MPlayer + + (C) 2001 Mike Melanson + + 32bpp support (c) alex + + Additional code and bug fixes by Roberto Togni + + For information on the FLI format, as well as various traps to + avoid while programming one, visit: + http://www.pcisys.net/~melanson/codecs/ +*/ + +#include +#include +#include "config.h" +#include "bswap.h" +#include "mp_msg.h" + +#define LE_16(x) (le2me_16(*(unsigned short *)(x))) +#define LE_32(x) (le2me_32(*(unsigned int *)(x))) + +#define FLI_256_COLOR 4 +#define FLI_DELTA 7 +#define FLI_COLOR 11 +#define FLI_LC 12 +#define FLI_BLACK 13 +#define FLI_BRUN 15 +#define FLI_COPY 16 +#define FLI_MINI 18 + +// 256 RGB entries; 25% of these bytes will be unused, but it's faster +// to index 4-byte entries +#define PALETTE_SIZE 1024 +static unsigned char palette[PALETTE_SIZE]; + +void *init_fli_decoder(int width, int height) +{ + memset(palette, 0, PALETTE_SIZE); + + return malloc(width * height * sizeof (unsigned char)); +} + +void decode_fli_frame( + unsigned char *encoded, + int encoded_size, + unsigned char *decoded, + int width, + int height, + int bytes_per_pixel, + void *context) +{ + int stream_ptr = 0; + int stream_ptr_after_color_chunk; + int pixel_ptr; + int palette_ptr1; + int palette_ptr2; + unsigned char palette_idx1; + unsigned char palette_idx2; + + unsigned int frame_size; + int num_chunks; + + unsigned int chunk_size; + int chunk_type; + + int i, j; + + int color_packets; + int color_changes; + int color_scale; + + int lines; + int compressed_lines; + int starting_line; + signed short line_packets; + int y_ptr; + int line_inc = width * bytes_per_pixel; + signed char byte_run; + int pixel_skip; + int update_whole_frame = 0; // Palette change flag + unsigned char *fli_ghost_image = (unsigned char *)context; + int ghost_pixel_ptr; + int ghost_y_ptr; + + frame_size = LE_32(&encoded[stream_ptr]); + stream_ptr += 6; // skip the magic number + num_chunks = LE_16(&encoded[stream_ptr]); + stream_ptr += 10; // skip padding + + // iterate through the chunks + frame_size -= 16; + while ((frame_size > 0) && (num_chunks > 0)) + { + chunk_size = LE_32(&encoded[stream_ptr]); + stream_ptr += 4; + chunk_type = LE_16(&encoded[stream_ptr]); + stream_ptr += 2; + + switch (chunk_type) + { + case FLI_256_COLOR: + case FLI_COLOR: + stream_ptr_after_color_chunk = stream_ptr + chunk_size - 6; + if (chunk_type == FLI_COLOR) + color_scale = 4; + else + color_scale = 1; + // set up the palette + color_packets = LE_16(&encoded[stream_ptr]); + stream_ptr += 2; + palette_ptr1 = 0; + for (i = 0; i < color_packets; i++) + { + // first byte is how many colors to skip + palette_ptr1 += (encoded[stream_ptr++] * 4); + // wrap around, for good measure + if (palette_ptr1 >= PALETTE_SIZE) + palette_ptr1 = 0; + // next byte indicates how many entries to change + color_changes = encoded[stream_ptr++]; + // if there are 0 color changes, there are actually 256 + if (color_changes == 0) + color_changes = 256; + for (j = 0; j < color_changes; j++) + { + palette[palette_ptr1++] = encoded[stream_ptr + 2] * color_scale; + palette[palette_ptr1++] = encoded[stream_ptr + 1] * color_scale; + palette[palette_ptr1++] = encoded[stream_ptr + 0] * color_scale; + palette_ptr1++; + stream_ptr += 3; + } + } + + // color chunks sometimes have weird 16-bit alignment issues; + // therefore, take the hardline approach and set the stream_ptr + // to the value calculate w.r.t. the size specified by the color + // chunk header + stream_ptr = stream_ptr_after_color_chunk; + + /* Palette has changed, must update frame */ + update_whole_frame = 1; + break; + + case FLI_DELTA: + y_ptr = ghost_y_ptr = 0; + compressed_lines = LE_16(&encoded[stream_ptr]); + stream_ptr += 2; + while (compressed_lines > 0) + { + line_packets = LE_16(&encoded[stream_ptr]); + stream_ptr += 2; + if (line_packets < 0) + { + line_packets = -line_packets; + y_ptr += (line_packets * line_inc); + ghost_y_ptr += (line_packets * width); + } + else + { + pixel_ptr = y_ptr; + ghost_pixel_ptr = ghost_y_ptr; + for (i = 0; i < line_packets; i++) + { + // account for the skip bytes + pixel_skip = encoded[stream_ptr++]; + pixel_ptr += pixel_skip * bytes_per_pixel; + ghost_pixel_ptr += pixel_skip; + byte_run = encoded[stream_ptr++]; + if (byte_run < 0) + { + byte_run = -byte_run; + palette_ptr1 = (palette_idx1 = encoded[stream_ptr++]) * 4; + palette_ptr2 = (palette_idx2 = encoded[stream_ptr++]) * 4; + for (j = 0; j < byte_run; j++) + { + fli_ghost_image[ghost_pixel_ptr++] = palette_idx1; + decoded[pixel_ptr++] = palette[palette_ptr1 + 0]; + decoded[pixel_ptr++] = palette[palette_ptr1 + 1]; + decoded[pixel_ptr++] = palette[palette_ptr1 + 2]; + if (bytes_per_pixel == 4) /* 32bpp */ + pixel_ptr++; + + fli_ghost_image[ghost_pixel_ptr++] = palette_idx2; + decoded[pixel_ptr++] = palette[palette_ptr2 + 0]; + decoded[pixel_ptr++] = palette[palette_ptr2 + 1]; + decoded[pixel_ptr++] = palette[palette_ptr2 + 2]; + if (bytes_per_pixel == 4) /* 32bpp */ + pixel_ptr++; + } + } + else + { + for (j = 0; j < byte_run * 2; j++) + { + palette_ptr1 = (palette_idx1 = encoded[stream_ptr++]) * 4; + fli_ghost_image[ghost_pixel_ptr++] = palette_idx1; + decoded[pixel_ptr++] = palette[palette_ptr1 + 0]; + decoded[pixel_ptr++] = palette[palette_ptr1 + 1]; + decoded[pixel_ptr++] = palette[palette_ptr1 + 2]; + if (bytes_per_pixel == 4) /* 32bpp */ + pixel_ptr++; + } + } + } + y_ptr += line_inc; + ghost_y_ptr += width; + compressed_lines--; + } + } + break; + + case FLI_LC: + // line compressed + starting_line = LE_16(&encoded[stream_ptr]); + stream_ptr += 2; + y_ptr = starting_line * line_inc; + ghost_y_ptr = starting_line * width; + + compressed_lines = LE_16(&encoded[stream_ptr]); + stream_ptr += 2; + while (compressed_lines > 0) + { + pixel_ptr = y_ptr; + ghost_pixel_ptr = ghost_y_ptr; + line_packets = encoded[stream_ptr++]; + if (line_packets > 0) + { + for (i = 0; i < line_packets; i++) + { + // account for the skip bytes + pixel_skip = encoded[stream_ptr++]; + pixel_ptr += pixel_skip * bytes_per_pixel; + ghost_pixel_ptr += pixel_skip; + byte_run = encoded[stream_ptr++]; + if (byte_run > 0) + { + for (j = 0; j < byte_run; j++) + { + palette_ptr1 = (palette_idx1 = encoded[stream_ptr++]) * 4; + fli_ghost_image[ghost_pixel_ptr++] = palette_idx1; + decoded[pixel_ptr++] = palette[palette_ptr1 + 0]; + decoded[pixel_ptr++] = palette[palette_ptr1 + 1]; + decoded[pixel_ptr++] = palette[palette_ptr1 + 2]; + if (bytes_per_pixel == 4) /* 32bpp */ + pixel_ptr++; + } + } + else + { + byte_run = -byte_run; + palette_ptr1 = (palette_idx1 = encoded[stream_ptr++]) * 4; + for (j = 0; j < byte_run; j++) + { + fli_ghost_image[ghost_pixel_ptr++] = palette_idx1; + decoded[pixel_ptr++] = palette[palette_ptr1 + 0]; + decoded[pixel_ptr++] = palette[palette_ptr1 + 1]; + decoded[pixel_ptr++] = palette[palette_ptr1 + 2]; + if (bytes_per_pixel == 4) /* 32bpp */ + pixel_ptr++; + } + } + } + } + + y_ptr += line_inc; + ghost_y_ptr += width; + compressed_lines--; + } + break; + + case FLI_BLACK: + // set the whole frame to color 0 (which is usually black) by + // clearing the ghost image and trigger a full frame update + memset(fli_ghost_image, 0, width * height * sizeof(unsigned char)); + update_whole_frame = 1; + break; + + case FLI_BRUN: + // byte run compression + y_ptr = 0; + ghost_y_ptr = 0; + for (lines = 0; lines < height; lines++) + { + pixel_ptr = y_ptr; + ghost_pixel_ptr = ghost_y_ptr; + line_packets = encoded[stream_ptr++]; + for (i = 0; i < line_packets; i++) + { + byte_run = encoded[stream_ptr++]; + if (byte_run > 0) + { + palette_ptr1 = (palette_idx1 = encoded[stream_ptr++]) * 4; + for (j = 0; j < byte_run; j++) + { + fli_ghost_image[ghost_pixel_ptr++] = palette_idx1; + decoded[pixel_ptr++] = palette[palette_ptr1 + 0]; + decoded[pixel_ptr++] = palette[palette_ptr1 + 1]; + decoded[pixel_ptr++] = palette[palette_ptr1 + 2]; + if (bytes_per_pixel == 4) /* 32bpp */ + pixel_ptr++; + } + } + else // copy bytes if byte_run < 0 + { + byte_run = -byte_run; + for (j = 0; j < byte_run; j++) + { + palette_ptr1 = (palette_idx1 = encoded[stream_ptr++]) * 4; + fli_ghost_image[ghost_pixel_ptr++] = palette_idx1; + decoded[pixel_ptr++] = palette[palette_ptr1 + 0]; + decoded[pixel_ptr++] = palette[palette_ptr1 + 1]; + decoded[pixel_ptr++] = palette[palette_ptr1 + 2]; + if (bytes_per_pixel == 4) /* 32bpp */ + pixel_ptr++; + } + } + } + + y_ptr += line_inc; + ghost_y_ptr += width; + } + break; + + case FLI_COPY: + // copy the chunk (uncompressed frame) to the ghost image and + // schedule the whole frame to be updated + if (chunk_size - 6 > width * height) + { + mp_msg(MSGT_DECVIDEO, MSGL_WARN, + "FLI: in chunk FLI_COPY : source data (%d bytes) bigger than image," \ + " skipping chunk\n", + chunk_size - 6); + break; + } + else + memcpy(fli_ghost_image, &encoded[stream_ptr], chunk_size - 6); + stream_ptr += chunk_size - 6; + update_whole_frame = 1; + break; + + case FLI_MINI: + // sort of a thumbnail? disregard this chunk... + stream_ptr += chunk_size - 6; + break; + + default: + mp_msg (MSGT_DECVIDEO, MSGL_WARN, + "FLI: Unrecognized chunk type: %d\n", chunk_type); + break; + } + + frame_size -= chunk_size; + num_chunks--; + } + + if (update_whole_frame) + { + pixel_ptr = ghost_pixel_ptr = 0; + while (pixel_ptr < (width * height * bytes_per_pixel)) + { + palette_ptr1 = fli_ghost_image[ghost_pixel_ptr++] * 4; + decoded[pixel_ptr++] = palette[palette_ptr1 + 0]; + decoded[pixel_ptr++] = palette[palette_ptr1 + 1]; + decoded[pixel_ptr++] = palette[palette_ptr1 + 2]; + if (bytes_per_pixel == 4) /* 32bpp */ + pixel_ptr++; + } + } + + // by the end of the chunk, the stream ptr should equal the frame + // size (minus 1, possibly); if it doesn't, issue a warning + if ((stream_ptr != encoded_size) && (stream_ptr != encoded_size - 1)) + mp_msg(MSGT_DECVIDEO, MSGL_WARN, + " warning: processed FLI chunk where encoded size = %d\n" \ + " and final chunk ptr = %d\n", + encoded_size, stream_ptr); +} diff -r fd85802f755b -r 628c85c15c7b libmpcodecs/native/lzoconf.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libmpcodecs/native/lzoconf.h Sat Apr 13 18:03:02 2002 +0000 @@ -0,0 +1,386 @@ +/* lzoconf.h -- configuration for the LZO real-time data compression library + + This file is part of the LZO real-time data compression library. + + Copyright (C) 2000 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 1999 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 1998 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 1997 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 1996 Markus Franz Xaver Johannes Oberhumer + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + Markus F.X.J. Oberhumer + + http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html + */ + + +#ifndef __LZOCONF_H +#define __LZOCONF_H + +#define LZO_VERSION 0x1070 +#define LZO_VERSION_STRING "1.07" +#define LZO_VERSION_DATE "Oct 18 2000" + +/* internal Autoconf configuration file - only used when building LZO */ +#if defined(LZO_HAVE_CONFIG_H) +# include +#endif +#include + +#ifdef __cplusplus +extern "C" { +#endif + + +/*********************************************************************** +// LZO requires a conforming +************************************************************************/ + +#if !defined(CHAR_BIT) || (CHAR_BIT != 8) +# error "invalid CHAR_BIT" +#endif +#if !defined(UCHAR_MAX) || !defined(UINT_MAX) || !defined(ULONG_MAX) +# error "check your compiler installation" +#endif +#if (USHRT_MAX < 1) || (UINT_MAX < 1) || (ULONG_MAX < 1) +# error "your limits.h macros are broken" +#endif + +/* workaround a cpp bug under hpux 10.20 */ +#define LZO_0xffffffffL 4294967295ul + + +/*********************************************************************** +// architecture defines +************************************************************************/ + +#if !defined(__LZO_WIN) && !defined(__LZO_DOS) && !defined(__LZO_OS2) +# if defined(__WINDOWS__) || defined(_WINDOWS) || defined(_Windows) +# define __LZO_WIN +# elif defined(__WIN32__) || defined(_WIN32) || defined(WIN32) +# define __LZO_WIN +# elif defined(__NT__) || defined(__NT_DLL__) || defined(__WINDOWS_386__) +# define __LZO_WIN +# elif defined(__DOS__) || defined(__MSDOS__) || defined(MSDOS) +# define __LZO_DOS +# elif defined(__OS2__) || defined(__OS2V2__) || defined(OS2) +# define __LZO_OS2 +# elif defined(__palmos__) +# define __LZO_PALMOS +# elif defined(__TOS__) || defined(__atarist__) +# define __LZO_TOS +# endif +#endif + +#if (UINT_MAX < LZO_0xffffffffL) +# if defined(__LZO_WIN) +# define __LZO_WIN16 +# elif defined(__LZO_DOS) +# define __LZO_DOS16 +# elif defined(__LZO_PALMOS) +# define __LZO_PALMOS16 +# elif defined(__LZO_TOS) +# define __LZO_TOS16 +# elif defined(__C166__) +# else +# error "16-bit target not supported - contact me for porting hints" +# endif +#endif + +#if !defined(__LZO_i386) +# if defined(__LZO_DOS) || defined(__LZO_WIN16) +# define __LZO_i386 +# elif defined(__i386__) || defined(__386__) || defined(_M_IX86) +# define __LZO_i386 +# endif +#endif + +#if defined(__LZO_STRICT_16BIT) +# if (UINT_MAX < LZO_0xffffffffL) +# include +# endif +#endif + +/* memory checkers */ +#if !defined(__LZO_CHECKER) +# if defined(__BOUNDS_CHECKING_ON) +# define __LZO_CHECKER +# elif defined(__CHECKER__) +# define __LZO_CHECKER +# elif defined(__INSURE__) +# define __LZO_CHECKER +# elif defined(__PURIFY__) +# define __LZO_CHECKER +# endif +#endif + + +/*********************************************************************** +// integral and pointer types +************************************************************************/ + +/* Integral types with 32 bits or more */ +#if !defined(LZO_UINT32_MAX) +# if (UINT_MAX >= LZO_0xffffffffL) + typedef unsigned int lzo_uint32; + typedef int lzo_int32; +# define LZO_UINT32_MAX UINT_MAX +# define LZO_INT32_MAX INT_MAX +# define LZO_INT32_MIN INT_MIN +# elif (ULONG_MAX >= LZO_0xffffffffL) + typedef unsigned long lzo_uint32; + typedef long lzo_int32; +# define LZO_UINT32_MAX ULONG_MAX +# define LZO_INT32_MAX LONG_MAX +# define LZO_INT32_MIN LONG_MIN +# else +# error "lzo_uint32" +# endif +#endif + +/* lzo_uint is used like size_t */ +#if !defined(LZO_UINT_MAX) +# if (UINT_MAX >= LZO_0xffffffffL) + typedef unsigned int lzo_uint; + typedef int lzo_int; +# define LZO_UINT_MAX UINT_MAX +# define LZO_INT_MAX INT_MAX +# define LZO_INT_MIN INT_MIN +# elif (ULONG_MAX >= LZO_0xffffffffL) + typedef unsigned long lzo_uint; + typedef long lzo_int; +# define LZO_UINT_MAX ULONG_MAX +# define LZO_INT_MAX LONG_MAX +# define LZO_INT_MIN LONG_MIN +# else +# error "lzo_uint" +# endif +#endif + + +/* Memory model that allows to access memory at offsets of lzo_uint. */ +#if !defined(__LZO_MMODEL) +# if (LZO_UINT_MAX <= UINT_MAX) +# define __LZO_MMODEL +# elif defined(__LZO_DOS16) || defined(__LZO_WIN16) +# define __LZO_MMODEL __huge +# define LZO_999_UNSUPPORTED +# elif defined(__LZO_PALMOS16) || defined(__LZO_TOS16) +# define __LZO_MMODEL +# else +# error "__LZO_MMODEL" +# endif +#endif + +/* no typedef here because of const-pointer issues */ +#define lzo_byte unsigned char __LZO_MMODEL +#define lzo_bytep unsigned char __LZO_MMODEL * +#define lzo_charp char __LZO_MMODEL * +#define lzo_voidp void __LZO_MMODEL * +#define lzo_shortp short __LZO_MMODEL * +#define lzo_ushortp unsigned short __LZO_MMODEL * +#define lzo_uint32p lzo_uint32 __LZO_MMODEL * +#define lzo_int32p lzo_int32 __LZO_MMODEL * +#define lzo_uintp lzo_uint __LZO_MMODEL * +#define lzo_intp lzo_int __LZO_MMODEL * +#define lzo_voidpp lzo_voidp __LZO_MMODEL * +#define lzo_bytepp lzo_bytep __LZO_MMODEL * + +typedef int lzo_bool; + +#ifndef lzo_sizeof_dict_t +# define lzo_sizeof_dict_t sizeof(lzo_bytep) +#endif + + +/*********************************************************************** +// function types +************************************************************************/ + +/* linkage */ +#if !defined(__LZO_EXTERN_C) +# ifdef __cplusplus +# define __LZO_EXTERN_C extern "C" +# else +# define __LZO_EXTERN_C extern +# endif +#endif + +/* calling conventions */ +#if !defined(__LZO_CDECL) +# if defined(__LZO_DOS16) || defined(__LZO_WIN16) +# define __LZO_CDECL __far __cdecl +# elif defined(__LZO_i386) && defined(_MSC_VER) +# define __LZO_CDECL __cdecl +# elif defined(__LZO_i386) && defined(__WATCOMC__) +# define __LZO_CDECL __near __cdecl +# else +# define __LZO_CDECL +# endif +#endif +#if !defined(__LZO_ENTRY) +# define __LZO_ENTRY __LZO_CDECL +#endif + +/* DLL export information */ +#if !defined(__LZO_EXPORT1) +# define __LZO_EXPORT1 +#endif +#if !defined(__LZO_EXPORT2) +# define __LZO_EXPORT2 +#endif + +/* calling convention for C functions */ +#if !defined(LZO_PUBLIC) +# define LZO_PUBLIC(_rettype) __LZO_EXPORT1 _rettype __LZO_EXPORT2 __LZO_ENTRY +#endif +#if !defined(LZO_EXTERN) +# define LZO_EXTERN(_rettype) __LZO_EXTERN_C LZO_PUBLIC(_rettype) +#endif +#if !defined(LZO_PRIVATE) +# define LZO_PRIVATE(_rettype) static _rettype __LZO_ENTRY +#endif + +/* cdecl calling convention for assembler functions */ +#if !defined(LZO_PUBLIC_CDECL) +# define LZO_PUBLIC_CDECL(_rettype) \ + __LZO_EXPORT1 _rettype __LZO_EXPORT2 __LZO_CDECL +#endif +#if !defined(LZO_EXTERN_CDECL) +# define LZO_EXTERN_CDECL(_rettype) __LZO_EXTERN_C LZO_PUBLIC_CDECL(_rettype) +#endif + + +typedef int +(__LZO_ENTRY *lzo_compress_t) ( const lzo_byte *src, lzo_uint src_len, + lzo_byte *dst, lzo_uint *dst_len, + lzo_voidp wrkmem ); + +typedef int +(__LZO_ENTRY *lzo_decompress_t) ( const lzo_byte *src, lzo_uint src_len, + lzo_byte *dst, lzo_uint *dst_len, + lzo_voidp wrkmem ); + +typedef int +(__LZO_ENTRY *lzo_optimize_t) ( lzo_byte *src, lzo_uint src_len, + lzo_byte *dst, lzo_uint *dst_len, + lzo_voidp wrkmem ); + +typedef int +(__LZO_ENTRY *lzo_compress_dict_t)(const lzo_byte *src, lzo_uint src_len, + lzo_byte *dst, lzo_uint *dst_len, + lzo_voidp wrkmem, + const lzo_byte *dict, lzo_uint dict_len ); + +typedef int +(__LZO_ENTRY *lzo_decompress_dict_t)(const lzo_byte *src, lzo_uint src_len, + lzo_byte *dst, lzo_uint *dst_len, + lzo_voidp wrkmem, + const lzo_byte *dict, lzo_uint dict_len ); + + +/* a progress indicator callback function */ +typedef void (__LZO_ENTRY *lzo_progress_callback_t) (lzo_uint, lzo_uint); + + +/*********************************************************************** +// error codes and prototypes +************************************************************************/ + +/* Error codes for the compression/decompression functions. Negative + * values are errors, positive values will be used for special but + * normal events. + */ +#define LZO_E_OK 0 +#define LZO_E_ERROR (-1) +#define LZO_E_OUT_OF_MEMORY (-2) /* not used right now */ +#define LZO_E_NOT_COMPRESSIBLE (-3) /* not used right now */ +#define LZO_E_INPUT_OVERRUN (-4) +#define LZO_E_OUTPUT_OVERRUN (-5) +#define LZO_E_LOOKBEHIND_OVERRUN (-6) +#define LZO_E_EOF_NOT_FOUND (-7) +#define LZO_E_INPUT_NOT_CONSUMED (-8) + + +/* lzo_init() should be the first function you call. + * Check the return code ! + * + * lzo_init() is a macro to allow checking that the library and the + * compiler's view of various types are consistent. + */ +#define lzo_init() __lzo_init2(LZO_VERSION,(int)sizeof(short),(int)sizeof(int),\ + (int)sizeof(long),(int)sizeof(lzo_uint32),(int)sizeof(lzo_uint),\ + (int)lzo_sizeof_dict_t,(int)sizeof(char *),(int)sizeof(lzo_voidp),\ + (int)sizeof(lzo_compress_t)) +LZO_EXTERN(int) __lzo_init2(unsigned,int,int,int,int,int,int,int,int,int); + +/* version functions (useful for shared libraries) */ +LZO_EXTERN(unsigned) lzo_version(void); +LZO_EXTERN(const char *) lzo_version_string(void); +LZO_EXTERN(const char *) lzo_version_date(void); +LZO_EXTERN(const lzo_charp) _lzo_version_string(void); +LZO_EXTERN(const lzo_charp) _lzo_version_date(void); + +/* string functions */ +LZO_EXTERN(int) +lzo_memcmp(const lzo_voidp _s1, const lzo_voidp _s2, lzo_uint _len); +LZO_EXTERN(lzo_voidp) +lzo_memcpy(lzo_voidp _dest, const lzo_voidp _src, lzo_uint _len); +LZO_EXTERN(lzo_voidp) +lzo_memmove(lzo_voidp _dest, const lzo_voidp _src, lzo_uint _len); +LZO_EXTERN(lzo_voidp) +lzo_memset(lzo_voidp _s, int _c, lzo_uint _len); + +/* checksum functions */ +LZO_EXTERN(lzo_uint32) +lzo_adler32(lzo_uint32 _adler, const lzo_byte *_buf, lzo_uint _len); +LZO_EXTERN(lzo_uint32) +lzo_crc32(lzo_uint32 _c, const lzo_byte *_buf, lzo_uint _len); + +/* memory allocation functions */ +LZO_EXTERN(lzo_bytep) lzo_alloc(lzo_uint _nelems, lzo_uint _size); +LZO_EXTERN(lzo_bytep) lzo_malloc(lzo_uint _size); +LZO_EXTERN(void) lzo_free(lzo_voidp _ptr); + +typedef lzo_bytep (__LZO_ENTRY *lzo_alloc_hook_t) (lzo_uint, lzo_uint); +typedef void (__LZO_ENTRY *lzo_free_hook_t) (lzo_voidp); + +extern lzo_alloc_hook_t lzo_alloc_hook; +extern lzo_free_hook_t lzo_free_hook; + +/* misc. */ +LZO_EXTERN(lzo_bool) lzo_assert(int _expr); +LZO_EXTERN(int) _lzo_config_check(void); +typedef union { lzo_bytep p; lzo_uint u; } __lzo_pu_u; +typedef union { lzo_bytep p; lzo_uint32 u32; } __lzo_pu32_u; + +/* align a char pointer on a boundary that is a multiple of `size' */ +LZO_EXTERN(unsigned) __lzo_align_gap(const lzo_voidp _ptr, lzo_uint _size); +#define LZO_PTR_ALIGN_UP(_ptr,_size) \ + ((_ptr) + (lzo_uint) __lzo_align_gap((const lzo_voidp)(_ptr),(lzo_uint)(_size))) + +/* deprecated - only for backward compatibility */ +#define LZO_ALIGN(_ptr,_size) LZO_PTR_ALIGN_UP(_ptr,_size) + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* already included */ + diff -r fd85802f755b -r 628c85c15c7b libmpcodecs/native/minilzo.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libmpcodecs/native/minilzo.c Sat Apr 13 18:03:02 2002 +0000 @@ -0,0 +1,2848 @@ +/* minilzo.c -- mini subset of the LZO real-time data compression library + + This file is part of the LZO real-time data compression library. + + Copyright (C) 2000 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 1999 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 1998 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 1997 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 1996 Markus Franz Xaver Johannes Oberhumer + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + Markus F.X.J. Oberhumer + + http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html + */ + +/* + * NOTE: + * the full LZO package can be found at + * http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html + */ + +#define __LZO_IN_MINILZO + +#ifdef MINILZO_HAVE_CONFIG_H +# include +#endif + +#undef LZO_HAVE_CONFIG_H +#include "minilzo.h" + +#if !defined(MINILZO_VERSION) || (MINILZO_VERSION != 0x1070) +# error "version mismatch in miniLZO source files" +#endif + +#ifdef MINILZO_HAVE_CONFIG_H +# define LZO_HAVE_CONFIG_H +#endif + +#if !defined(LZO_NO_SYS_TYPES_H) +# include +#endif +#include + +#ifndef __LZO_CONF_H +#define __LZO_CONF_H + +#if !defined(__LZO_IN_MINILZO) +# ifndef __LZOCONF_H +# include +# endif +#endif + +#if defined(__BOUNDS_CHECKING_ON) +# include +#else +# define BOUNDS_CHECKING_OFF_DURING(stmt) stmt +# define BOUNDS_CHECKING_OFF_IN_EXPR(expr) (expr) +#endif + +#if !defined(LZO_HAVE_CONFIG_H) +# include +# include +# if !defined(NO_STDLIB_H) +# include +# endif +# define HAVE_MEMCMP +# define HAVE_MEMCPY +# define HAVE_MEMMOVE +# define HAVE_MEMSET +#else +# include +# if defined(STDC_HEADERS) +# include +# include +# endif +# if defined(HAVE_STDDEF_H) +# include +# endif +# if defined(HAVE_MEMORY_H) +# include +# endif +#endif + +#if defined(__LZO_DOS16) || defined(__LZO_WIN16) +# define HAVE_MALLOC_H +# define HAVE_HALLOC +#endif + +#undef NDEBUG +#if !defined(LZO_DEBUG) +# define NDEBUG +#endif +#if defined(LZO_DEBUG) || !defined(NDEBUG) +# if !defined(NO_STDIO_H) +# include +# endif +#endif +#include + +#if !defined(LZO_UNUSED) +# define LZO_UNUSED(parm) (parm = parm) +#endif + +#if !defined(__inline__) && !defined(__GNUC__) +# if defined(__cplusplus) +# define __inline__ inline +# else +# define __inline__ +# endif +#endif + +#if defined(NO_MEMCMP) +# undef HAVE_MEMCMP +#endif + +#if !defined(HAVE_MEMCMP) +# undef memcmp +# define memcmp lzo_memcmp +#endif +#if !defined(HAVE_MEMCPY) +# undef memcpy +# define memcpy lzo_memcpy +#endif +#if !defined(HAVE_MEMMOVE) +# undef memmove +# define memmove lzo_memmove +#endif +#if !defined(HAVE_MEMSET) +# undef memset +# define memset lzo_memset +#endif + +#if 1 +# define LZO_BYTE(x) ((unsigned char) (x)) +#else +# define LZO_BYTE(x) ((unsigned char) ((x) & 0xff)) +#endif +#if 0 +# define LZO_USHORT(x) ((unsigned short) (x)) +#else +# define LZO_USHORT(x) ((unsigned short) ((x) & 0xffff)) +#endif + +#define LZO_MAX(a,b) ((a) >= (b) ? (a) : (b)) +#define LZO_MIN(a,b) ((a) <= (b) ? (a) : (b)) +#define LZO_MAX3(a,b,c) ((a) >= (b) ? LZO_MAX(a,c) : LZO_MAX(b,c)) +#define LZO_MIN3(a,b,c) ((a) <= (b) ? LZO_MIN(a,c) : LZO_MIN(b,c)) + +#define lzo_sizeof(type) ((lzo_uint) (sizeof(type))) + +#define LZO_HIGH(array) ((lzo_uint) (sizeof(array)/sizeof(*(array)))) + +#define LZO_SIZE(bits) (1u << (bits)) +#define LZO_MASK(bits) (LZO_SIZE(bits) - 1) + +#define LZO_LSIZE(bits) (1ul << (bits)) +#define LZO_LMASK(bits) (LZO_LSIZE(bits) - 1) + +#define LZO_USIZE(bits) ((lzo_uint) 1 << (bits)) +#define LZO_UMASK(bits) (LZO_USIZE(bits) - 1) + +#define LZO_STYPE_MAX(b) (((1l << (8*(b)-2)) - 1l) + (1l << (8*(b)-2))) +#define LZO_UTYPE_MAX(b) (((1ul << (8*(b)-1)) - 1ul) + (1ul << (8*(b)-1))) + +#if !defined(SIZEOF_UNSIGNED) +# if (UINT_MAX == 0xffff) +# define SIZEOF_UNSIGNED 2 +# elif (UINT_MAX == LZO_0xffffffffL) +# define SIZEOF_UNSIGNED 4 +# elif (UINT_MAX >= LZO_0xffffffffL) +# define SIZEOF_UNSIGNED 8 +# else +# error SIZEOF_UNSIGNED +# endif +#endif + +#if !defined(SIZEOF_UNSIGNED_LONG) +# if (ULONG_MAX == LZO_0xffffffffL) +# define SIZEOF_UNSIGNED_LONG 4 +# elif (ULONG_MAX >= LZO_0xffffffffL) +# define SIZEOF_UNSIGNED_LONG 8 +# else +# error SIZEOF_UNSIGNED_LONG +# endif +#endif + +#if !defined(SIZEOF_SIZE_T) +# define SIZEOF_SIZE_T SIZEOF_UNSIGNED +#endif +#if !defined(SIZE_T_MAX) +# define SIZE_T_MAX LZO_UTYPE_MAX(SIZEOF_SIZE_T) +#endif + +#if 1 && defined(__LZO_i386) && (UINT_MAX == LZO_0xffffffffL) +# if !defined(LZO_UNALIGNED_OK_2) && (USHRT_MAX == 0xffff) +# define LZO_UNALIGNED_OK_2 +# endif +# if !defined(LZO_UNALIGNED_OK_4) && (LZO_UINT32_MAX == LZO_0xffffffffL) +# define LZO_UNALIGNED_OK_4 +# endif +#endif + +#if defined(LZO_UNALIGNED_OK_2) || defined(LZO_UNALIGNED_OK_4) +# if !defined(LZO_UNALIGNED_OK) +# define LZO_UNALIGNED_OK +# endif +#endif + +#if defined(__LZO_NO_UNALIGNED) +# undef LZO_UNALIGNED_OK +# undef LZO_UNALIGNED_OK_2 +# undef LZO_UNALIGNED_OK_4 +#endif + +#if defined(LZO_UNALIGNED_OK_2) && (USHRT_MAX != 0xffff) +# error "LZO_UNALIGNED_OK_2 must not be defined on this system" +#endif +#if defined(LZO_UNALIGNED_OK_4) && (LZO_UINT32_MAX != LZO_0xffffffffL) +# error "LZO_UNALIGNED_OK_4 must not be defined on this system" +#endif + +#if defined(__LZO_NO_ALIGNED) +# undef LZO_ALIGNED_OK_4 +#endif + +#if defined(LZO_ALIGNED_OK_4) && (LZO_UINT32_MAX != LZO_0xffffffffL) +# error "LZO_ALIGNED_OK_4 must not be defined on this system" +#endif + +#define LZO_LITTLE_ENDIAN 1234 +#define LZO_BIG_ENDIAN 4321 +#define LZO_PDP_ENDIAN 3412 + +#if !defined(LZO_BYTE_ORDER) +# if defined(MFX_BYTE_ORDER) +# define LZO_BYTE_ORDER MFX_BYTE_ORDER +# elif defined(__LZO_i386) +# define LZO_BYTE_ORDER LZO_LITTLE_ENDIAN +# elif defined(BYTE_ORDER) +# define LZO_BYTE_ORDER BYTE_ORDER +# elif defined(__BYTE_ORDER) +# define LZO_BYTE_ORDER __BYTE_ORDER +# endif +#endif + +#if defined(LZO_BYTE_ORDER) +# if (LZO_BYTE_ORDER != LZO_LITTLE_ENDIAN) && \ + (LZO_BYTE_ORDER != LZO_BIG_ENDIAN) +# error "invalid LZO_BYTE_ORDER" +# endif +#endif + +#if defined(LZO_UNALIGNED_OK) && !defined(LZO_BYTE_ORDER) +# error "LZO_BYTE_ORDER is not defined" +#endif + +#define LZO_OPTIMIZE_GNUC_i386_IS_BUGGY + +#if defined(NDEBUG) && !defined(LZO_DEBUG) && !defined(__LZO_CHECKER) +# if defined(__GNUC__) && defined(__i386__) +# if !defined(LZO_OPTIMIZE_GNUC_i386_IS_BUGGY) +# define LZO_OPTIMIZE_GNUC_i386 +# endif +# endif +#endif + +__LZO_EXTERN_C int __lzo_init_done; +__LZO_EXTERN_C const lzo_byte __lzo_copyright[]; +LZO_EXTERN(const lzo_byte *) lzo_copyright(void); +__LZO_EXTERN_C const lzo_uint32 _lzo_crc32_table[256]; + +#define _LZO_STRINGIZE(x) #x +#define _LZO_MEXPAND(x) _LZO_STRINGIZE(x) + +#define _LZO_CONCAT2(a,b) a ## b +#define _LZO_CONCAT3(a,b,c) a ## b ## c +#define _LZO_CONCAT4(a,b,c,d) a ## b ## c ## d +#define _LZO_CONCAT5(a,b,c,d,e) a ## b ## c ## d ## e + +#define _LZO_ECONCAT2(a,b) _LZO_CONCAT2(a,b) +#define _LZO_ECONCAT3(a,b,c) _LZO_CONCAT3(a,b,c) +#define _LZO_ECONCAT4(a,b,c,d) _LZO_CONCAT4(a,b,c,d) +#define _LZO_ECONCAT5(a,b,c,d,e) _LZO_CONCAT5(a,b,c,d,e) + +#if 0 + +#define __LZO_IS_COMPRESS_QUERY(i,il,o,ol,w) ((lzo_voidp)(o) == (w)) +#define __LZO_QUERY_COMPRESS(i,il,o,ol,w,n,s) \ + (*ol = (n)*(s), LZO_E_OK) + +#define __LZO_IS_DECOMPRESS_QUERY(i,il,o,ol,w) ((lzo_voidp)(o) == (w)) +#define __LZO_QUERY_DECOMPRESS(i,il,o,ol,w,n,s) \ + (*ol = (n)*(s), LZO_E_OK) + +#define __LZO_IS_OPTIMIZE_QUERY(i,il,o,ol,w) ((lzo_voidp)(o) == (w)) +#define __LZO_QUERY_OPTIMIZE(i,il,o,ol,w,n,s) \ + (*ol = (n)*(s), LZO_E_OK) + +#endif + +#ifndef __LZO_PTR_H +#define __LZO_PTR_H + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(__LZO_DOS16) || defined(__LZO_WIN16) +# include +# if 1 && defined(__WATCOMC__) +# include + __LZO_EXTERN_C unsigned char _HShift; +# define __LZO_HShift _HShift +# elif 1 && defined(_MSC_VER) + __LZO_EXTERN_C unsigned short __near _AHSHIFT; +# define __LZO_HShift ((unsigned) &_AHSHIFT) +# elif defined(__LZO_WIN16) +# define __LZO_HShift 3 +# else +# define __LZO_HShift 12 +# endif +# if !defined(_FP_SEG) && defined(FP_SEG) +# define _FP_SEG FP_SEG +# endif +# if !defined(_FP_OFF) && defined(FP_OFF) +# define _FP_OFF FP_OFF +# endif +#endif + +#if (UINT_MAX >= LZO_0xffffffffL) + typedef ptrdiff_t lzo_ptrdiff_t; +#else + typedef long lzo_ptrdiff_t; +#endif + +#if !defined(__LZO_HAVE_PTR_T) +# if defined(lzo_ptr_t) +# define __LZO_HAVE_PTR_T +# endif +#endif +#if !defined(__LZO_HAVE_PTR_T) +# if defined(SIZEOF_CHAR_P) && defined(SIZEOF_UNSIGNED_LONG) +# if (SIZEOF_CHAR_P == SIZEOF_UNSIGNED_LONG) + typedef unsigned long lzo_ptr_t; + typedef long lzo_sptr_t; +# define __LZO_HAVE_PTR_T +# endif +# endif +#endif +#if !defined(__LZO_HAVE_PTR_T) +# if defined(SIZEOF_CHAR_P) && defined(SIZEOF_UNSIGNED) +# if (SIZEOF_CHAR_P == SIZEOF_UNSIGNED) + typedef unsigned int lzo_ptr_t; + typedef int lzo_sptr_t; +# define __LZO_HAVE_PTR_T +# endif +# endif +#endif +#if !defined(__LZO_HAVE_PTR_T) +# if defined(SIZEOF_CHAR_P) && defined(SIZEOF_UNSIGNED_SHORT) +# if (SIZEOF_CHAR_P == SIZEOF_UNSIGNED_SHORT) + typedef unsigned short lzo_ptr_t; + typedef short lzo_sptr_t; +# define __LZO_HAVE_PTR_T +# endif +# endif +#endif +#if !defined(__LZO_HAVE_PTR_T) +# if defined(LZO_HAVE_CONFIG_H) || defined(SIZEOF_CHAR_P) +# error "no suitable type for lzo_ptr_t" +# else + typedef unsigned long lzo_ptr_t; + typedef long lzo_sptr_t; +# define __LZO_HAVE_PTR_T +# endif +#endif + +#if defined(__LZO_DOS16) || defined(__LZO_WIN16) +#define PTR(a) ((lzo_bytep) (a)) +#define PTR_ALIGNED_4(a) ((_FP_OFF(a) & 3) == 0) +#define PTR_ALIGNED2_4(a,b) (((_FP_OFF(a) | _FP_OFF(b)) & 3) == 0) +#else +#define PTR(a) ((lzo_ptr_t) (a)) +#define PTR_LINEAR(a) PTR(a) +#define PTR_ALIGNED_4(a) ((PTR_LINEAR(a) & 3) == 0) +#define PTR_ALIGNED_8(a) ((PTR_LINEAR(a) & 7) == 0) +#define PTR_ALIGNED2_4(a,b) (((PTR_LINEAR(a) | PTR_LINEAR(b)) & 3) == 0) +#define PTR_ALIGNED2_8(a,b) (((PTR_LINEAR(a) | PTR_LINEAR(b)) & 7) == 0) +#endif + +#define PTR_LT(a,b) (PTR(a) < PTR(b)) +#define PTR_GE(a,b) (PTR(a) >= PTR(b)) +#define PTR_DIFF(a,b) ((lzo_ptrdiff_t) (PTR(a) - PTR(b))) + +LZO_EXTERN(lzo_ptr_t) +__lzo_ptr_linear(const lzo_voidp ptr); + +typedef union +{ + char a_char; + unsigned char a_uchar; + short a_short; + unsigned short a_ushort; + int a_int; + unsigned int a_uint; + long a_long; + unsigned long a_ulong; + lzo_int a_lzo_int; + lzo_uint a_lzo_uint; + lzo_int32 a_lzo_int32; + lzo_uint32 a_lzo_uint32; + ptrdiff_t a_ptrdiff_t; + lzo_ptrdiff_t a_lzo_ptrdiff_t; + lzo_ptr_t a_lzo_ptr_t; + char * a_charp; + lzo_bytep a_lzo_bytep; + lzo_bytepp a_lzo_bytepp; +} +lzo_align_t; + +#ifdef __cplusplus +} +#endif + +#endif + +#define LZO_DETERMINISTIC + +#define LZO_DICT_USE_PTR +#if defined(__LZO_DOS16) || defined(__LZO_WIN16) || defined(__LZO_STRICT_16BIT) +# undef LZO_DICT_USE_PTR +#endif + +#if defined(LZO_DICT_USE_PTR) +# define lzo_dict_t const lzo_bytep +# define lzo_dict_p lzo_dict_t __LZO_MMODEL * +#else +# define lzo_dict_t lzo_uint +# define lzo_dict_p lzo_dict_t __LZO_MMODEL * +#endif + +#if !defined(lzo_moff_t) +#define lzo_moff_t lzo_uint +#endif + +#endif + +LZO_PUBLIC(lzo_ptr_t) +__lzo_ptr_linear(const lzo_voidp ptr) +{ + lzo_ptr_t p; + +#if defined(__LZO_DOS16) || defined(__LZO_WIN16) + p = (((lzo_ptr_t)(_FP_SEG(ptr))) << (16 - __LZO_HShift)) + (_FP_OFF(ptr)); +#else + p = PTR_LINEAR(ptr); +#endif + + return p; +} + +LZO_PUBLIC(unsigned) +__lzo_align_gap(const lzo_voidp ptr, lzo_uint size) +{ + lzo_ptr_t p, s, n; + + assert(size > 0); + + p = __lzo_ptr_linear(ptr); + s = (lzo_ptr_t) (size - 1); +#if 0 + assert((size & (size - 1)) == 0); + n = ((p + s) & ~s) - p; +#else + n = (((p + s) / size) * size) - p; +#endif + + assert((long)n >= 0); + assert(n <= s); + + return (unsigned)n; +} + +#ifndef __LZO_UTIL_H +#define __LZO_UTIL_H + +#ifndef __LZO_CONF_H +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#if 1 && defined(HAVE_MEMCPY) +#if !defined(__LZO_DOS16) && !defined(__LZO_WIN16) + +#define MEMCPY8_DS(dest,src,len) \ + memcpy(dest,src,len); \ + dest += len; \ + src += len + +#endif +#endif + +#if 0 && !defined(MEMCPY8_DS) + +#define MEMCPY8_DS(dest,src,len) \ + { do { \ + *dest++ = *src++; \ + *dest++ = *src++; \ + *dest++ = *src++; \ + *dest++ = *src++; \ + *dest++ = *src++; \ + *dest++ = *src++; \ + *dest++ = *src++; \ + *dest++ = *src++; \ + len -= 8; \ + } while (len > 0); } + +#endif + +#if !defined(MEMCPY8_DS) + +#define MEMCPY8_DS(dest,src,len) \ + { register lzo_uint __l = (len) / 8; \ + do { \ + *dest++ = *src++; \ + *dest++ = *src++; \ + *dest++ = *src++; \ + *dest++ = *src++; \ + *dest++ = *src++; \ + *dest++ = *src++; \ + *dest++ = *src++; \ + *dest++ = *src++; \ + } while (--__l > 0); } + +#endif + +#define MEMCPY_DS(dest,src,len) \ + do *dest++ = *src++; \ + while (--len > 0) + +#define MEMMOVE_DS(dest,src,len) \ + do *dest++ = *src++; \ + while (--len > 0) + +#if 0 && defined(LZO_OPTIMIZE_GNUC_i386) + +#define BZERO8_PTR(s,l,n) \ +__asm__ __volatile__( \ + "movl %0,%%eax \n" \ + "movl %1,%%edi \n" \ + "movl %2,%%ecx \n" \ + "cld \n" \ + "rep \n" \ + "stosl %%eax,(%%edi) \n" \ + : \ + :"g" (0),"g" (s),"g" (n) \ + :"eax","edi","ecx", "memory", "cc" \ +) + +#elif (LZO_UINT_MAX <= SIZE_T_MAX) && defined(HAVE_MEMSET) + +#if 1 +#define BZERO8_PTR(s,l,n) memset((s),0,(lzo_uint)(l)*(n)) +#else +#define BZERO8_PTR(s,l,n) memset((lzo_voidp)(s),0,(lzo_uint)(l)*(n)) +#endif + +#else + +#define BZERO8_PTR(s,l,n) \ + lzo_memset((lzo_voidp)(s),0,(lzo_uint)(l)*(n)) + +#endif + +#if 0 +#if defined(__GNUC__) && defined(__i386__) + +unsigned char lzo_rotr8(unsigned char value, int shift); +extern __inline__ unsigned char lzo_rotr8(unsigned char value, int shift) +{ + unsigned char result; + + __asm__ __volatile__ ("movb %b1, %b0; rorb %b2, %b0" + : "=a"(result) : "g"(value), "c"(shift)); + return result; +} + +unsigned short lzo_rotr16(unsigned short value, int shift); +extern __inline__ unsigned short lzo_rotr16(unsigned short value, int shift) +{ + unsigned short result; + + __asm__ __volatile__ ("movw %b1, %b0; rorw %b2, %b0" + : "=a"(result) : "g"(value), "c"(shift)); + return result; +} + +#endif +#endif + +#ifdef __cplusplus +} +#endif + +#endif + +LZO_PUBLIC(lzo_bool) +lzo_assert(int expr) +{ + return (expr) ? 1 : 0; +} + +/* If you use the LZO library in a product, you *must* keep this + * copyright string in the executable of your product. + */ + +const lzo_byte __lzo_copyright[] = +#if !defined(__LZO_IN_MINLZO) + LZO_VERSION_STRING; +#else + "\n\n\n" + "LZO real-time data compression library.\n" + "Copyright (C) 1996, 1997, 1998, 1999, 2000 Markus Franz Xaver Johannes Oberhumer\n" + "\n" + "http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html\n" + "\n" + "LZO version: v" LZO_VERSION_STRING ", " LZO_VERSION_DATE "\n" + "LZO build date: " __DATE__ " " __TIME__ "\n\n" + "LZO special compilation options:\n" +#ifdef __cplusplus + " __cplusplus\n" +#endif +#if defined(__PIC__) + " __PIC__\n" +#elif defined(__pic__) + " __pic__\n" +#endif +#if (UINT_MAX < LZO_0xffffffffL) + " 16BIT\n" +#endif +#if defined(__LZO_STRICT_16BIT) + " __LZO_STRICT_16BIT\n" +#endif +#if (UINT_MAX > LZO_0xffffffffL) + " UINT_MAX=" _LZO_MEXPAND(UINT_MAX) "\n" +#endif +#if (ULONG_MAX > LZO_0xffffffffL) + " ULONG_MAX=" _LZO_MEXPAND(ULONG_MAX) "\n" +#endif +#if defined(LZO_BYTE_ORDER) + " LZO_BYTE_ORDER=" _LZO_MEXPAND(LZO_BYTE_ORDER) "\n" +#endif +#if defined(LZO_UNALIGNED_OK_2) + " LZO_UNALIGNED_OK_2\n" +#endif +#if defined(LZO_UNALIGNED_OK_4) + " LZO_UNALIGNED_OK_4\n" +#endif +#if defined(LZO_ALIGNED_OK_4) + " LZO_ALIGNED_OK_4\n" +#endif +#if defined(LZO_DICT_USE_PTR) + " LZO_DICT_USE_PTR\n" +#endif +#if defined(__LZO_QUERY_COMPRESS) + " __LZO_QUERY_COMPRESS\n" +#endif +#if defined(__LZO_QUERY_DECOMPRESS) + " __LZO_QUERY_DECOMPRESS\n" +#endif +#if defined(__LZO_IN_MINILZO) + " __LZO_IN_MINILZO\n" +#endif + "\n\n" + "$Id: LZO " LZO_VERSION_STRING " built " __DATE__ " " __TIME__ +#if defined(__GNUC__) && defined(__VERSION__) + " by gcc " __VERSION__ +#elif defined(__BORLANDC__) + " by Borland C " _LZO_MEXPAND(__BORLANDC__) +#elif defined(_MSC_VER) + " by Microsoft C " _LZO_MEXPAND(_MSC_VER) +#elif defined(__PUREC__) + " by Pure C " _LZO_MEXPAND(__PUREC__) +#elif defined(__SC__) + " by Symantec C " _LZO_MEXPAND(__SC__) +#elif defined(__TURBOC__) + " by Turbo C " _LZO_MEXPAND(__TURBOC__) +#elif defined(__WATCOMC__) + " by Watcom C " _LZO_MEXPAND(__WATCOMC__) +#endif + " $\n" + "$Copyright: LZO (C) 1996, 1997, 1998, 1999, 2000 Markus Franz Xaver Johannes Oberhumer $\n"; +#endif + +LZO_PUBLIC(const lzo_byte *) +lzo_copyright(void) +{ + return __lzo_copyright; +} + +LZO_PUBLIC(unsigned) +lzo_version(void) +{ + return LZO_VERSION; +} + +LZO_PUBLIC(const char *) +lzo_version_string(void) +{ + return LZO_VERSION_STRING; +} + +LZO_PUBLIC(const char *) +lzo_version_date(void) +{ + return LZO_VERSION_DATE; +} + +LZO_PUBLIC(const lzo_charp) +_lzo_version_string(void) +{ + return LZO_VERSION_STRING; +} + +LZO_PUBLIC(const lzo_charp) +_lzo_version_date(void) +{ + return LZO_VERSION_DATE; +} + +#define LZO_BASE 65521u +#define LZO_NMAX 5552 + +#define LZO_DO1(buf,i) {s1 += buf[i]; s2 += s1;} +#define LZO_DO2(buf,i) LZO_DO1(buf,i); LZO_DO1(buf,i+1); +#define LZO_DO4(buf,i) LZO_DO2(buf,i); LZO_DO2(buf,i+2); +#define LZO_DO8(buf,i) LZO_DO4(buf,i); LZO_DO4(buf,i+4); +#define LZO_DO16(buf,i) LZO_DO8(buf,i); LZO_DO8(buf,i+8); + +LZO_PUBLIC(lzo_uint32) +lzo_adler32(lzo_uint32 adler, const lzo_byte *buf, lzo_uint len) +{ + lzo_uint32 s1 = adler & 0xffff; + lzo_uint32 s2 = (adler >> 16) & 0xffff; + int k; + + if (buf == NULL) + return 1; + + while (len > 0) + { + k = len < LZO_NMAX ? (int) len : LZO_NMAX; + len -= k; + if (k >= 16) do + { + LZO_DO16(buf,0); + buf += 16; + k -= 16; + } while (k >= 16); + if (k != 0) do + { + s1 += *buf++; + s2 += s1; + } while (--k > 0); + s1 %= LZO_BASE; + s2 %= LZO_BASE; + } + return (s2 << 16) | s1; +} + +LZO_PUBLIC(int) +lzo_memcmp(const lzo_voidp s1, const lzo_voidp s2, lzo_uint len) +{ +#if (LZO_UINT_MAX <= SIZE_T_MAX) && defined(HAVE_MEMCMP) + return memcmp(s1,s2,len); +#else + const lzo_byte *p1 = (const lzo_byte *) s1; + const lzo_byte *p2 = (const lzo_byte *) s2; + int d; + + if (len > 0) do + { + d = *p1 - *p2; + if (d != 0) + return d; + p1++; + p2++; + } + while (--len > 0); + return 0; +#endif +} + +LZO_PUBLIC(lzo_voidp) +lzo_memcpy(lzo_voidp dest, const lzo_voidp src, lzo_uint len) +{ +#if (LZO_UINT_MAX <= SIZE_T_MAX) && defined(HAVE_MEMCPY) + return memcpy(dest,src,len); +#else + lzo_byte *p1 = (lzo_byte *) dest; + const lzo_byte *p2 = (const lzo_byte *) src; + + if (len <= 0 || p1 == p2) + return dest; + do + *p1++ = *p2++; + while (--len > 0); + return dest; +#endif +} + +LZO_PUBLIC(lzo_voidp) +lzo_memmove(lzo_voidp dest, const lzo_voidp src, lzo_uint len) +{ +#if (LZO_UINT_MAX <= SIZE_T_MAX) && defined(HAVE_MEMMOVE) + return memmove(dest,src,len); +#else + lzo_byte *p1 = (lzo_byte *) dest; + const lzo_byte *p2 = (const lzo_byte *) src; + + if (len <= 0 || p1 == p2) + return dest; + + if (p1 < p2) + { + do + *p1++ = *p2++; + while (--len > 0); + } + else + { + p1 += len; + p2 += len; + do + *--p1 = *--p2; + while (--len > 0); + } + return dest; +#endif +} + +LZO_PUBLIC(lzo_voidp) +lzo_memset(lzo_voidp s, int c, lzo_uint len) +{ +#if (LZO_UINT_MAX <= SIZE_T_MAX) && defined(HAVE_MEMSET) + return memset(s,c,len); +#else + lzo_byte *p = (lzo_byte *) s; + + if (len > 0) do + *p++ = LZO_BYTE(c); + while (--len > 0); + return s; +#endif +} + +#include + +#if 0 +# define IS_SIGNED(type) (((type) (1ul << (8 * sizeof(type) - 1))) < 0) +# define IS_UNSIGNED(type) (((type) (1ul << (8 * sizeof(type) - 1))) > 0) +#else +# define IS_SIGNED(type) (((type) (-1)) < ((type) 0)) +# define IS_UNSIGNED(type) (((type) (-1)) > ((type) 0)) +#endif + +static lzo_bool schedule_insns_bug(void); +static lzo_bool strength_reduce_bug(int *); + +#if 0 || defined(LZO_DEBUG) +static lzo_bool __lzo_assert_fail(const char *s, unsigned line) +{ +#if defined(__palmos__) + printf("LZO assertion failed in line %u: '%s'\n",line,s); +#else + fprintf(stderr,"LZO assertion failed in line %u: '%s'\n",line,s); +#endif + return 0; +} +# define __lzo_assert(x) ((x) ? 1 : __lzo_assert_fail(#x,__LINE__)) +#else +# define __lzo_assert(x) ((x) ? 1 : 0) +#endif + +static lzo_bool basic_integral_check(void) +{ + lzo_bool r = 1; + lzo_bool sanity; + + r &= __lzo_assert(CHAR_BIT == 8); + r &= __lzo_assert(sizeof(char) == 1); + r &= __lzo_assert(sizeof(short) >= 2); + r &= __lzo_assert(sizeof(long) >= 4); + r &= __lzo_assert(sizeof(int) >= sizeof(short)); + r &= __lzo_assert(sizeof(long) >= sizeof(int)); + + r &= __lzo_assert(sizeof(lzo_uint32) >= 4); + r &= __lzo_assert(sizeof(lzo_uint32) >= sizeof(unsigned)); +#if defined(__LZO_STRICT_16BIT) + r &= __lzo_assert(sizeof(lzo_uint) == 2); +#else + r &= __lzo_assert(sizeof(lzo_uint) >= 4); + r &= __lzo_assert(sizeof(lzo_uint) >= sizeof(unsigned)); +#endif + +#if defined(SIZEOF_UNSIGNED) + r &= __lzo_assert(SIZEOF_UNSIGNED == sizeof(unsigned)); +#endif +#if defined(SIZEOF_UNSIGNED_LONG) + r &= __lzo_assert(SIZEOF_UNSIGNED_LONG == sizeof(unsigned long)); +#endif +#if defined(SIZEOF_UNSIGNED_SHORT) + r &= __lzo_assert(SIZEOF_UNSIGNED_SHORT == sizeof(unsigned short)); +#endif +#if !defined(__LZO_IN_MINILZO) +#if defined(SIZEOF_SIZE_T) + r &= __lzo_assert(SIZEOF_SIZE_T == sizeof(size_t)); +#endif +#endif + + sanity = IS_UNSIGNED(unsigned short) && IS_UNSIGNED(unsigned) && + IS_UNSIGNED(unsigned long) && + IS_SIGNED(short) && IS_SIGNED(int) && IS_SIGNED(long); + if (sanity) + { + r &= __lzo_assert(IS_UNSIGNED(lzo_uint32)); + r &= __lzo_assert(IS_UNSIGNED(lzo_uint)); + r &= __lzo_assert(IS_SIGNED(lzo_int32)); + r &= __lzo_assert(IS_SIGNED(lzo_int)); + + r &= __lzo_assert(INT_MAX == LZO_STYPE_MAX(sizeof(int))); + r &= __lzo_assert(UINT_MAX == LZO_UTYPE_MAX(sizeof(unsigned))); + r &= __lzo_assert(LONG_MAX == LZO_STYPE_MAX(sizeof(long))); + r &= __lzo_assert(ULONG_MAX == LZO_UTYPE_MAX(sizeof(unsigned long))); + r &= __lzo_assert(SHRT_MAX == LZO_STYPE_MAX(sizeof(short))); + r &= __lzo_assert(USHRT_MAX == LZO_UTYPE_MAX(sizeof(unsigned short))); + r &= __lzo_assert(LZO_UINT32_MAX == LZO_UTYPE_MAX(sizeof(lzo_uint32))); + r &= __lzo_assert(LZO_UINT_MAX == LZO_UTYPE_MAX(sizeof(lzo_uint))); +#if !defined(__LZO_IN_MINILZO) + r &= __lzo_assert(SIZE_T_MAX == LZO_UTYPE_MAX(sizeof(size_t))); +#endif + } + +#if 0 + r &= __lzo_assert(LZO_BYTE(257) == 1); + r &= __lzo_assert(LZO_USHORT(65537L) == 1); +#endif + + return r; +} + +static lzo_bool basic_ptr_check(void) +{ + lzo_bool r = 1; + lzo_bool sanity; + + r &= __lzo_assert(sizeof(char *) >= sizeof(int)); + r &= __lzo_assert(sizeof(lzo_byte *) >= sizeof(char *)); + + r &= __lzo_assert(sizeof(lzo_voidp) == sizeof(lzo_byte *)); + r &= __lzo_assert(sizeof(lzo_voidp) == sizeof(lzo_voidpp)); + r &= __lzo_assert(sizeof(lzo_voidp) == sizeof(lzo_bytepp)); + r &= __lzo_assert(sizeof(lzo_voidp) >= sizeof(lzo_uint)); + + r &= __lzo_assert(sizeof(lzo_ptr_t) == sizeof(lzo_voidp)); + r &= __lzo_assert(sizeof(lzo_ptr_t) >= sizeof(lzo_uint)); + + r &= __lzo_assert(sizeof(lzo_ptrdiff_t) >= 4); + r &= __lzo_assert(sizeof(lzo_ptrdiff_t) >= sizeof(ptrdiff_t)); + +#if defined(SIZEOF_CHAR_P) + r &= __lzo_assert(SIZEOF_CHAR_P == sizeof(char *)); +#endif +#if defined(SIZEOF_PTRDIFF_T) + r &= __lzo_assert(SIZEOF_PTRDIFF_T == sizeof(ptrdiff_t)); +#endif + + sanity = IS_UNSIGNED(unsigned short) && IS_UNSIGNED(unsigned) && + IS_UNSIGNED(unsigned long) && + IS_SIGNED(short) && IS_SIGNED(int) && IS_SIGNED(long); + if (sanity) + { + r &= __lzo_assert(IS_UNSIGNED(lzo_ptr_t)); + r &= __lzo_assert(IS_UNSIGNED(lzo_moff_t)); + r &= __lzo_assert(IS_SIGNED(lzo_ptrdiff_t)); + r &= __lzo_assert(IS_SIGNED(lzo_sptr_t)); + } + + return r; +} + +static lzo_bool ptr_check(void) +{ + lzo_bool r = 1; + int i; + char _wrkmem[10 * sizeof(lzo_byte *) + sizeof(lzo_align_t)]; + lzo_byte *wrkmem; + lzo_bytepp dict; + unsigned char x[4 * sizeof(lzo_align_t)]; + long d; + lzo_align_t a; + + for (i = 0; i < (int) sizeof(x); i++) + x[i] = LZO_BYTE(i); + + wrkmem = LZO_PTR_ALIGN_UP((lzo_byte *)_wrkmem,sizeof(lzo_align_t)); + dict = (lzo_bytepp) wrkmem; + + d = (long) ((const lzo_bytep) dict - (const lzo_bytep) _wrkmem); + r &= __lzo_assert(d >= 0); + r &= __lzo_assert(d < (long) sizeof(lzo_align_t)); + + memset(&a,0xff,sizeof(a)); + r &= __lzo_assert(a.a_ushort == USHRT_MAX); + r &= __lzo_assert(a.a_uint == UINT_MAX); + r &= __lzo_assert(a.a_ulong == ULONG_MAX); + r &= __lzo_assert(a.a_lzo_uint == LZO_UINT_MAX); + + if (r == 1) + { + for (i = 0; i < 8; i++) + r &= __lzo_assert((const lzo_voidp) (&dict[i]) == (const lzo_voidp) (&wrkmem[i * sizeof(lzo_byte *)])); + } + + memset(&a,0,sizeof(a)); + r &= __lzo_assert(a.a_charp == NULL); + r &= __lzo_assert(a.a_lzo_bytep == NULL); + //r &= __lzo_assert(NULL == 0); + if (r == 1) + { + for (i = 0; i < 10; i++) + dict[i] = wrkmem; + BZERO8_PTR(dict+1,sizeof(dict[0]),8); + r &= __lzo_assert(dict[0] == wrkmem); + for (i = 1; i < 9; i++) + r &= __lzo_assert(dict[i] == NULL); + r &= __lzo_assert(dict[9] == wrkmem); + } + + if (r == 1) + { + unsigned k = 1; + const unsigned n = (unsigned) sizeof(lzo_uint32); + lzo_byte *p0; + lzo_byte *p1; + + k += __lzo_align_gap(&x[k],n); + p0 = (lzo_bytep) &x[k]; +#if defined(PTR_LINEAR) + r &= __lzo_assert((PTR_LINEAR(p0) & (n-1)) == 0); +#else + r &= __lzo_assert(n == 4); + r &= __lzo_assert(PTR_ALIGNED_4(p0)); +#endif + + r &= __lzo_assert(k >= 1); + p1 = (lzo_bytep) &x[1]; + r &= __lzo_assert(PTR_GE(p0,p1)); + + r &= __lzo_assert(k < 1+n); + p1 = (lzo_bytep) &x[1+n]; + r &= __lzo_assert(PTR_LT(p0,p1)); + + if (r == 1) + { + lzo_uint32 v0 = * (lzo_uint32 *) &x[k]; + lzo_uint32 v1 = * (lzo_uint32 *) &x[k+n]; + + r &= __lzo_assert(v0 > 0); + r &= __lzo_assert(v1 > 0); + } + } + + return r; +} + +LZO_PUBLIC(int) +_lzo_config_check(void) +{ + lzo_bool r = 1; + int i; + union { + lzo_uint32 a; + unsigned short b; + lzo_uint32 aa[4]; + unsigned char x[4*sizeof(lzo_align_t)]; + } u; + +#if 0 + r &= __lzo_assert((const void *)&u == (const void *)&u.a); + r &= __lzo_assert((const void *)&u == (const void *)&u.b); + r &= __lzo_assert((const void *)&u == (const void *)&u.x[0]); + r &= __lzo_assert((const void *)&u == (const void *)&u.aa[0]); +#endif + + r &= basic_integral_check(); + r &= basic_ptr_check(); + if (r != 1) + return LZO_E_ERROR; + + u.a = 0; u.b = 0; + for (i = 0; i < (int) sizeof(u.x); i++) + u.x[i] = LZO_BYTE(i); + +#if 0 + r &= __lzo_assert( (int) (unsigned char) ((char) -1) == 255); +#endif + +#if defined(LZO_BYTE_ORDER) + if (r == 1) + { +# if (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN) + lzo_uint32 a = (lzo_uint32) (u.a & LZO_0xffffffffL); + unsigned short b = (unsigned short) (u.b & 0xffff); + r &= __lzo_assert(a == 0x03020100L); + r &= __lzo_assert(b == 0x0100); +# elif (LZO_BYTE_ORDER == LZO_BIG_ENDIAN) + lzo_uint32 a = u.a >> (8 * sizeof(u.a) - 32); + unsigned short b = u.b >> (8 * sizeof(u.b) - 16); + r &= __lzo_assert(a == 0x00010203L); + r &= __lzo_assert(b == 0x0001); +# else +# error invalid LZO_BYTE_ORDER +# endif + } +#endif + +#if defined(LZO_UNALIGNED_OK_2) + r &= __lzo_assert(sizeof(short) == 2); + if (r == 1) + { + unsigned short b[4]; + + for (i = 0; i < 4; i++) + b[i] = * (const unsigned short *) &u.x[i]; + +# if (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN) + r &= __lzo_assert(b[0] == 0x0100); + r &= __lzo_assert(b[1] == 0x0201); + r &= __lzo_assert(b[2] == 0x0302); + r &= __lzo_assert(b[3] == 0x0403); +# elif (LZO_BYTE_ORDER == LZO_BIG_ENDIAN) + r &= __lzo_assert(b[0] == 0x0001); + r &= __lzo_assert(b[1] == 0x0102); + r &= __lzo_assert(b[2] == 0x0203); + r &= __lzo_assert(b[3] == 0x0304); +# endif + } +#endif + +#if defined(LZO_UNALIGNED_OK_4) + r &= __lzo_assert(sizeof(lzo_uint32) == 4); + if (r == 1) + { + lzo_uint32 a[4]; + + for (i = 0; i < 4; i++) + a[i] = * (const lzo_uint32 *) &u.x[i]; + +# if (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN) + r &= __lzo_assert(a[0] == 0x03020100L); + r &= __lzo_assert(a[1] == 0x04030201L); + r &= __lzo_assert(a[2] == 0x05040302L); + r &= __lzo_assert(a[3] == 0x06050403L); +# elif (LZO_BYTE_ORDER == LZO_BIG_ENDIAN) + r &= __lzo_assert(a[0] == 0x00010203L); + r &= __lzo_assert(a[1] == 0x01020304L); + r &= __lzo_assert(a[2] == 0x02030405L); + r &= __lzo_assert(a[3] == 0x03040506L); +# endif + } +#endif + +#if defined(LZO_ALIGNED_OK_4) + r &= __lzo_assert(sizeof(lzo_uint32) == 4); +#endif + + r &= __lzo_assert(lzo_sizeof_dict_t == sizeof(lzo_dict_t)); + +#if defined(__LZO_IN_MINLZO) + if (r == 1) + { + lzo_uint32 adler; + adler = lzo_adler32(0, NULL, 0); + adler = lzo_adler32(adler, lzo_copyright(), 200); + r &= __lzo_assert(adler == 0x7ea34377L); + } +#endif + + if (r == 1) + { + r &= __lzo_assert(!schedule_insns_bug()); + } + + if (r == 1) + { + static int x[3]; + static unsigned xn = 3; + register unsigned j; + + for (j = 0; j < xn; j++) + x[j] = (int)j - 3; + r &= __lzo_assert(!strength_reduce_bug(x)); + } + + if (r == 1) + { + r &= ptr_check(); + } + + return r == 1 ? LZO_E_OK : LZO_E_ERROR; +} + +static lzo_bool schedule_insns_bug(void) +{ +#if defined(__LZO_CHECKER) + return 0; +#else + const int clone[] = {1, 2, 0}; + const int *q; + q = clone; + return (*q) ? 0 : 1; +#endif +} + +static lzo_bool strength_reduce_bug(int *x) +{ + return x[0] != -3 || x[1] != -2 || x[2] != -1; +} + +int __lzo_init_done = 0; + +LZO_PUBLIC(int) +__lzo_init2(unsigned v, int s1, int s2, int s3, int s4, int s5, + int s6, int s7, int s8, int s9) +{ + int r; + + __lzo_init_done = 1; + + if (v == 0) + return LZO_E_ERROR; + + r = (s1 == -1 || s1 == (int) sizeof(short)) && + (s2 == -1 || s2 == (int) sizeof(int)) && + (s3 == -1 || s3 == (int) sizeof(long)) && + (s4 == -1 || s4 == (int) sizeof(lzo_uint32)) && + (s5 == -1 || s5 == (int) sizeof(lzo_uint)) && + (s6 == -1 || s6 == (int) lzo_sizeof_dict_t) && + (s7 == -1 || s7 == (int) sizeof(char *)) && + (s8 == -1 || s8 == (int) sizeof(lzo_voidp)) && + (s9 == -1 || s9 == (int) sizeof(lzo_compress_t)); + if (!r) + return LZO_E_ERROR; + + r = _lzo_config_check(); + if (r != LZO_E_OK) + return r; + + return r; +} + +#if !defined(__LZO_IN_MINILZO) + +LZO_EXTERN(int) +__lzo_init(unsigned v,int s1,int s2,int s3,int s4,int s5,int s6,int s7); + +LZO_PUBLIC(int) +__lzo_init(unsigned v,int s1,int s2,int s3,int s4,int s5,int s6,int s7) +{ + if (v == 0 || v > 0x1010) + return LZO_E_ERROR; + return __lzo_init2(v,s1,s2,s3,s4,s5,-1,-1,s6,s7); +} + +#endif + +#define do_compress _lzo1x_1_do_compress + +#define LZO_NEED_DICT_H +#define D_BITS 14 +#define D_INDEX1(d,p) d = DM((0x21*DX3(p,5,5,6)) >> 5) +#define D_INDEX2(d,p) d = (d & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f) + +#ifndef __LZO_CONFIG1X_H +#define __LZO_CONFIG1X_H + +#if !defined(LZO1X) && !defined(LZO1Y) && !defined(LZO1Z) +# define LZO1X +#endif + +#if !defined(__LZO_IN_MINILZO) +#include +#endif + +#define LZO_EOF_CODE +#undef LZO_DETERMINISTIC + +#define M1_MAX_OFFSET 0x0400 +#ifndef M2_MAX_OFFSET +#define M2_MAX_OFFSET 0x0800 +#endif +#define M3_MAX_OFFSET 0x4000 +#define M4_MAX_OFFSET 0xbfff + +#define MX_MAX_OFFSET (M1_MAX_OFFSET + M2_MAX_OFFSET) + +#define M1_MIN_LEN 2 +#define M1_MAX_LEN 2 +#define M2_MIN_LEN 3 +#ifndef M2_MAX_LEN +#define M2_MAX_LEN 8 +#endif +#define M3_MIN_LEN 3 +#define M3_MAX_LEN 33 +#define M4_MIN_LEN 3 +#define M4_MAX_LEN 9 + +#define M1_MARKER 0 +#define M2_MARKER 64 +#define M3_MARKER 32 +#define M4_MARKER 16 + +#ifndef MIN_LOOKAHEAD +#define MIN_LOOKAHEAD (M2_MAX_LEN + 1) +#endif + +#if defined(LZO_NEED_DICT_H) + +#ifndef LZO_HASH +#define LZO_HASH LZO_HASH_LZO_INCREMENTAL_B +#endif +#define DL_MIN_LEN M2_MIN_LEN + +#ifndef __LZO_DICT_H +#define __LZO_DICT_H + +#ifdef __cplusplus +extern "C" { +#endif + +#if !defined(D_BITS) && defined(DBITS) +# define D_BITS DBITS +#endif +#if !defined(D_BITS) +# error D_BITS is not defined +#endif +#if (D_BITS < 16) +# define D_SIZE LZO_SIZE(D_BITS) +# define D_MASK LZO_MASK(D_BITS) +#else +# define D_SIZE LZO_USIZE(D_BITS) +# define D_MASK LZO_UMASK(D_BITS) +#endif +#define D_HIGH ((D_MASK >> 1) + 1) + +#if !defined(DD_BITS) +# define DD_BITS 0 +#endif +#define DD_SIZE LZO_SIZE(DD_BITS) +#define DD_MASK LZO_MASK(DD_BITS) + +#if !defined(DL_BITS) +# define DL_BITS (D_BITS - DD_BITS) +#endif +#if (DL_BITS < 16) +# define DL_SIZE LZO_SIZE(DL_BITS) +# define DL_MASK LZO_MASK(DL_BITS) +#else +# define DL_SIZE LZO_USIZE(DL_BITS) +# define DL_MASK LZO_UMASK(DL_BITS) +#endif + +#if (D_BITS != DL_BITS + DD_BITS) +# error D_BITS does not match +#endif +#if (D_BITS < 8 || D_BITS > 18) +# error invalid D_BITS +#endif +#if (DL_BITS < 8 || DL_BITS > 20) +# error invalid DL_BITS +#endif +#if (DD_BITS < 0 || DD_BITS > 6) +# error invalid DD_BITS +#endif + +#if !defined(DL_MIN_LEN) +# define DL_MIN_LEN 3 +#endif +#if !defined(DL_SHIFT) +# define DL_SHIFT ((DL_BITS + (DL_MIN_LEN - 1)) / DL_MIN_LEN) +#endif + +#define LZO_HASH_GZIP 1 +#define LZO_HASH_GZIP_INCREMENTAL 2 +#define LZO_HASH_LZO_INCREMENTAL_A 3 +#define LZO_HASH_LZO_INCREMENTAL_B 4 + +#if !defined(LZO_HASH) +# error choose a hashing strategy +#endif + +#if (DL_MIN_LEN == 3) +# define _DV2_A(p,shift1,shift2) \ + (((( (lzo_uint32)((p)[0]) << shift1) ^ (p)[1]) << shift2) ^ (p)[2]) +# define _DV2_B(p,shift1,shift2) \ + (((( (lzo_uint32)((p)[2]) << shift1) ^ (p)[1]) << shift2) ^ (p)[0]) +# define _DV3_B(p,shift1,shift2,shift3) \ + ((_DV2_B((p)+1,shift1,shift2) << (shift3)) ^ (p)[0]) +#elif (DL_MIN_LEN == 2) +# define _DV2_A(p,shift1,shift2) \ + (( (lzo_uint32)(p[0]) << shift1) ^ p[1]) +# define _DV2_B(p,shift1,shift2) \ + (( (lzo_uint32)(p[1]) << shift1) ^ p[2]) +#else +# error invalid DL_MIN_LEN +#endif +#define _DV_A(p,shift) _DV2_A(p,shift,shift) +#define _DV_B(p,shift) _DV2_B(p,shift,shift) +#define DA2(p,s1,s2) \ + (((((lzo_uint32)((p)[2]) << (s2)) + (p)[1]) << (s1)) + (p)[0]) +#define DS2(p,s1,s2) \ + (((((lzo_uint32)((p)[2]) << (s2)) - (p)[1]) << (s1)) - (p)[0]) +#define DX2(p,s1,s2) \ + (((((lzo_uint32)((p)[2]) << (s2)) ^ (p)[1]) << (s1)) ^ (p)[0]) +#define DA3(p,s1,s2,s3) ((DA2((p)+1,s2,s3) << (s1)) + (p)[0]) +#define DS3(p,s1,s2,s3) ((DS2((p)+1,s2,s3) << (s1)) - (p)[0]) +#define DX3(p,s1,s2,s3) ((DX2((p)+1,s2,s3) << (s1)) ^ (p)[0]) +#define DMS(v,s) ((lzo_uint) (((v) & (D_MASK >> (s))) << (s))) +#define DM(v) DMS(v,0) + +#if (LZO_HASH == LZO_HASH_GZIP) +# define _DINDEX(dv,p) (_DV_A((p),DL_SHIFT)) + +#elif (LZO_HASH == LZO_HASH_GZIP_INCREMENTAL) +# define __LZO_HASH_INCREMENTAL +# define DVAL_FIRST(dv,p) dv = _DV_A((p),DL_SHIFT) +# define DVAL_NEXT(dv,p) dv = (((dv) << DL_SHIFT) ^ p[2]) +# define _DINDEX(dv,p) (dv) +# define DVAL_LOOKAHEAD DL_MIN_LEN + +#elif (LZO_HASH == LZO_HASH_LZO_INCREMENTAL_A) +# define __LZO_HASH_INCREMENTAL +# define DVAL_FIRST(dv,p) dv = _DV_A((p),5) +# define DVAL_NEXT(dv,p) \ + dv ^= (lzo_uint32)(p[-1]) << (2*5); dv = (((dv) << 5) ^ p[2]) +# define _DINDEX(dv,p) ((0x9f5f * (dv)) >> 5) +# define DVAL_LOOKAHEAD DL_MIN_LEN + +#elif (LZO_HASH == LZO_HASH_LZO_INCREMENTAL_B) +# define __LZO_HASH_INCREMENTAL +# define DVAL_FIRST(dv,p) dv = _DV_B((p),5) +# define DVAL_NEXT(dv,p) \ + dv ^= p[-1]; dv = (((dv) >> 5) ^ ((lzo_uint32)(p[2]) << (2*5))) +# define _DINDEX(dv,p) ((0x9f5f * (dv)) >> 5) +# define DVAL_LOOKAHEAD DL_MIN_LEN + +#else +# error choose a hashing strategy +#endif + +#ifndef DINDEX +#define DINDEX(dv,p) ((lzo_uint)((_DINDEX(dv,p)) & DL_MASK) << DD_BITS) +#endif +#if !defined(DINDEX1) && defined(D_INDEX1) +#define DINDEX1 D_INDEX1 +#endif +#if !defined(DINDEX2) && defined(D_INDEX2) +#define DINDEX2 D_INDEX2 +#endif + +#if !defined(__LZO_HASH_INCREMENTAL) +# define DVAL_FIRST(dv,p) ((void) 0) +# define DVAL_NEXT(dv,p) ((void) 0) +# define DVAL_LOOKAHEAD 0 +#endif + +#if !defined(DVAL_ASSERT) +#if defined(__LZO_HASH_INCREMENTAL) && !defined(NDEBUG) +static void DVAL_ASSERT(lzo_uint32 dv, const lzo_byte *p) +{ + lzo_uint32 df; + DVAL_FIRST(df,(p)); + assert(DINDEX(dv,p) == DINDEX(df,p)); +} +#else +# define DVAL_ASSERT(dv,p) ((void) 0) +#endif +#endif + +#if defined(LZO_DICT_USE_PTR) +# define DENTRY(p,in) (p) +# define GINDEX(m_pos,m_off,dict,dindex,in) m_pos = dict[dindex] +#else +# define DENTRY(p,in) ((lzo_uint) ((p)-(in))) +# define GINDEX(m_pos,m_off,dict,dindex,in) m_off = dict[dindex] +#endif + +#if (DD_BITS == 0) + +# define UPDATE_D(dict,drun,dv,p,in) dict[ DINDEX(dv,p) ] = DENTRY(p,in) +# define UPDATE_I(dict,drun,index,p,in) dict[index] = DENTRY(p,in) +# define UPDATE_P(ptr,drun,p,in) (ptr)[0] = DENTRY(p,in) + +#else + +# define UPDATE_D(dict,drun,dv,p,in) \ + dict[ DINDEX(dv,p) + drun++ ] = DENTRY(p,in); drun &= DD_MASK +# define UPDATE_I(dict,drun,index,p,in) \ + dict[ (index) + drun++ ] = DENTRY(p,in); drun &= DD_MASK +# define UPDATE_P(ptr,drun,p,in) \ + (ptr) [ drun++ ] = DENTRY(p,in); drun &= DD_MASK + +#endif + +#if defined(LZO_DICT_USE_PTR) + +#define LZO_CHECK_MPOS_DET(m_pos,m_off,in,ip,max_offset) \ + (m_pos == NULL || (m_off = (lzo_moff_t) (ip - m_pos)) > max_offset) + +#define LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,max_offset) \ + (BOUNDS_CHECKING_OFF_IN_EXPR( \ + (PTR_LT(m_pos,in) || \ + (m_off = (lzo_moff_t) PTR_DIFF(ip,m_pos)) <= 0 || \ + m_off > max_offset) )) + +#else + +#define LZO_CHECK_MPOS_DET(m_pos,m_off,in,ip,max_offset) \ + (m_off == 0 || \ + ((m_off = (lzo_moff_t) ((ip)-(in)) - m_off) > max_offset) || \ + (m_pos = (ip) - (m_off), 0) ) + +#define LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,max_offset) \ + ((lzo_moff_t) ((ip)-(in)) <= m_off || \ + ((m_off = (lzo_moff_t) ((ip)-(in)) - m_off) > max_offset) || \ + (m_pos = (ip) - (m_off), 0) ) + +#endif + +#if defined(LZO_DETERMINISTIC) +# define LZO_CHECK_MPOS LZO_CHECK_MPOS_DET +#else +# define LZO_CHECK_MPOS LZO_CHECK_MPOS_NON_DET +#endif + +#ifdef __cplusplus +} +#endif + +#endif + +#endif + +#endif + +#define DO_COMPRESS lzo1x_1_compress + +static +lzo_uint do_compress ( const lzo_byte *in , lzo_uint in_len, + lzo_byte *out, lzo_uint *out_len, + lzo_voidp wrkmem ) +{ +#if 0 && defined(__GNUC__) && defined(__i386__) + register const lzo_byte *ip __asm__("%esi"); +#else + register const lzo_byte *ip; +#endif + lzo_byte *op; + const lzo_byte * const in_end = in + in_len; + const lzo_byte * const ip_end = in + in_len - M2_MAX_LEN - 5; + const lzo_byte *ii; + lzo_dict_p const dict = (lzo_dict_p) wrkmem; + + op = out; + ip = in; + ii = ip; + + ip += 4; + for (;;) + { +#if 0 && defined(__GNUC__) && defined(__i386__) + register const lzo_byte *m_pos __asm__("%edi"); +#else + register const lzo_byte *m_pos; +#endif + lzo_moff_t m_off; + lzo_uint m_len; + lzo_uint dindex; + + DINDEX1(dindex,ip); + GINDEX(m_pos,m_off,dict,dindex,in); + if (LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,M4_MAX_OFFSET)) + goto literal; +#if 1 + if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) + goto try_match; + DINDEX2(dindex,ip); +#endif + GINDEX(m_pos,m_off,dict,dindex,in); + if (LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,M4_MAX_OFFSET)) + goto literal; + if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) + goto try_match; + goto literal; + +try_match: +#if 1 && defined(LZO_UNALIGNED_OK_2) + if (* (const lzo_ushortp) m_pos != * (const lzo_ushortp) ip) +#else + if (m_pos[0] != ip[0] || m_pos[1] != ip[1]) +#endif + { + } + else + { + if (m_pos[2] == ip[2]) + { +#if 0 + if (m_off <= M2_MAX_OFFSET) + goto match; + if (lit <= 3) + goto match; + if (lit == 3) + { + assert(op - 2 > out); op[-2] |= LZO_BYTE(3); + *op++ = *ii++; *op++ = *ii++; *op++ = *ii++; + goto code_match; + } + if (m_pos[3] == ip[3]) +#endif + goto match; + } + else + { +#if 0 +#if 0 + if (m_off <= M1_MAX_OFFSET && lit > 0 && lit <= 3) +#else + if (m_off <= M1_MAX_OFFSET && lit == 3) +#endif + { + register lzo_uint t; + + t = lit; + assert(op - 2 > out); op[-2] |= LZO_BYTE(t); + do *op++ = *ii++; while (--t > 0); + assert(ii == ip); + m_off -= 1; + *op++ = LZO_BYTE(M1_MARKER | ((m_off & 3) << 2)); + *op++ = LZO_BYTE(m_off >> 2); + ip += 2; + goto match_done; + } +#endif + } + } + +literal: + UPDATE_I(dict,0,dindex,ip,in); + ++ip; + if (ip >= ip_end) + break; + continue; + +match: + UPDATE_I(dict,0,dindex,ip,in); + if (ip - ii > 0) + { + register lzo_uint t = ip - ii; + + if (t <= 3) + { + assert(op - 2 > out); + op[-2] |= LZO_BYTE(t); + } + else if (t <= 18) + *op++ = LZO_BYTE(t - 3); + else + { + register lzo_uint tt = t - 18; + + *op++ = 0; + while (tt > 255) + { + tt -= 255; + *op++ = 0; + } + assert(tt > 0); + *op++ = LZO_BYTE(tt); + } + do *op++ = *ii++; while (--t > 0); + } + + assert(ii == ip); + ip += 3; + if (m_pos[3] != *ip++ || m_pos[4] != *ip++ || m_pos[5] != *ip++ || + m_pos[6] != *ip++ || m_pos[7] != *ip++ || m_pos[8] != *ip++ +#ifdef LZO1Y + || m_pos[ 9] != *ip++ || m_pos[10] != *ip++ || m_pos[11] != *ip++ + || m_pos[12] != *ip++ || m_pos[13] != *ip++ || m_pos[14] != *ip++ +#endif + ) + { + --ip; + m_len = ip - ii; + assert(m_len >= 3); assert(m_len <= M2_MAX_LEN); + + if (m_off <= M2_MAX_OFFSET) + { + m_off -= 1; +#if defined(LZO1X) + *op++ = LZO_BYTE(((m_len - 1) << 5) | ((m_off & 7) << 2)); + *op++ = LZO_BYTE(m_off >> 3); +#elif defined(LZO1Y) + *op++ = LZO_BYTE(((m_len + 1) << 4) | ((m_off & 3) << 2)); + *op++ = LZO_BYTE(m_off >> 2); +#endif + } + else if (m_off <= M3_MAX_OFFSET) + { + m_off -= 1; + *op++ = LZO_BYTE(M3_MARKER | (m_len - 2)); + goto m3_m4_offset; + } + else +#if defined(LZO1X) + { + m_off -= 0x4000; + assert(m_off > 0); assert(m_off <= 0x7fff); + *op++ = LZO_BYTE(M4_MARKER | + ((m_off & 0x4000) >> 11) | (m_len - 2)); + goto m3_m4_offset; + } +#elif defined(LZO1Y) + goto m4_match; +#endif + } + else + { + { + const lzo_byte *end = in_end; + const lzo_byte *m = m_pos + M2_MAX_LEN + 1; + while (ip < end && *m == *ip) + m++, ip++; + m_len = (ip - ii); + } + assert(m_len > M2_MAX_LEN); + + if (m_off <= M3_MAX_OFFSET) + { + m_off -= 1; + if (m_len <= 33) + *op++ = LZO_BYTE(M3_MARKER | (m_len - 2)); + else + { + m_len -= 33; + *op++ = M3_MARKER | 0; + goto m3_m4_len; + } + } + else + { +#if defined(LZO1Y) +m4_match: +#endif + m_off -= 0x4000; + assert(m_off > 0); assert(m_off <= 0x7fff); + if (m_len <= M4_MAX_LEN) + *op++ = LZO_BYTE(M4_MARKER | + ((m_off & 0x4000) >> 11) | (m_len - 2)); + else + { + m_len -= M4_MAX_LEN; + *op++ = LZO_BYTE(M4_MARKER | ((m_off & 0x4000) >> 11)); +m3_m4_len: + while (m_len > 255) + { + m_len -= 255; + *op++ = 0; + } + assert(m_len > 0); + *op++ = LZO_BYTE(m_len); + } + } + +m3_m4_offset: + *op++ = LZO_BYTE((m_off & 63) << 2); + *op++ = LZO_BYTE(m_off >> 6); + } + +#if 0 +match_done: +#endif + ii = ip; + if (ip >= ip_end) + break; + } + + *out_len = op - out; + return (lzo_uint) (in_end - ii); +} + +LZO_PUBLIC(int) +DO_COMPRESS ( const lzo_byte *in , lzo_uint in_len, + lzo_byte *out, lzo_uint *out_len, + lzo_voidp wrkmem ) +{ + lzo_byte *op = out; + lzo_uint t; + +#if defined(__LZO_QUERY_COMPRESS) + if (__LZO_IS_COMPRESS_QUERY(in,in_len,out,out_len,wrkmem)) + return __LZO_QUERY_COMPRESS(in,in_len,out,out_len,wrkmem,D_SIZE,lzo_sizeof(lzo_dict_t)); +#endif + + if (in_len <= M2_MAX_LEN + 5) + t = in_len; + else + { + t = do_compress(in,in_len,op,out_len,wrkmem); + op += *out_len; + } + + if (t > 0) + { + const lzo_byte *ii = in + in_len - t; + + if (op == out && t <= 238) + *op++ = LZO_BYTE(17 + t); + else if (t <= 3) + op[-2] |= LZO_BYTE(t); + else if (t <= 18) + *op++ = LZO_BYTE(t - 3); + else + { + lzo_uint tt = t - 18; + + *op++ = 0; + while (tt > 255) + { + tt -= 255; + *op++ = 0; + } + assert(tt > 0); + *op++ = LZO_BYTE(tt); + } + do *op++ = *ii++; while (--t > 0); + } + + *op++ = M4_MARKER | 1; + *op++ = 0; + *op++ = 0; + + *out_len = op - out; + return LZO_E_OK; +} + +#undef do_compress +#undef DO_COMPRESS +#undef LZO_HASH + +#undef LZO_TEST_DECOMPRESS_OVERRUN +#undef LZO_TEST_DECOMPRESS_OVERRUN_INPUT +#undef LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT +#undef LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND +#undef DO_DECOMPRESS +#define DO_DECOMPRESS lzo1x_decompress + +#if defined(LZO_TEST_DECOMPRESS_OVERRUN) +# if !defined(LZO_TEST_DECOMPRESS_OVERRUN_INPUT) +# define LZO_TEST_DECOMPRESS_OVERRUN_INPUT 2 +# endif +# if !defined(LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT) +# define LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT 2 +# endif +# if !defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND) +# define LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND +# endif +#endif + +#undef TEST_IP +#undef TEST_OP +#undef TEST_LOOKBEHIND +#undef NEED_IP +#undef NEED_OP +#undef HAVE_TEST_IP +#undef HAVE_TEST_OP +#undef HAVE_NEED_IP +#undef HAVE_NEED_OP +#undef HAVE_ANY_IP +#undef HAVE_ANY_OP + +#if defined(LZO_TEST_DECOMPRESS_OVERRUN_INPUT) +# if (LZO_TEST_DECOMPRESS_OVERRUN_INPUT >= 1) +# define TEST_IP (ip < ip_end) +# endif +# if (LZO_TEST_DECOMPRESS_OVERRUN_INPUT >= 2) +# define NEED_IP(x) \ + if ((lzo_uint)(ip_end - ip) < (lzo_uint)(x)) goto input_overrun +# endif +#endif + +#if defined(LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT) +# if (LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT >= 1) +# define TEST_OP (op <= op_end) +# endif +# if (LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT >= 2) +# undef TEST_OP +# define NEED_OP(x) \ + if ((lzo_uint)(op_end - op) < (lzo_uint)(x)) goto output_overrun +# endif +#endif + +#if defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND) +# define TEST_LOOKBEHIND(m_pos,out) if (m_pos < out) goto lookbehind_overrun +#else +# define TEST_LOOKBEHIND(m_pos,op) ((void) 0) +#endif + +#if !defined(LZO_EOF_CODE) && !defined(TEST_IP) +# define TEST_IP (ip < ip_end) +#endif + +#if defined(TEST_IP) +# define HAVE_TEST_IP +#else +# define TEST_IP 1 +#endif +#if defined(TEST_OP) +# define HAVE_TEST_OP +#else +# define TEST_OP 1 +#endif + +#if defined(NEED_IP) +# define HAVE_NEED_IP +#else +# define NEED_IP(x) ((void) 0) +#endif +#if defined(NEED_OP) +# define HAVE_NEED_OP +#else +# define NEED_OP(x) ((void) 0) +#endif + +#if defined(HAVE_TEST_IP) || defined(HAVE_NEED_IP) +# define HAVE_ANY_IP +#endif +#if defined(HAVE_TEST_OP) || defined(HAVE_NEED_OP) +# define HAVE_ANY_OP +#endif + +#if defined(DO_DECOMPRESS) +LZO_PUBLIC(int) +DO_DECOMPRESS ( const lzo_byte *in , lzo_uint in_len, + lzo_byte *out, lzo_uint *out_len, + lzo_voidp wrkmem ) +#endif +{ + register lzo_byte *op; + register const lzo_byte *ip; + register lzo_uint t; +#if defined(COPY_DICT) + lzo_uint m_off; + const lzo_byte *dict_end; +#else + register const lzo_byte *m_pos; +#endif + + const lzo_byte * const ip_end = in + in_len; +#if defined(HAVE_ANY_OP) + lzo_byte * const op_end = out + *out_len; +#endif +#if defined(LZO1Z) + lzo_uint last_m_off = 0; +#endif + + LZO_UNUSED(wrkmem); + +#if defined(__LZO_QUERY_DECOMPRESS) + if (__LZO_IS_DECOMPRESS_QUERY(in,in_len,out,out_len,wrkmem)) + return __LZO_QUERY_DECOMPRESS(in,in_len,out,out_len,wrkmem,0,0); +#endif + +#if defined(COPY_DICT) + if (dict) + { + if (dict_len > M4_MAX_OFFSET) + { + dict += dict_len - M4_MAX_OFFSET; + dict_len = M4_MAX_OFFSET; + } + dict_end = dict + dict_len; + } + else + { + dict_len = 0; + dict_end = NULL; + } +#endif + + *out_len = 0; + + op = out; + ip = in; + + if (*ip > 17) + { + t = *ip++ - 17; + if (t < 4) + goto match_next; + assert(t > 0); NEED_OP(t); NEED_IP(t+1); + do *op++ = *ip++; while (--t > 0); + goto first_literal_run; + } + + while (TEST_IP && TEST_OP) + { + t = *ip++; + if (t >= 16) + goto match; + if (t == 0) + { + NEED_IP(1); + while (*ip == 0) + { + t += 255; + ip++; + NEED_IP(1); + } + t += 15 + *ip++; + } + assert(t > 0); NEED_OP(t+3); NEED_IP(t+4); +#if defined(LZO_UNALIGNED_OK_4) || defined(LZO_ALIGNED_OK_4) +#if !defined(LZO_UNALIGNED_OK_4) + if (PTR_ALIGNED2_4(op,ip)) + { +#endif + * (lzo_uint32p) op = * (const lzo_uint32p) ip; + op += 4; ip += 4; + if (--t > 0) + { + if (t >= 4) + { + do { + * (lzo_uint32p) op = * (const lzo_uint32p) ip; + op += 4; ip += 4; t -= 4; + } while (t >= 4); + if (t > 0) do *op++ = *ip++; while (--t > 0); + } + else + do *op++ = *ip++; while (--t > 0); + } +#if !defined(LZO_UNALIGNED_OK_4) + } + else +#endif +#endif +#if !defined(LZO_UNALIGNED_OK_4) + { + *op++ = *ip++; *op++ = *ip++; *op++ = *ip++; + do *op++ = *ip++; while (--t > 0); + } +#endif + +first_literal_run: + + t = *ip++; + if (t >= 16) + goto match; +#if defined(COPY_DICT) +#if defined(LZO1Z) + m_off = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2); + last_m_off = m_off; +#else + m_off = (1 + M2_MAX_OFFSET) + (t >> 2) + (*ip++ << 2); +#endif + NEED_OP(3); + t = 3; COPY_DICT(t,m_off) +#else +#if defined(LZO1Z) + t = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2); + m_pos = op - t; + last_m_off = t; +#else + m_pos = op - (1 + M2_MAX_OFFSET); + m_pos -= t >> 2; + m_pos -= *ip++ << 2; +#endif + TEST_LOOKBEHIND(m_pos,out); NEED_OP(3); + *op++ = *m_pos++; *op++ = *m_pos++; *op++ = *m_pos; +#endif + goto match_done; + + while (TEST_IP && TEST_OP) + { +match: + if (t >= 64) + { +#if defined(COPY_DICT) +#if defined(LZO1X) + m_off = 1 + ((t >> 2) & 7) + (*ip++ << 3); + t = (t >> 5) - 1; +#elif defined(LZO1Y) + m_off = 1 + ((t >> 2) & 3) + (*ip++ << 2); + t = (t >> 4) - 3; +#elif defined(LZO1Z) + m_off = t & 0x1f; + if (m_off >= 0x1c) + m_off = last_m_off; + else + { + m_off = 1 + (m_off << 6) + (*ip++ >> 2); + last_m_off = m_off; + } + t = (t >> 5) - 1; +#endif +#else +#if defined(LZO1X) + m_pos = op - 1; + m_pos -= (t >> 2) & 7; + m_pos -= *ip++ << 3; + t = (t >> 5) - 1; +#elif defined(LZO1Y) + m_pos = op - 1; + m_pos -= (t >> 2) & 3; + m_pos -= *ip++ << 2; + t = (t >> 4) - 3; +#elif defined(LZO1Z) + { + lzo_uint off = t & 0x1f; + m_pos = op; + if (off >= 0x1c) + { + assert(last_m_off > 0); + m_pos -= last_m_off; + } + else + { + off = 1 + (off << 6) + (*ip++ >> 2); + m_pos -= off; + last_m_off = off; + } + } + t = (t >> 5) - 1; +#endif + TEST_LOOKBEHIND(m_pos,out); assert(t > 0); NEED_OP(t+3-1); + goto copy_match; +#endif + } + else if (t >= 32) + { + t &= 31; + if (t == 0) + { + NEED_IP(1); + while (*ip == 0) + { + t += 255; + ip++; + NEED_IP(1); + } + t += 31 + *ip++; + } +#if defined(COPY_DICT) +#if defined(LZO1Z) + m_off = 1 + (ip[0] << 6) + (ip[1] >> 2); + last_m_off = m_off; +#else + m_off = 1 + (ip[0] >> 2) + (ip[1] << 6); +#endif +#else +#if defined(LZO1Z) + { + lzo_uint off = 1 + (ip[0] << 6) + (ip[1] >> 2); + m_pos = op - off; + last_m_off = off; + } +#elif defined(LZO_UNALIGNED_OK_2) && (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN) + m_pos = op - 1; + m_pos -= (* (const lzo_ushortp) ip) >> 2; +#else + m_pos = op - 1; + m_pos -= (ip[0] >> 2) + (ip[1] << 6); +#endif +#endif + ip += 2; + } + else if (t >= 16) + { +#if defined(COPY_DICT) + m_off = (t & 8) << 11; +#else + m_pos = op; + m_pos -= (t & 8) << 11; +#endif + t &= 7; + if (t == 0) + { + NEED_IP(1); + while (*ip == 0) + { + t += 255; + ip++; + NEED_IP(1); + } + t += 7 + *ip++; + } +#if defined(COPY_DICT) +#if defined(LZO1Z) + m_off += (ip[0] << 6) + (ip[1] >> 2); +#else + m_off += (ip[0] >> 2) + (ip[1] << 6); +#endif + ip += 2; + if (m_off == 0) + goto eof_found; + m_off += 0x4000; +#if defined(LZO1Z) + last_m_off = m_off; +#endif +#else +#if defined(LZO1Z) + m_pos -= (ip[0] << 6) + (ip[1] >> 2); +#elif defined(LZO_UNALIGNED_OK_2) && (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN) + m_pos -= (* (const lzo_ushortp) ip) >> 2; +#else + m_pos -= (ip[0] >> 2) + (ip[1] << 6); +#endif + ip += 2; + if (m_pos == op) + goto eof_found; + m_pos -= 0x4000; +#if defined(LZO1Z) + last_m_off = op - m_pos; +#endif +#endif + } + else + { +#if defined(COPY_DICT) +#if defined(LZO1Z) + m_off = 1 + (t << 6) + (*ip++ >> 2); + last_m_off = m_off; +#else + m_off = 1 + (t >> 2) + (*ip++ << 2); +#endif + NEED_OP(2); + t = 2; COPY_DICT(t,m_off) +#else +#if defined(LZO1Z) + t = 1 + (t << 6) + (*ip++ >> 2); + m_pos = op - t; + last_m_off = t; +#else + m_pos = op - 1; + m_pos -= t >> 2; + m_pos -= *ip++ << 2; +#endif + TEST_LOOKBEHIND(m_pos,out); NEED_OP(2); + *op++ = *m_pos++; *op++ = *m_pos; +#endif + goto match_done; + } + +#if defined(COPY_DICT) + + NEED_OP(t+3-1); + t += 3-1; COPY_DICT(t,m_off) + +#else + + TEST_LOOKBEHIND(m_pos,out); assert(t > 0); NEED_OP(t+3-1); +#if defined(LZO_UNALIGNED_OK_4) || defined(LZO_ALIGNED_OK_4) +#if !defined(LZO_UNALIGNED_OK_4) + if (t >= 2 * 4 - (3 - 1) && PTR_ALIGNED2_4(op,m_pos)) + { + assert((op - m_pos) >= 4); +#else + if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) + { +#endif + * (lzo_uint32p) op = * (const lzo_uint32p) m_pos; + op += 4; m_pos += 4; t -= 4 - (3 - 1); + do { + * (lzo_uint32p) op = * (const lzo_uint32p) m_pos; + op += 4; m_pos += 4; t -= 4; + } while (t >= 4); + if (t > 0) do *op++ = *m_pos++; while (--t > 0); + } + else +#endif + { +copy_match: + *op++ = *m_pos++; *op++ = *m_pos++; + do *op++ = *m_pos++; while (--t > 0); + } + +#endif + +match_done: +#if defined(LZO1Z) + t = ip[-1] & 3; +#else + t = ip[-2] & 3; +#endif + if (t == 0) + break; + +match_next: + assert(t > 0); NEED_OP(t); NEED_IP(t+1); + do *op++ = *ip++; while (--t > 0); + t = *ip++; + } + } + +#if defined(HAVE_TEST_IP) || defined(HAVE_TEST_OP) + *out_len = op - out; + return LZO_E_EOF_NOT_FOUND; +#endif + +eof_found: + assert(t == 1); + *out_len = op - out; + return (ip == ip_end ? LZO_E_OK : + (ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN)); + +#if defined(HAVE_NEED_IP) +input_overrun: + *out_len = op - out; + return LZO_E_INPUT_OVERRUN; +#endif + +#if defined(HAVE_NEED_OP) +output_overrun: + *out_len = op - out; + return LZO_E_OUTPUT_OVERRUN; +#endif + +#if defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND) +lookbehind_overrun: + *out_len = op - out; + return LZO_E_LOOKBEHIND_OVERRUN; +#endif +} + +#define LZO_TEST_DECOMPRESS_OVERRUN +#undef DO_DECOMPRESS +#define DO_DECOMPRESS lzo1x_decompress_safe + +#if defined(LZO_TEST_DECOMPRESS_OVERRUN) +# if !defined(LZO_TEST_DECOMPRESS_OVERRUN_INPUT) +# define LZO_TEST_DECOMPRESS_OVERRUN_INPUT 2 +# endif +# if !defined(LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT) +# define LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT 2 +# endif +# if !defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND) +# define LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND +# endif +#endif + +#undef TEST_IP +#undef TEST_OP +#undef TEST_LOOKBEHIND +#undef NEED_IP +#undef NEED_OP +#undef HAVE_TEST_IP +#undef HAVE_TEST_OP +#undef HAVE_NEED_IP +#undef HAVE_NEED_OP +#undef HAVE_ANY_IP +#undef HAVE_ANY_OP + +#if defined(LZO_TEST_DECOMPRESS_OVERRUN_INPUT) +# if (LZO_TEST_DECOMPRESS_OVERRUN_INPUT >= 1) +# define TEST_IP (ip < ip_end) +# endif +# if (LZO_TEST_DECOMPRESS_OVERRUN_INPUT >= 2) +# define NEED_IP(x) \ + if ((lzo_uint)(ip_end - ip) < (lzo_uint)(x)) goto input_overrun +# endif +#endif + +#if defined(LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT) +# if (LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT >= 1) +# define TEST_OP (op <= op_end) +# endif +# if (LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT >= 2) +# undef TEST_OP +# define NEED_OP(x) \ + if ((lzo_uint)(op_end - op) < (lzo_uint)(x)) goto output_overrun +# endif +#endif + +#if defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND) +# define TEST_LOOKBEHIND(m_pos,out) if (m_pos < out) goto lookbehind_overrun +#else +# define TEST_LOOKBEHIND(m_pos,op) ((void) 0) +#endif + +#if !defined(LZO_EOF_CODE) && !defined(TEST_IP) +# define TEST_IP (ip < ip_end) +#endif + +#if defined(TEST_IP) +# define HAVE_TEST_IP +#else +# define TEST_IP 1 +#endif +#if defined(TEST_OP) +# define HAVE_TEST_OP +#else +# define TEST_OP 1 +#endif + +#if defined(NEED_IP) +# define HAVE_NEED_IP +#else +# define NEED_IP(x) ((void) 0) +#endif +#if defined(NEED_OP) +# define HAVE_NEED_OP +#else +# define NEED_OP(x) ((void) 0) +#endif + +#if defined(HAVE_TEST_IP) || defined(HAVE_NEED_IP) +# define HAVE_ANY_IP +#endif +#if defined(HAVE_TEST_OP) || defined(HAVE_NEED_OP) +# define HAVE_ANY_OP +#endif + +#if defined(DO_DECOMPRESS) +LZO_PUBLIC(int) +DO_DECOMPRESS ( const lzo_byte *in , lzo_uint in_len, + lzo_byte *out, lzo_uint *out_len, + lzo_voidp wrkmem ) +#endif +{ + register lzo_byte *op; + register const lzo_byte *ip; + register lzo_uint t; +#if defined(COPY_DICT) + lzo_uint m_off; + const lzo_byte *dict_end; +#else + register const lzo_byte *m_pos; +#endif + + const lzo_byte * const ip_end = in + in_len; +#if defined(HAVE_ANY_OP) + lzo_byte * const op_end = out + *out_len; +#endif +#if defined(LZO1Z) + lzo_uint last_m_off = 0; +#endif + + LZO_UNUSED(wrkmem); + +#if defined(__LZO_QUERY_DECOMPRESS) + if (__LZO_IS_DECOMPRESS_QUERY(in,in_len,out,out_len,wrkmem)) + return __LZO_QUERY_DECOMPRESS(in,in_len,out,out_len,wrkmem,0,0); +#endif + +#if defined(COPY_DICT) + if (dict) + { + if (dict_len > M4_MAX_OFFSET) + { + dict += dict_len - M4_MAX_OFFSET; + dict_len = M4_MAX_OFFSET; + } + dict_end = dict + dict_len; + } + else + { + dict_len = 0; + dict_end = NULL; + } +#endif + + *out_len = 0; + + op = out; + ip = in; + + if (*ip > 17) + { + t = *ip++ - 17; + if (t < 4) + goto match_next; + assert(t > 0); NEED_OP(t); NEED_IP(t+1); + do *op++ = *ip++; while (--t > 0); + goto first_literal_run; + } + + while (TEST_IP && TEST_OP) + { + t = *ip++; + if (t >= 16) + goto match; + if (t == 0) + { + NEED_IP(1); + while (*ip == 0) + { + t += 255; + ip++; + NEED_IP(1); + } + t += 15 + *ip++; + } + assert(t > 0); NEED_OP(t+3); NEED_IP(t+4); +#if defined(LZO_UNALIGNED_OK_4) || defined(LZO_ALIGNED_OK_4) +#if !defined(LZO_UNALIGNED_OK_4) + if (PTR_ALIGNED2_4(op,ip)) + { +#endif + * (lzo_uint32p) op = * (const lzo_uint32p) ip; + op += 4; ip += 4; + if (--t > 0) + { + if (t >= 4) + { + do { + * (lzo_uint32p) op = * (const lzo_uint32p) ip; + op += 4; ip += 4; t -= 4; + } while (t >= 4); + if (t > 0) do *op++ = *ip++; while (--t > 0); + } + else + do *op++ = *ip++; while (--t > 0); + } +#if !defined(LZO_UNALIGNED_OK_4) + } + else +#endif +#endif +#if !defined(LZO_UNALIGNED_OK_4) + { + *op++ = *ip++; *op++ = *ip++; *op++ = *ip++; + do *op++ = *ip++; while (--t > 0); + } +#endif + +first_literal_run: + + t = *ip++; + if (t >= 16) + goto match; +#if defined(COPY_DICT) +#if defined(LZO1Z) + m_off = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2); + last_m_off = m_off; +#else + m_off = (1 + M2_MAX_OFFSET) + (t >> 2) + (*ip++ << 2); +#endif + NEED_OP(3); + t = 3; COPY_DICT(t,m_off) +#else +#if defined(LZO1Z) + t = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2); + m_pos = op - t; + last_m_off = t; +#else + m_pos = op - (1 + M2_MAX_OFFSET); + m_pos -= t >> 2; + m_pos -= *ip++ << 2; +#endif + TEST_LOOKBEHIND(m_pos,out); NEED_OP(3); + *op++ = *m_pos++; *op++ = *m_pos++; *op++ = *m_pos; +#endif + goto match_done; + + while (TEST_IP && TEST_OP) + { +match: + if (t >= 64) + { +#if defined(COPY_DICT) +#if defined(LZO1X) + m_off = 1 + ((t >> 2) & 7) + (*ip++ << 3); + t = (t >> 5) - 1; +#elif defined(LZO1Y) + m_off = 1 + ((t >> 2) & 3) + (*ip++ << 2); + t = (t >> 4) - 3; +#elif defined(LZO1Z) + m_off = t & 0x1f; + if (m_off >= 0x1c) + m_off = last_m_off; + else + { + m_off = 1 + (m_off << 6) + (*ip++ >> 2); + last_m_off = m_off; + } + t = (t >> 5) - 1; +#endif +#else +#if defined(LZO1X) + m_pos = op - 1; + m_pos -= (t >> 2) & 7; + m_pos -= *ip++ << 3; + t = (t >> 5) - 1; +#elif defined(LZO1Y) + m_pos = op - 1; + m_pos -= (t >> 2) & 3; + m_pos -= *ip++ << 2; + t = (t >> 4) - 3; +#elif defined(LZO1Z) + { + lzo_uint off = t & 0x1f; + m_pos = op; + if (off >= 0x1c) + { + assert(last_m_off > 0); + m_pos -= last_m_off; + } + else + { + off = 1 + (off << 6) + (*ip++ >> 2); + m_pos -= off; + last_m_off = off; + } + } + t = (t >> 5) - 1; +#endif + TEST_LOOKBEHIND(m_pos,out); assert(t > 0); NEED_OP(t+3-1); + goto copy_match; +#endif + } + else if (t >= 32) + { + t &= 31; + if (t == 0) + { + NEED_IP(1); + while (*ip == 0) + { + t += 255; + ip++; + NEED_IP(1); + } + t += 31 + *ip++; + } +#if defined(COPY_DICT) +#if defined(LZO1Z) + m_off = 1 + (ip[0] << 6) + (ip[1] >> 2); + last_m_off = m_off; +#else + m_off = 1 + (ip[0] >> 2) + (ip[1] << 6); +#endif +#else +#if defined(LZO1Z) + { + lzo_uint off = 1 + (ip[0] << 6) + (ip[1] >> 2); + m_pos = op - off; + last_m_off = off; + } +#elif defined(LZO_UNALIGNED_OK_2) && (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN) + m_pos = op - 1; + m_pos -= (* (const lzo_ushortp) ip) >> 2; +#else + m_pos = op - 1; + m_pos -= (ip[0] >> 2) + (ip[1] << 6); +#endif +#endif + ip += 2; + } + else if (t >= 16) + { +#if defined(COPY_DICT) + m_off = (t & 8) << 11; +#else + m_pos = op; + m_pos -= (t & 8) << 11; +#endif + t &= 7; + if (t == 0) + { + NEED_IP(1); + while (*ip == 0) + { + t += 255; + ip++; + NEED_IP(1); + } + t += 7 + *ip++; + } +#if defined(COPY_DICT) +#if defined(LZO1Z) + m_off += (ip[0] << 6) + (ip[1] >> 2); +#else + m_off += (ip[0] >> 2) + (ip[1] << 6); +#endif + ip += 2; + if (m_off == 0) + goto eof_found; + m_off += 0x4000; +#if defined(LZO1Z) + last_m_off = m_off; +#endif +#else +#if defined(LZO1Z) + m_pos -= (ip[0] << 6) + (ip[1] >> 2); +#elif defined(LZO_UNALIGNED_OK_2) && (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN) + m_pos -= (* (const lzo_ushortp) ip) >> 2; +#else + m_pos -= (ip[0] >> 2) + (ip[1] << 6); +#endif + ip += 2; + if (m_pos == op) + goto eof_found; + m_pos -= 0x4000; +#if defined(LZO1Z) + last_m_off = op - m_pos; +#endif +#endif + } + else + { +#if defined(COPY_DICT) +#if defined(LZO1Z) + m_off = 1 + (t << 6) + (*ip++ >> 2); + last_m_off = m_off; +#else + m_off = 1 + (t >> 2) + (*ip++ << 2); +#endif + NEED_OP(2); + t = 2; COPY_DICT(t,m_off) +#else +#if defined(LZO1Z) + t = 1 + (t << 6) + (*ip++ >> 2); + m_pos = op - t; + last_m_off = t; +#else + m_pos = op - 1; + m_pos -= t >> 2; + m_pos -= *ip++ << 2; +#endif + TEST_LOOKBEHIND(m_pos,out); NEED_OP(2); + *op++ = *m_pos++; *op++ = *m_pos; +#endif + goto match_done; + } + +#if defined(COPY_DICT) + + NEED_OP(t+3-1); + t += 3-1; COPY_DICT(t,m_off) + +#else + + TEST_LOOKBEHIND(m_pos,out); assert(t > 0); NEED_OP(t+3-1); +#if defined(LZO_UNALIGNED_OK_4) || defined(LZO_ALIGNED_OK_4) +#if !defined(LZO_UNALIGNED_OK_4) + if (t >= 2 * 4 - (3 - 1) && PTR_ALIGNED2_4(op,m_pos)) + { + assert((op - m_pos) >= 4); +#else + if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) + { +#endif + * (lzo_uint32p) op = * (const lzo_uint32p) m_pos; + op += 4; m_pos += 4; t -= 4 - (3 - 1); + do { + * (lzo_uint32p) op = * (const lzo_uint32p) m_pos; + op += 4; m_pos += 4; t -= 4; + } while (t >= 4); + if (t > 0) do *op++ = *m_pos++; while (--t > 0); + } + else +#endif + { +copy_match: + *op++ = *m_pos++; *op++ = *m_pos++; + do *op++ = *m_pos++; while (--t > 0); + } + +#endif + +match_done: +#if defined(LZO1Z) + t = ip[-1] & 3; +#else + t = ip[-2] & 3; +#endif + if (t == 0) + break; + +match_next: + assert(t > 0); NEED_OP(t); NEED_IP(t+1); + do *op++ = *ip++; while (--t > 0); + t = *ip++; + } + } + +#if defined(HAVE_TEST_IP) || defined(HAVE_TEST_OP) + *out_len = op - out; + return LZO_E_EOF_NOT_FOUND; +#endif + +eof_found: + assert(t == 1); + *out_len = op - out; + return (ip == ip_end ? LZO_E_OK : + (ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN)); + +#if defined(HAVE_NEED_IP) +input_overrun: + *out_len = op - out; + return LZO_E_INPUT_OVERRUN; +#endif + +#if defined(HAVE_NEED_OP) +output_overrun: + *out_len = op - out; + return LZO_E_OUTPUT_OVERRUN; +#endif + +#if defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND) +lookbehind_overrun: + *out_len = op - out; + return LZO_E_LOOKBEHIND_OVERRUN; +#endif +} + +/***** End of minilzo.c *****/ + diff -r fd85802f755b -r 628c85c15c7b libmpcodecs/native/minilzo.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libmpcodecs/native/minilzo.h Sat Apr 13 18:03:02 2002 +0000 @@ -0,0 +1,97 @@ +/* minilzo.h -- mini subset of the LZO real-time data compression library + + This file is part of the LZO real-time data compression library. + + Copyright (C) 2000 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 1999 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 1998 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 1997 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 1996 Markus Franz Xaver Johannes Oberhumer + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + Markus F.X.J. Oberhumer + + http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html + */ + +/* + * NOTE: + * the full LZO package can be found at + * http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html + */ + + +#ifndef __MINILZO_H +#define __MINILZO_H + +#define MINILZO_VERSION 0x1070 + +#ifdef __LZOCONF_H +# error "you cannot use both LZO and miniLZO" +#endif + +#undef LZO_HAVE_CONFIG_H +#include "lzoconf.h" + +#if !defined(LZO_VERSION) || (LZO_VERSION != MINILZO_VERSION) +# error "version mismatch in header files" +#endif + + +#ifdef __cplusplus +extern "C" { +#endif + + +/*********************************************************************** +// +************************************************************************/ + +/* Memory required for the wrkmem parameter. + * When the required size is 0, you can also pass a NULL pointer. + */ + +#define LZO1X_MEM_COMPRESS LZO1X_1_MEM_COMPRESS +#define LZO1X_1_MEM_COMPRESS ((lzo_uint32) (16384L * lzo_sizeof_dict_t)) +#define LZO1X_MEM_DECOMPRESS (0) + + +/* compression */ +LZO_EXTERN(int) +lzo1x_1_compress ( const lzo_byte *src, lzo_uint src_len, + lzo_byte *dst, lzo_uint *dst_len, + lzo_voidp wrkmem ); + +/* decompression */ +LZO_EXTERN(int) +lzo1x_decompress ( const lzo_byte *src, lzo_uint src_len, + lzo_byte *dst, lzo_uint *dst_len, + lzo_voidp wrkmem /* NOT USED */ ); + +/* safe decompression with overrun testing */ +LZO_EXTERN(int) +lzo1x_decompress_safe ( const lzo_byte *src, lzo_uint src_len, + lzo_byte *dst, lzo_uint *dst_len, + lzo_voidp wrkmem /* NOT USED */ ); + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* already included */ + diff -r fd85802f755b -r 628c85c15c7b libmpcodecs/native/msvidc.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libmpcodecs/native/msvidc.c Sat Apr 13 18:03:02 2002 +0000 @@ -0,0 +1,393 @@ +/* + Microsoft Video 1 Decoder + + (C) 2001 Mike Melanson + + The description of the algorithm you can read here: + http://www.pcisys.net/~melanson/codecs/ + + 32bpp support (c) alex +*/ + +#include "config.h" +#include "bswap.h" + +#define LE_16(x) (le2me_16(*(unsigned short *)(x))) + +#define DECODE_BGR555_TO_BGR888(x) \ + x.c1_b = (x.c1 >> 7) & 0xF8; \ + x.c1_g = (x.c1 >> 2) & 0xF8; \ + x.c1_r = (x.c1 << 3) & 0xF8; \ + x.c2_b = (x.c2 >> 7) & 0xF8; \ + x.c2_g = (x.c2 >> 2) & 0xF8; \ + x.c2_r = (x.c2 << 3) & 0xF8; + +#define DECODE_PALETTE_TO_BGR888(x) \ + x.c1_b = palette_map[x.c1 * 4 + 2]; \ + x.c1_g = palette_map[x.c1 * 4 + 1]; \ + x.c1_r = palette_map[x.c1 * 4 + 0]; \ + x.c2_b = palette_map[x.c2 * 4 + 2]; \ + x.c2_g = palette_map[x.c2 * 4 + 1]; \ + x.c2_r = palette_map[x.c2 * 4 + 0]; + +struct +{ + unsigned short c1, c2; + unsigned char c1_r, c1_g, c1_b; + unsigned char c2_r, c2_g, c2_b; +} quad[2][2]; + +void AVI_Decode_Video1_16( + char *encoded, + int encoded_size, + char *decoded, + int width, + int height, + int bytes_per_pixel) +{ + int block_ptr, pixel_ptr; + int total_blocks; + int pixel_x, pixel_y; // pixel width and height iterators + int block_x, block_y; // block width and height iterators + int blocks_wide, blocks_high; // width and height in 4x4 blocks + int block_inc; + int row_dec; + + // decoding parameters + int stream_ptr; + unsigned char byte_a, byte_b; + unsigned short flags; + int skip_blocks; + + stream_ptr = 0; + skip_blocks = 0; + blocks_wide = width / 4; + blocks_high = height / 4; + total_blocks = blocks_wide * blocks_high; + block_inc = 4 * bytes_per_pixel; + row_dec = (width + 4) * bytes_per_pixel; + + for (block_y = blocks_high; block_y > 0; block_y--) + { + block_ptr = ((block_y * 4) - 1) * (width * bytes_per_pixel); + for (block_x = blocks_wide; block_x > 0; block_x--) + { + // check if this block should be skipped + if (skip_blocks) + { + block_ptr += block_inc; + skip_blocks--; + total_blocks--; + continue; + } + + pixel_ptr = block_ptr; + + // get the next two bytes in the encoded data stream + byte_a = encoded[stream_ptr++]; + byte_b = encoded[stream_ptr++]; + + // check if the decode is finished + if ((byte_a == 0) && (byte_b == 0) && (total_blocks == 0)) + return; + + // check if this is a skip code + else if ((byte_b & 0xFC) == 0x84) + { + // but don't count the current block + skip_blocks = ((byte_b - 0x84) << 8) + byte_a - 1; + } + + // check if this is in the 2- or 8-color classes + else if (byte_b < 0x80) + { + flags = (byte_b << 8) | byte_a; + + quad[0][0].c1 = LE_16(&encoded[stream_ptr]); + stream_ptr += 2; + quad[0][0].c2 = LE_16(&encoded[stream_ptr]); + stream_ptr += 2; + + DECODE_BGR555_TO_BGR888(quad[0][0]); + + if (quad[0][0].c1 & 0x8000) + { + // 8-color encoding + quad[1][0].c1 = LE_16(&encoded[stream_ptr]); + stream_ptr += 2; + quad[1][0].c2 = LE_16(&encoded[stream_ptr]); + stream_ptr += 2; + quad[0][1].c1 = LE_16(&encoded[stream_ptr]); + stream_ptr += 2; + quad[0][1].c2 = LE_16(&encoded[stream_ptr]); + stream_ptr += 2; + quad[1][1].c1 = LE_16(&encoded[stream_ptr]); + stream_ptr += 2; + quad[1][1].c2 = LE_16(&encoded[stream_ptr]); + stream_ptr += 2; + + DECODE_BGR555_TO_BGR888(quad[0][1]); + DECODE_BGR555_TO_BGR888(quad[1][0]); + DECODE_BGR555_TO_BGR888(quad[1][1]); + + for (pixel_y = 0; pixel_y < 4; pixel_y++) + { + for (pixel_x = 0; pixel_x < 4; pixel_x++) + { + if (flags & 1) + { + decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c1_r; + decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c1_g; + decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c1_b; + if (bytes_per_pixel == 4) /* 32bpp */ + pixel_ptr++; + } + else + { + decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c2_r; + decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c2_g; + decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c2_b; + if (bytes_per_pixel == 4) /* 32bpp */ + pixel_ptr++; + } + + // get the next flag ready to go + flags >>= 1; + } + pixel_ptr -= row_dec; + } + } + else + { + // 2-color encoding + for (pixel_y = 0; pixel_y < 4; pixel_y++) + { + for (pixel_x = 0; pixel_x < 4; pixel_x++) + { + if (flags & 1) + { + decoded[pixel_ptr++] = quad[0][0].c1_r; + decoded[pixel_ptr++] = quad[0][0].c1_g; + decoded[pixel_ptr++] = quad[0][0].c1_b; + if (bytes_per_pixel == 4) /* 32bpp */ + pixel_ptr++; + } + else + { + decoded[pixel_ptr++] = quad[0][0].c2_r; + decoded[pixel_ptr++] = quad[0][0].c2_g; + decoded[pixel_ptr++] = quad[0][0].c2_b; + if (bytes_per_pixel == 4) /* 32bpp */ + pixel_ptr++; + } + + // get the next flag ready to go + flags >>= 1; + } + pixel_ptr -= row_dec; + } + } + } + + // otherwise, it's a 1-color block + else + { + quad[0][0].c1 = (byte_b << 8) | byte_a; + DECODE_BGR555_TO_BGR888(quad[0][0]); + + for (pixel_y = 0; pixel_y < 4; pixel_y++) + { + for (pixel_x = 0; pixel_x < 4; pixel_x++) + { + decoded[pixel_ptr++] = quad[0][0].c1_r; + decoded[pixel_ptr++] = quad[0][0].c1_g; + decoded[pixel_ptr++] = quad[0][0].c1_b; + if (bytes_per_pixel == 4) /* 32bpp */ + pixel_ptr++; + } + pixel_ptr -= row_dec; + } + } + + block_ptr += block_inc; + total_blocks--; + } + } +} + +void AVI_Decode_Video1_8( + char *encoded, + int encoded_size, + char *decoded, + int width, + int height, + unsigned char *palette_map, + int bytes_per_pixel) +{ + int block_ptr, pixel_ptr; + int total_blocks; + int pixel_x, pixel_y; // pixel width and height iterators + int block_x, block_y; // block width and height iterators + int blocks_wide, blocks_high; // width and height in 4x4 blocks + int block_inc; + int row_dec; + + // decoding parameters + int stream_ptr; + unsigned char byte_a, byte_b; + unsigned short flags; + int skip_blocks; + + stream_ptr = 0; + skip_blocks = 0; + blocks_wide = width / 4; + blocks_high = height / 4; + total_blocks = blocks_wide * blocks_high; + block_inc = 4 * bytes_per_pixel; + row_dec = (width + 4) * bytes_per_pixel; + + for (block_y = blocks_high; block_y > 0; block_y--) + { + block_ptr = ((block_y * 4) - 1) * (width * bytes_per_pixel); + for (block_x = blocks_wide; block_x > 0; block_x--) + { + // check if this block should be skipped + if (skip_blocks) + { + block_ptr += block_inc; + skip_blocks--; + total_blocks--; + continue; + } + + pixel_ptr = block_ptr; + + // get the next two bytes in the encoded data stream + byte_a = encoded[stream_ptr++]; + byte_b = encoded[stream_ptr++]; + + // check if the decode is finished + if ((byte_a == 0) && (byte_b == 0) && (total_blocks == 0)) + return; + + // check if this is a skip code + else if ((byte_b & 0xFC) == 0x84) + { + // but don't count the current block + skip_blocks = ((byte_b - 0x84) << 8) + byte_a - 1; + } + + // check if this is a 2-color block + else if (byte_b < 0x80) + { + flags = (byte_b << 8) | byte_a; + + quad[0][0].c1 = (unsigned char)encoded[stream_ptr++]; + quad[0][0].c2 = (unsigned char)encoded[stream_ptr++]; + + DECODE_PALETTE_TO_BGR888(quad[0][0]); + + // 2-color encoding + for (pixel_y = 0; pixel_y < 4; pixel_y++) + { + for (pixel_x = 0; pixel_x < 4; pixel_x++) + { + if (flags & 1) + { + decoded[pixel_ptr++] = quad[0][0].c1_r; + decoded[pixel_ptr++] = quad[0][0].c1_g; + decoded[pixel_ptr++] = quad[0][0].c1_b; + if (bytes_per_pixel == 4) /* 32bpp */ + pixel_ptr++; + } + else + { + decoded[pixel_ptr++] = quad[0][0].c2_r; + decoded[pixel_ptr++] = quad[0][0].c2_g; + decoded[pixel_ptr++] = quad[0][0].c2_b; + if (bytes_per_pixel == 4) /* 32bpp */ + pixel_ptr++; + } + + // get the next flag ready to go + flags >>= 1; + } + pixel_ptr -= row_dec; + } + } + + // check if it's an 8-color block + else if (byte_b >= 0x90) + { + flags = (byte_b << 8) | byte_a; + + quad[0][0].c1 = (unsigned char)encoded[stream_ptr++]; + quad[0][0].c2 = (unsigned char)encoded[stream_ptr++]; + quad[1][0].c1 = (unsigned char)encoded[stream_ptr++]; + quad[1][0].c2 = (unsigned char)encoded[stream_ptr++]; + + quad[0][1].c1 = (unsigned char)encoded[stream_ptr++]; + quad[0][1].c2 = (unsigned char)encoded[stream_ptr++]; + quad[1][1].c1 = (unsigned char)encoded[stream_ptr++]; + quad[1][1].c2 = (unsigned char)encoded[stream_ptr++]; + + DECODE_PALETTE_TO_BGR888(quad[0][0]); + DECODE_PALETTE_TO_BGR888(quad[0][1]); + DECODE_PALETTE_TO_BGR888(quad[1][0]); + DECODE_PALETTE_TO_BGR888(quad[1][1]); + + for (pixel_y = 0; pixel_y < 4; pixel_y++) + { + for (pixel_x = 0; pixel_x < 4; pixel_x++) + { + if (flags & 1) + { + decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c1_r; + decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c1_g; + decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c1_b; + if (bytes_per_pixel == 4) /* 32bpp */ + pixel_ptr++; + } + else + { + decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c2_r; + decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c2_g; + decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c2_b; + if (bytes_per_pixel == 4) /* 32bpp */ + pixel_ptr++; + } + + // get the next flag ready to go + flags >>= 1; + } + pixel_ptr -= row_dec; + } + } + + // otherwise, it's a 1-color block + else + { + // init c2 along with c1 just so c2 is a known value for macro + quad[0][0].c1 = quad[0][0].c2 = byte_a; + DECODE_PALETTE_TO_BGR888(quad[0][0]); + + for (pixel_y = 0; pixel_y < 4; pixel_y++) + { + for (pixel_x = 0; pixel_x < 4; pixel_x++) + { + decoded[pixel_ptr++] = quad[0][0].c1_r; + decoded[pixel_ptr++] = quad[0][0].c1_g; + decoded[pixel_ptr++] = quad[0][0].c1_b; + if (bytes_per_pixel == 4) /* 32bpp */ + pixel_ptr++; + } + pixel_ptr -= row_dec; + } + } + + block_ptr += block_inc; + total_blocks--; + } + } +} + diff -r fd85802f755b -r 628c85c15c7b libmpcodecs/native/nuppelvideo.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libmpcodecs/native/nuppelvideo.c Sat Apr 13 18:03:02 2002 +0000 @@ -0,0 +1,123 @@ +/* + * NuppelVideo 0.05 file parser + * for MPlayer + * by Panagiotis Issaris + * + * Reworked by alex + */ + +#include +#include +#include + +#include "config.h" +#include "mp_msg.h" + +#include "fastmemcpy.h" + +#include "libmpdemux/nuppelvideo.h" +#include "RTjpegN.h" +#include "minilzo.h" + +#define KEEP_BUFFER + +void decode_nuv( unsigned char *encoded, int encoded_size, + unsigned char *decoded, int width, int height) +{ + int r; + unsigned int out_len; + struct rtframeheader *encodedh = ( struct rtframeheader* ) encoded; + static unsigned char *buffer = 0; /* for RTJpeg with LZO decompress */ +#ifdef KEEP_BUFFER + static unsigned char *previous_buffer = 0; /* to support Last-frame-copy */ +#endif + static int is_lzo_inited = 0; + +// printf("frametype: %c, comtype: %c, encoded_size: %d, width: %d, height: %d\n", +// encodedh->frametype, encodedh->comptype, encoded_size, width, height); + + switch(encodedh->frametype) + { + case 'D': /* additional data for compressors */ + { + /* tables are in encoded */ + if (encodedh->comptype == 'R') + { + RTjpeg_init_decompress ( encoded+12, width, height ); + mp_msg(MSGT_DECVIDEO, MSGL_V, "Found RTjpeg tables (size: %d, width: %d, height: %d)\n", + encoded_size-12, width, height); + } + break; + } + case 'V': + { +#ifdef KEEP_BUFFER + if (!previous_buffer) + previous_buffer = ( unsigned char * ) malloc ( width * height + ( width * height ) / 2 ); +#endif + + if (((encodedh->comptype == '2') || + (encodedh->comptype == '3')) && !is_lzo_inited) + { + /* frame using lzo, init lzo first if not inited */ + if ( lzo_init() != LZO_E_OK ) + { + mp_msg(MSGT_DECVIDEO, MSGL_ERR, "LZO init failed\n"); + return; + } + is_lzo_inited = 1; + } + + switch(encodedh->comptype) + { + case '0': /* raw YUV420 */ + memcpy(decoded, encoded + 12, width*height*3/2); + break; + case '1': /* RTJpeg */ + RTjpeg_decompressYUV420 ( ( __s8 * ) encoded + 12, decoded ); + break; + case '2': /* RTJpeg with LZO */ + if (!buffer) + buffer = ( unsigned char * ) malloc ( width * height + ( width * height ) / 2 ); + if (!buffer) + { + mp_msg(MSGT_DECVIDEO, MSGL_ERR, "Nuppelvideo: error decompressing\n"); + break; + } + r = lzo1x_decompress ( encoded + 12, encodedh->packetlength, buffer, &out_len, NULL ); + if ( r != LZO_E_OK ) + { + mp_msg(MSGT_DECVIDEO, MSGL_ERR, "Nuppelvideo: error decompressing\n"); + break; + } + RTjpeg_decompressYUV420 ( ( __s8 * ) buffer, decoded ); + break; + case '3': /* raw YUV420 with LZO */ + r = lzo1x_decompress ( encoded + 12, encodedh->packetlength, decoded, &out_len, NULL ); + if ( r != LZO_E_OK ) + { + mp_msg(MSGT_DECVIDEO, MSGL_ERR, "Nuppelvideo: error decompressing\n"); + break; + } + break; + case 'N': /* black frame */ + memset ( decoded, 0, width * height ); + memset ( decoded + width * height, 127, width * height / 2); + break; + case 'L': /* copy last frame */ +#ifdef KEEP_BUFFER + memcpy ( decoded, previous_buffer, width*height*3/2); +#endif + break; + } + +#ifdef KEEP_BUFFER + memcpy(previous_buffer, decoded, width*height*3/2); +#endif + break; + } + default: + mp_msg(MSGT_DECVIDEO, MSGL_V, "Nuppelvideo: unknwon frametype: %c\n", + encodedh->frametype); + } +} diff -r fd85802f755b -r 628c85c15c7b libmpcodecs/native/qtrle.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libmpcodecs/native/qtrle.c Sat Apr 13 18:03:02 2002 +0000 @@ -0,0 +1,125 @@ +/* + Quicktime Animation (RLE) Decoder for MPlayer + + (C) 2001 Mike Melanson +*/ + +#include "config.h" +#include "bswap.h" + +#define BE_16(x) (be2me_16(*(unsigned short *)(x))) +#define BE_32(x) (be2me_32(*(unsigned int *)(x))) + +// 256 RGB entries; 25% of these bytes will be unused, but it's faster +// to index 4-byte entries +static unsigned char palette[256 * 4]; + +void qt_decode_rle24( + unsigned char *encoded, + int encoded_size, + unsigned char *decoded, + int width, + int height, + int bytes_per_pixel) +{ + int stream_ptr; + int header; + int start_line; + int lines_to_change; + signed char rle_code; + int row_ptr, pixel_ptr; + int row_inc = bytes_per_pixel * width; + unsigned char r, g, b; + + // check if this frame is even supposed to change + if (encoded_size < 8) + return; + + // start after the chunk size + stream_ptr = 4; + + // fetch the header + header = BE_16(&encoded[stream_ptr]); + stream_ptr += 2; + + // if a header is present, fetch additional decoding parameters + if (header & 0x0008) + { + start_line = BE_16(&encoded[stream_ptr]); + stream_ptr += 4; + lines_to_change = BE_16(&encoded[stream_ptr]); + stream_ptr += 4; + } + else + { + start_line = 0; + lines_to_change = height; + } + + row_ptr = row_inc * start_line; + while (lines_to_change--) + { + pixel_ptr = row_ptr + ((encoded[stream_ptr++] - 1) * bytes_per_pixel); + + while ((rle_code = (signed char)encoded[stream_ptr++]) != -1) + { + if (rle_code == 0) + // there's another skip code in the stream + pixel_ptr += ((encoded[stream_ptr++] - 1) * bytes_per_pixel); + else if (rle_code < 0) + { + // decode the run length code + rle_code = -rle_code; + r = encoded[stream_ptr++]; + g = encoded[stream_ptr++]; + b = encoded[stream_ptr++]; + while (rle_code--) + { + decoded[pixel_ptr++] = b; + decoded[pixel_ptr++] = g; + decoded[pixel_ptr++] = r; + if (bytes_per_pixel == 4) + pixel_ptr++; + } + } + else + { + // copy pixels directly to output + while (rle_code--) + { + decoded[pixel_ptr++] = encoded[stream_ptr + 2]; + decoded[pixel_ptr++] = encoded[stream_ptr + 1]; + decoded[pixel_ptr++] = encoded[stream_ptr + 0]; + stream_ptr += 3; + if (bytes_per_pixel == 4) + pixel_ptr++; + } + } + } + + row_ptr += row_inc; + } +} + +void qt_decode_rle( + unsigned char *encoded, + int encoded_size, + unsigned char *decoded, + int width, + int height, + int encoded_bpp, + int bytes_per_pixel) +{ + switch (encoded_bpp) + { + case 24: + qt_decode_rle24( + encoded, + encoded_size, + decoded, + width, + height, + bytes_per_pixel); + break; + } +} diff -r fd85802f755b -r 628c85c15c7b libmpcodecs/native/qtrpza.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libmpcodecs/native/qtrpza.c Sat Apr 13 18:03:02 2002 +0000 @@ -0,0 +1,264 @@ +/* + * + * Apple Video (rpza) QuickTime Decoder for Mplayer + * (c) 2002 Roberto Togni + * + * Fourcc: rpza, azpr + * + * Some code comes from qtsmc.c by Mike Melanson + * + * A description of the decoding algorithm can be found here: + * http://www.pcisys.net/~melanson/codecs/ + */ + +#include "config.h" +#include "bswap.h" +#include "mp_msg.h" + +#define BE_16(x) (be2me_16(*(unsigned short *)(x))) +#define BE_32(x) (be2me_32(*(unsigned int *)(x))) + + +#define ADVANCE_BLOCK() \ +{ \ + pixel_ptr += block_x_inc; \ + if (pixel_ptr >= (width * bytes_per_pixel)) \ + { \ + pixel_ptr = 0; \ + row_ptr += block_y_inc * 4; \ + } \ + total_blocks--; \ + if (total_blocks < 0) \ + { \ + mp_msg(MSGT_DECVIDEO, MSGL_WARN, "block counter just went negative (this should not happen)\n"); \ + return; \ + } \ +} + +#define PAINT_CURRENT_PIXEL(r, g, b, color) \ +{ \ + if (bytes_per_pixel == 2) { \ + (*(unsigned short*)(&decoded[block_ptr])) = color & 0x7fff; \ + block_ptr += 2; \ + } else { \ + decoded[block_ptr++] = (b); \ + decoded[block_ptr++] = (g); \ + decoded[block_ptr++] = (r); \ + if (bytes_per_pixel == 4) /* 32bpp */ \ + block_ptr++; \ + } \ +} + +#define COLOR_FIX(col_out, col_in) (col_out) = ((col_in) << 3) | ((col_in) >> 2) + +#define COLOR_TO_RGB(r, g, b, color) \ +{ \ + if (bytes_per_pixel != 2) { \ + unsigned short tmp; \ + tmp = (color >> 10) & 0x1f; \ + COLOR_FIX (r, tmp); \ + tmp = (color >> 5) & 0x1f; \ + COLOR_FIX (g, tmp); \ + tmp = color & 0x1f; \ + COLOR_FIX (b, tmp); \ + } \ +} + +#define COLORAB_TO_RGB4(rgb4, color4, colorA, colorB) \ +{ \ + unsigned short ta, tb, tt; \ + if (bytes_per_pixel != 2) { \ + ta = (colorA >> 10) & 0x1f; \ + tb = (colorB >> 10) & 0x1f; \ + COLOR_FIX (rgb4[3][0], ta); \ + COLOR_FIX (rgb4[0][0], tb); \ + tt = (11 * ta + 21 * tb) >> 5; \ + COLOR_FIX (rgb4[1][0], tt); \ + tt = (21 * ta + 11 * tb) >> 5; \ + COLOR_FIX (rgb4[2][0], tt); \ + ta = (colorA >> 5) & 0x1f; \ + tb = (colorB >> 5) & 0x1f; \ + COLOR_FIX (rgb4[3][1], ta); \ + COLOR_FIX (rgb4[0][1], tb); \ + tt = (11 * ta + 21 * tb) >> 5; \ + COLOR_FIX (rgb4[1][1], tt); \ + tt = (21 * ta + 11 * tb) >> 5; \ + COLOR_FIX (rgb4[2][1], tt); \ + ta = colorA & 0x1f; \ + tb = colorB & 0x1f; \ + COLOR_FIX (rgb4[3][2], ta); \ + COLOR_FIX (rgb4[0][2], tb); \ + tt = (11 * ta + 21 * tb) >> 5; \ + COLOR_FIX (rgb4[1][2], tt); \ + tt = (21 * ta + 11 * tb) >> 5; \ + COLOR_FIX (rgb4[2][2], tt); \ + } else { \ + color4[3] = colorA; \ + color4[0] = colorB; \ + ta = (colorA >> 10) & 0x1f; \ + tb = (colorB >> 10) & 0x1f; \ + color4[1] = ((11 * ta + 21 * tb) << 5) & 0x7c00; \ + color4[2] = ((21 * ta + 11 * tb) << 5) & 0x7c00; \ + ta = (colorA >> 5) & 0x1f; \ + tb = (colorB >> 5) & 0x1f; \ + color4[1] |= (11 * ta + 21 * tb) & 0x3e0; \ + color4[2] |= (21 * ta + 11 * tb) & 0x3e0; \ + ta = colorA & 0x1f; \ + tb = colorB & 0x1f; \ + color4[1] |= (11 * ta + 21 * tb) >> 5; \ + color4[2] |= (21 * ta + 11 * tb) >> 5; \ + } \ +} + + + +/* + * rpza frame decoder + * + * Input values: + * + * *encoded: buffer of encoded data (chunk) + * encoded_size: length of encoded buffer + * *decoded: buffer where decoded data is written (image buffer) + * width: width of decoded frame in pixels + * height: height of decoded frame in pixels + * bytes_per_pixel: bytes/pixel in output image (color depth) + * + */ + +void qt_decode_rpza(char *encoded, int encoded_size, char *decoded, int width, + int height, int bytes_per_pixel) +{ + + int i; + int stream_ptr = 0; + int chunk_size; + unsigned char opcode; + int n_blocks; + unsigned short colorA, colorB; + unsigned char r, g, b; + unsigned char rgb4[4][3]; + unsigned short color4[4]; + unsigned char index, idx; + + int row_ptr = 0; + int pixel_ptr = 0; + int pixel_x, pixel_y; + int row_inc = bytes_per_pixel * (width - 4); + int max_height = row_inc * height; + int block_x_inc = bytes_per_pixel * 4; + int block_y_inc = bytes_per_pixel * width; + int block_ptr; + int total_blocks; + + + /* First byte is always 0xe1. Warn if it's different */ + if ((unsigned char)encoded[stream_ptr] != 0xe1) + mp_msg(MSGT_DECVIDEO, MSGL_WARN, + "First chunk byte is 0x%02x instead of 0x1e\n", + (unsigned char)encoded[stream_ptr]); + + /* Get chunk size, ingnoring first byte */ + chunk_size = BE_32(&encoded[stream_ptr]) & 0x00FFFFFF; + stream_ptr += 4; + + /* If length mismatch use size from MOV file and try to decode anyway */ + if (chunk_size != encoded_size) + mp_msg(MSGT_DECVIDEO, MSGL_WARN, "MOV chunk size != encoded chunk size; using MOV chunk size\n"); + + chunk_size = encoded_size; + + /* Number of 4x4 blocks in frame. */ + total_blocks = (width * height) / (4 * 4); + + /* Process chunk data */ + while (stream_ptr < chunk_size) { + opcode = encoded[stream_ptr++]; /* Get opcode */ + + n_blocks = (opcode & 0x1f) +1; /* Extract block counter from opcode */ + + /* If opcode MSbit is 0, we need more data to decide what to do */ + if ((opcode & 0x80) == 0) { + colorA = (opcode << 8) | ((unsigned char)encoded[stream_ptr++]); + opcode = 0; + if ((encoded[stream_ptr] & 0x80) != 0) { + /* Must behave as opcode 110xxxxx, using colorA computed above.*/ + /* Use fake opcode 0x20 to enter switch block at the right place */ + opcode = 0x20; + n_blocks = 1; + } + } + switch (opcode & 0xe0) { + /* Skip blocks */ + case 0x80: + while (n_blocks--) + ADVANCE_BLOCK(); + break; + + /* Fill blocks with one color */ + case 0xa0: + colorA = BE_16 (&encoded[stream_ptr]); + stream_ptr += 2; + COLOR_TO_RGB (r, g, b, colorA); + while (n_blocks--) { + block_ptr = row_ptr + pixel_ptr; + for (pixel_y = 0; pixel_y < 4; pixel_y++) { + for (pixel_x = 0; pixel_x < 4; pixel_x++){ + PAINT_CURRENT_PIXEL(r, g, b, colorA); + } + block_ptr += row_inc; + } + ADVANCE_BLOCK(); + } + break; + + /* Fill blocks with 4 colors */ + case 0xc0: + colorA = BE_16 (&encoded[stream_ptr]); + stream_ptr += 2; + case 0x20: + colorB = BE_16 (&encoded[stream_ptr]); + stream_ptr += 2; + COLORAB_TO_RGB4 (rgb4, color4, colorA, colorB); + while (n_blocks--) { + block_ptr = row_ptr + pixel_ptr; + for (pixel_y = 0; pixel_y < 4; pixel_y++) { + index = encoded[stream_ptr++]; + for (pixel_x = 0; pixel_x < 4; pixel_x++){ + idx = (index >> (2 * (3 - pixel_x))) & 0x03; + PAINT_CURRENT_PIXEL(rgb4[idx][0], rgb4[idx][1], rgb4[idx][2], color4[idx]); + } + block_ptr += row_inc; + } + ADVANCE_BLOCK(); + } + break; + + /* Fill block with 16 colors */ + case 0x00: + block_ptr = row_ptr + pixel_ptr; + for (pixel_y = 0; pixel_y < 4; pixel_y++) { + for (pixel_x = 0; pixel_x < 4; pixel_x++){ + /* We already have color of upper left pixel */ + if ((pixel_y != 0) || (pixel_x !=0)) { + colorA = BE_16 (&encoded[stream_ptr]); + stream_ptr += 2; + } + COLOR_TO_RGB (r, g, b, colorA); + PAINT_CURRENT_PIXEL(r, g, b, colorA); + } + block_ptr += row_inc; + } + ADVANCE_BLOCK(); + break; + + /* Unknown opcode */ + default: + mp_msg(MSGT_DECVIDEO, MSGL_HINT, "Unknown opcode %d in rpza chunk." + " Skip remaining %lu bytes of chunk data.\n", opcode, + chunk_size - stream_ptr); + return; + } /* Opcode switch */ + + } +} diff -r fd85802f755b -r 628c85c15c7b libmpcodecs/native/qtsmc.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libmpcodecs/native/qtsmc.c Sat Apr 13 18:03:02 2002 +0000 @@ -0,0 +1,513 @@ +/* + Apple Graphics (SMC) Decoder for MPlayer + by Mike Melanson + Special thanks for Roberto Togni for + tracking down the final, nagging bugs. + + The description of the decoding algorithm can be found here: + http://www.pcisys.net/~melanson/codecs/ +*/ + +#include +#include "config.h" +#include "bswap.h" +#include "mp_msg.h" + +#define BE_16(x) (be2me_16(*(unsigned short *)(x))) +#define BE_32(x) (be2me_32(*(unsigned int *)(x))) + +#define COLORS_PER_TABLE 256 +#define BYTES_PER_COLOR 4 + +#define CPAIR 2 +#define CQUAD 4 +#define COCTET 8 + +static unsigned char *color_pairs; +static unsigned char *color_quads; +static unsigned char *color_octets; + +static int color_pair_index; +static int color_quad_index; +static int color_octet_index; + +static int smc_initialized; + +// returns 0 if successfully initialized (enough memory was available), +// non-zero on failure +int qt_init_decode_smc(void) +{ + // be pessimistic to start + smc_initialized = 0; + + // allocate memory for the 3 palette tables + if ((color_pairs = (unsigned char *)malloc( + COLORS_PER_TABLE * BYTES_PER_COLOR * 2)) == 0) + return 1; + if ((color_quads = (unsigned char *)malloc( + COLORS_PER_TABLE * BYTES_PER_COLOR * 4)) == 0) + return 1; + if ((color_octets = (unsigned char *)malloc( + COLORS_PER_TABLE * BYTES_PER_COLOR * 8)) == 0) + return 1; + + // if execution got this far, initialization succeeded + smc_initialized = 1; + return 0; +} + +#define GET_BLOCK_COUNT \ + (opcode & 0x10) ? (1 + encoded[stream_ptr++]) : 1 + (opcode & 0x0F); +#define ADVANCE_BLOCK() \ +{ \ + pixel_ptr += block_x_inc; \ + if (pixel_ptr >= byte_width) \ + { \ + pixel_ptr = 0; \ + row_ptr += block_y_inc * 4; \ + } \ + total_blocks--; \ + if (total_blocks < 0) \ + { \ + mp_msg(MSGT_DECVIDEO, MSGL_WARN, "block counter just went negative (this should not happen)\n"); \ + return; \ + } \ +} + +void qt_decode_smc( + unsigned char *encoded, + int encoded_size, + unsigned char *decoded, + int pixel_width, + int pixel_height, + unsigned char *palette_map, + int bytes_per_pixel) +{ + int i; + int stream_ptr = 0; + int chunk_size; + unsigned char opcode; + int n_blocks; + unsigned int color_flags; + unsigned int color_flags_a; + unsigned int color_flags_b; + unsigned int flag_mask; + + int byte_width = pixel_width * bytes_per_pixel; // width of a row in bytes + int byte_height = pixel_height * byte_width; // max image size, basically + int row_ptr = 0; + int pixel_ptr = 0; + int pixel_x, pixel_y; + int row_inc = bytes_per_pixel * (pixel_width - 4); + int block_x_inc = bytes_per_pixel * 4; + int block_y_inc = bytes_per_pixel * pixel_width; + int block_ptr; + int prev_block_ptr; + int prev_block_ptr1, prev_block_ptr2; + int prev_block_flag; + int total_blocks; + int color_table_index; // indexes to color pair, quad, or octet tables + int color_index; // indexes into palette map + + if (!smc_initialized) + return; + + // reset color tables + color_pair_index = 0; + color_quad_index = 0; + color_octet_index = 0; + + chunk_size = BE_32(&encoded[stream_ptr]) & 0x00FFFFFF; + stream_ptr += 4; + if (chunk_size != encoded_size) + mp_msg(MSGT_DECVIDEO, MSGL_WARN, "MOV chunk size != encoded chunk size; using MOV chunk size\n"); + + chunk_size = encoded_size; + total_blocks = (pixel_width * pixel_height) / (4 * 4); + + // traverse through the blocks + while (total_blocks) + { + // sanity checks + // make sure stream ptr hasn't gone out of bounds + if (stream_ptr > chunk_size) + { + mp_msg(MSGT_DECVIDEO, MSGL_ERR, + "SMC decoder just went out of bounds (stream ptr = %d, chunk size = %d)\n", + stream_ptr, chunk_size); + return; + } + // make sure the row pointer hasn't gone wild + if (row_ptr >= byte_height) + { + mp_msg(MSGT_DECVIDEO, MSGL_ERR, + "SMC decoder just went out of bounds (row ptr = %d, height = %d)\n", + row_ptr, byte_height); + return; + } + + opcode = encoded[stream_ptr++]; + switch (opcode & 0xF0) + { + // skip n blocks + case 0x00: + case 0x10: + n_blocks = GET_BLOCK_COUNT; + while (n_blocks--) + ADVANCE_BLOCK(); + break; + + // repeat last block n times + case 0x20: + case 0x30: + n_blocks = GET_BLOCK_COUNT; + + // sanity check + if ((row_ptr == 0) && (pixel_ptr == 0)) + { + mp_msg(MSGT_DECVIDEO, MSGL_WARN, + "encountered repeat block opcode (%02X) but no blocks rendered yet\n", + opcode & 0xF0); + break; + } + + // figure out where the previous block started + if (pixel_ptr == 0) + prev_block_ptr1 = (row_ptr - block_y_inc * 4) + + byte_width - block_x_inc; + else + prev_block_ptr1 = row_ptr + pixel_ptr - block_x_inc; + + while (n_blocks--) + { + block_ptr = row_ptr + pixel_ptr; + prev_block_ptr = prev_block_ptr1; + for (pixel_y = 0; pixel_y < 4; pixel_y++) + { + for (pixel_x = 0; pixel_x < 4; pixel_x++) + { + decoded[block_ptr++] = decoded[prev_block_ptr++]; + decoded[block_ptr++] = decoded[prev_block_ptr++]; + decoded[block_ptr++] = decoded[prev_block_ptr++]; + if (bytes_per_pixel == 4) /* 32bpp */ + { + block_ptr++; + prev_block_ptr++; + } + } + block_ptr += row_inc; + prev_block_ptr += row_inc; + } + ADVANCE_BLOCK(); + } + break; + + // repeat previous pair of blocks n times + case 0x40: + case 0x50: + n_blocks = GET_BLOCK_COUNT; + n_blocks *= 2; + + // sanity check + if ((row_ptr == 0) && (pixel_ptr < 2 * block_x_inc)) + { + mp_msg(MSGT_DECVIDEO, MSGL_WARN, + "encountered repeat block opcode (%02X) but not enough blocks rendered yet\n", + opcode & 0xF0); + break; + } + + // figure out where the previous 2 blocks started + if (pixel_ptr == 0) + prev_block_ptr1 = (row_ptr - block_y_inc * 4) + + byte_width - block_x_inc * 2; + else if (pixel_ptr == block_x_inc) + prev_block_ptr1 = (row_ptr - block_y_inc * 4) + + byte_width - block_x_inc; + else + prev_block_ptr1 = row_ptr + pixel_ptr - block_x_inc * 2; + + if (pixel_ptr == 0) + prev_block_ptr2 = (row_ptr - block_y_inc * 4) + + (byte_width - block_x_inc); + else + prev_block_ptr2 = row_ptr + pixel_ptr - block_x_inc; + + prev_block_flag = 0; + while (n_blocks--) + { + block_ptr = row_ptr + pixel_ptr; + if (prev_block_flag) + prev_block_ptr = prev_block_ptr2; + else + prev_block_ptr = prev_block_ptr1; + prev_block_flag = !prev_block_flag; + + for (pixel_y = 0; pixel_y < 4; pixel_y++) + { + for (pixel_x = 0; pixel_x < 4; pixel_x++) + { + decoded[block_ptr++] = decoded[prev_block_ptr++]; + decoded[block_ptr++] = decoded[prev_block_ptr++]; + decoded[block_ptr++] = decoded[prev_block_ptr++]; + if (bytes_per_pixel == 4) /* 32bpp */ + { + block_ptr++; + prev_block_ptr++; + } + } + block_ptr += row_inc; + prev_block_ptr += row_inc; + } + ADVANCE_BLOCK(); + } + break; + + // 1-color block encoding + case 0x60: + case 0x70: + n_blocks = GET_BLOCK_COUNT; + color_index = encoded[stream_ptr++] * 4; + + while (n_blocks--) + { + block_ptr = row_ptr + pixel_ptr; + for (pixel_y = 0; pixel_y < 4; pixel_y++) + { + for (pixel_x = 0; pixel_x < 4; pixel_x++) + { + decoded[block_ptr++] = palette_map[color_index + 0]; + decoded[block_ptr++] = palette_map[color_index + 1]; + decoded[block_ptr++] = palette_map[color_index + 2]; + if (bytes_per_pixel == 4) /* 32bpp */ + block_ptr++; + } + block_ptr += row_inc; + } + ADVANCE_BLOCK(); + } + break; + + // 2-color block encoding + case 0x80: + case 0x90: + n_blocks = (opcode & 0x0F) + 1; + + // figure out which color pair to use to paint the 2-color block + if ((opcode & 0xF0) == 0x80) + { + // fetch the next 2 colors from bytestream and store in next + // available entry in the color pair table + for (i = 0; i < CPAIR; i++) + { + color_index = encoded[stream_ptr++] * BYTES_PER_COLOR; + color_table_index = CPAIR * BYTES_PER_COLOR * color_pair_index + + (i * BYTES_PER_COLOR); + color_pairs[color_table_index + 0] = palette_map[color_index + 0]; + color_pairs[color_table_index + 1] = palette_map[color_index + 1]; + color_pairs[color_table_index + 2] = palette_map[color_index + 2]; + } + // this is the base index to use for this block + color_table_index = CPAIR * BYTES_PER_COLOR * color_pair_index; + color_pair_index++; + if (color_pair_index == COLORS_PER_TABLE) + color_pair_index = 0; + } + else + color_table_index = CPAIR * BYTES_PER_COLOR * encoded[stream_ptr++]; + + while (n_blocks--) + { + color_flags = BE_16(&encoded[stream_ptr]); + stream_ptr += 2; + flag_mask = 0x8000; + block_ptr = row_ptr + pixel_ptr; + for (pixel_y = 0; pixel_y < 4; pixel_y++) + { + for (pixel_x = 0; pixel_x < 4; pixel_x++) + { + if (color_flags & flag_mask) + color_index = color_table_index + BYTES_PER_COLOR; + else + color_index = color_table_index; + flag_mask >>= 1; + + decoded[block_ptr++] = color_pairs[color_index + 0]; + decoded[block_ptr++] = color_pairs[color_index + 1]; + decoded[block_ptr++] = color_pairs[color_index + 2]; + if (bytes_per_pixel == 4) /* 32bpp */ + block_ptr++; + } + block_ptr += row_inc; + } + ADVANCE_BLOCK(); + } + break; + + // 4-color block encoding + case 0xA0: + case 0xB0: + n_blocks = (opcode & 0x0F) + 1; + + // figure out which color quad to use to paint the 4-color block + if ((opcode & 0xF0) == 0xA0) + { + // fetch the next 4 colors from bytestream and store in next + // available entry in the color quad table + for (i = 0; i < CQUAD; i++) + { + color_index = encoded[stream_ptr++] * BYTES_PER_COLOR; + color_table_index = CQUAD * BYTES_PER_COLOR * color_quad_index + + (i * BYTES_PER_COLOR); + color_quads[color_table_index + 0] = palette_map[color_index + 0]; + color_quads[color_table_index + 1] = palette_map[color_index + 1]; + color_quads[color_table_index + 2] = palette_map[color_index + 2]; + } + // this is the base index to use for this block + color_table_index = CQUAD * BYTES_PER_COLOR * color_quad_index; + color_quad_index++; + if (color_quad_index == COLORS_PER_TABLE) + color_quad_index = 0; + } + else + color_table_index = CQUAD * BYTES_PER_COLOR * encoded[stream_ptr++]; + + while (n_blocks--) + { + color_flags = BE_32(&encoded[stream_ptr]); + stream_ptr += 4; + // flag mask actually acts as a bit shift count here + flag_mask = 30; + block_ptr = row_ptr + pixel_ptr; + for (pixel_y = 0; pixel_y < 4; pixel_y++) + { + for (pixel_x = 0; pixel_x < 4; pixel_x++) + { + color_index = color_table_index + (BYTES_PER_COLOR * + ((color_flags >> flag_mask) & 0x03)); + flag_mask -= 2; + + decoded[block_ptr++] = color_quads[color_index + 0]; + decoded[block_ptr++] = color_quads[color_index + 1]; + decoded[block_ptr++] = color_quads[color_index + 2]; + if (bytes_per_pixel == 4) /* 32bpp */ + block_ptr++; + } + block_ptr += row_inc; + } + ADVANCE_BLOCK(); + } + break; + + // 8-color block encoding + case 0xC0: + case 0xD0: + n_blocks = (opcode & 0x0F) + 1; + + // figure out which color octet to use to paint the 8-color block + if ((opcode & 0xF0) == 0xC0) + { + // fetch the next 8 colors from bytestream and store in next + // available entry in the color octet table + for (i = 0; i < COCTET; i++) + { + color_index = encoded[stream_ptr++] * BYTES_PER_COLOR; + color_table_index = COCTET * BYTES_PER_COLOR * color_octet_index + + (i * BYTES_PER_COLOR); + color_octets[color_table_index + 0] = palette_map[color_index + 0]; + color_octets[color_table_index + 1] = palette_map[color_index + 1]; + color_octets[color_table_index + 2] = palette_map[color_index + 2]; + } + // this is the base index to use for this block + color_table_index = COCTET * BYTES_PER_COLOR * color_octet_index; + color_octet_index++; + if (color_octet_index == COLORS_PER_TABLE) + color_octet_index = 0; + } + else + color_table_index = COCTET * BYTES_PER_COLOR * encoded[stream_ptr++]; + + while (n_blocks--) + { + /* + For this input: + 01 23 45 67 89 AB + This is the output: + flags_a = xx012456, flags_b = xx89A37B + */ + // build the color flags + color_flags_a = color_flags_b = 0; + color_flags_a = + (encoded[stream_ptr + 0] << 16) | + ((encoded[stream_ptr + 1] & 0xF0) << 8) | + ((encoded[stream_ptr + 2] & 0xF0) << 4) | + ((encoded[stream_ptr + 2] & 0x0F) << 4) | + ((encoded[stream_ptr + 3] & 0xF0) >> 4); + color_flags_b = + (encoded[stream_ptr + 4] << 16) | + ((encoded[stream_ptr + 5] & 0xF0) << 8) | + ((encoded[stream_ptr + 1] & 0x0F) << 8) | + ((encoded[stream_ptr + 3] & 0x0F) << 4) | + (encoded[stream_ptr + 5] & 0x0F); + stream_ptr += 6; + + color_flags = color_flags_a; + // flag mask actually acts as a bit shift count here + flag_mask = 21; + block_ptr = row_ptr + pixel_ptr; + for (pixel_y = 0; pixel_y < 4; pixel_y++) + { + // reload flags at third row (iteration pixel_y == 2) + if (pixel_y == 2) + { + color_flags = color_flags_b; + flag_mask = 21; + } + for (pixel_x = 0; pixel_x < 4; pixel_x++) + { + color_index = color_table_index + (BYTES_PER_COLOR * + ((color_flags >> flag_mask) & 0x07)); + flag_mask -= 3; + + decoded[block_ptr++] = color_octets[color_index + 0]; + decoded[block_ptr++] = color_octets[color_index + 1]; + decoded[block_ptr++] = color_octets[color_index + 2]; + if (bytes_per_pixel == 4) /* 32bpp */ + block_ptr++; + } + block_ptr += row_inc; + } + ADVANCE_BLOCK(); + } + break; + + // 16-color block encoding (every pixel is a different color) + case 0xE0: + n_blocks = (opcode & 0x0F) + 1; + + while (n_blocks--) + { + block_ptr = row_ptr + pixel_ptr; + for (pixel_y = 0; pixel_y < 4; pixel_y++) + { + for (pixel_x = 0; pixel_x < 4; pixel_x++) + { + color_index = encoded[stream_ptr++] * BYTES_PER_COLOR; + decoded[block_ptr++] = palette_map[color_index + 0]; + decoded[block_ptr++] = palette_map[color_index + 1]; + decoded[block_ptr++] = palette_map[color_index + 2]; + if (bytes_per_pixel == 4) /* 32bpp */ + block_ptr++; + } + block_ptr += row_inc; + } + ADVANCE_BLOCK(); + } + break; + + case 0xF0: + mp_msg(MSGT_DECVIDEO, MSGL_HINT, "0xF0 opcode seen in SMC chunk (MPlayer developers would like to know)\n"); + break; + } + } +} diff -r fd85802f755b -r 628c85c15c7b libmpcodecs/native/roqav.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libmpcodecs/native/roqav.c Sat Apr 13 18:03:02 2002 +0000 @@ -0,0 +1,670 @@ +/* + RoQ A/V decoder for the MPlayer program + by Mike Melanson + based on Dr. Tim Ferguson's RoQ document and accompanying source + code found at: + http://www.csse.monash.edu.au/~timf/videocodec.html +*/ + +#include +#include +#include "config.h" +#include "bswap.h" +#include "mp_msg.h" +#include "mp_image.h" + +#define LE_16(x) (le2me_16(*(unsigned short *)(x))) +#define LE_32(x) (le2me_32(*(unsigned int *)(x))) + +#define CLAMP_S16(x) if (x < -32768) x = -32768; \ + else if (x > 32767) x = 32767; +#define SE_16BIT(x) if (x & 0x8000) x -= 0x10000; + +// RoQ chunk types +#define RoQ_INFO 0x1001 +#define RoQ_QUAD_CODEBOOK 0x1002 +#define RoQ_QUAD_VQ 0x1011 +#define RoQ_SOUND_MONO 0x1020 +#define RoQ_SOUND_STEREO 0x1021 + +#define MAX_ROQ_CODEBOOK_SIZE 256 + +// codebook entry for 2x2 vector +typedef struct +{ + // upper and lower luminance value pairs of 2x2 vector: [y0 y1], [y2 y3] + unsigned short v2_y_u; + unsigned short v2_y_l; + + // chrominance components + unsigned char u, v; + + // these variables are for rendering a doublesized 8x8 block; e.g.: + // v2_y_rows12 = [y0 y0 y1 y1] + // v2_y_rows34 = [y2 y2 y3 y3] + unsigned long v2d_y_rows_12; + unsigned long v2d_y_rows_34; + // ex: v2_u_row1 = [u u] + // v2_v_row2 = [v v] + unsigned short v2d_u_rows_12; + unsigned short v2d_v_rows_12; + + // maintain separate bytes for the luminance values as well + unsigned char y0, y1, y2, y3; +} roq_v2_codebook; + +// codebook entry for 4x4 vector +typedef struct +{ + unsigned char v2_index[4]; +} roq_v4_codebook; + +typedef struct +{ + roq_v2_codebook v2[MAX_ROQ_CODEBOOK_SIZE]; + roq_v4_codebook v4[MAX_ROQ_CODEBOOK_SIZE]; + mp_image_t *prev_frame; +} roqvideo_info; + + +// This function fills in the missing information for a v2 vector after +// loading the Y, U and V values. +inline void prep_v2(roq_v2_codebook *v2) +{ + v2->v2_y_u = be2me_16((v2->y0 << 8) | v2->y1); + v2->v2_y_l = be2me_16((v2->y2 << 8) | v2->y3); + + v2->v2d_y_rows_12 = be2me_32((v2->y0 << 24) | (v2->y0 << 16) | + (v2->y1 << 8) | v2->y1); + v2->v2d_y_rows_34 = be2me_32((v2->y2 << 24) | (v2->y2 << 16) | + (v2->y3 << 8) | v2->y3); + + // no reason to swap these for endianness since they're the same bytes + v2->v2d_u_rows_12 = (v2->u << 8) | v2->u; + v2->v2d_v_rows_12 = (v2->v << 8) | v2->v; +} + +inline void paint_v2double_block( + unsigned char *y_plane, + unsigned char *u_plane, + unsigned char *v_plane, + roq_v2_codebook *v2, + unsigned int y_stride, + unsigned int u_stride, + unsigned int v_stride) +{ + // render the luminance components + *(unsigned int *)y_plane = v2->v2d_y_rows_12; + y_plane += y_stride; + *(unsigned int *)y_plane = v2->v2d_y_rows_12; + y_plane += y_stride; + *(unsigned int *)y_plane = v2->v2d_y_rows_34; + y_plane += y_stride; + *(unsigned int *)y_plane = v2->v2d_y_rows_34; + + // render the color planes + *(unsigned short *)u_plane = v2->v2d_u_rows_12; + u_plane += u_stride; + *(unsigned short *)u_plane = v2->v2d_u_rows_12; + + *(unsigned short *)v_plane = v2->v2d_v_rows_12; + v_plane += v_stride; + *(unsigned short *)v_plane = v2->v2d_v_rows_12; +} + +inline void paint_v4_block( + unsigned char *y_plane, + unsigned char *u_plane, + unsigned char *v_plane, + unsigned int y_stride, + unsigned int u_stride, + unsigned int v_stride, + roq_v2_codebook *v2_a, + roq_v2_codebook *v2_b, + roq_v2_codebook *v2_c, + roq_v2_codebook *v2_d) +{ + // render luminance components + ((unsigned short *)y_plane)[0] = v2_a->v2_y_u; + ((unsigned short *)y_plane)[1] = v2_b->v2_y_u; + + y_plane += y_stride; + ((unsigned short *)y_plane)[0] = v2_a->v2_y_l; + ((unsigned short *)y_plane)[1] = v2_b->v2_y_l; + + y_plane += y_stride; + ((unsigned short *)y_plane)[0] = v2_c->v2_y_u; + ((unsigned short *)y_plane)[1] = v2_d->v2_y_u; + + y_plane += y_stride; + ((unsigned short *)y_plane)[0] = v2_c->v2_y_l; + ((unsigned short *)y_plane)[1] = v2_d->v2_y_l; + + // render the color planes + u_plane[0] = v2_a->u; + u_plane[1] = v2_b->u; + u_plane += u_stride; + u_plane[0] = v2_c->u; + u_plane[1] = v2_d->u; + + v_plane[0] = v2_a->v; + v_plane[1] = v2_b->v; + v_plane += v_stride; + v_plane[0] = v2_c->v; + v_plane[1] = v2_d->v; +} + +// This function copies the 4x4 block from the prev_*_planes to the +// current *_planes. +inline void copy_4x4_block( + unsigned char *y_plane, + unsigned char *u_plane, + unsigned char *v_plane, + unsigned char *prev_y_plane, + unsigned char *prev_u_plane, + unsigned char *prev_v_plane, + unsigned int y_stride, + unsigned int u_stride, + unsigned int v_stride) +{ + int i; + + // copy over the luminance components (4 rows, 1 uint each) + for (i = 0; i < 4; i++) + { + *(unsigned int *)y_plane = *(unsigned int *)prev_y_plane; + y_plane += y_stride; + prev_y_plane += y_stride; + } + + // copy the chrominance values + for (i = 0; i < 2; i++) + { + *(unsigned short*)u_plane = *(unsigned short*)prev_u_plane; + u_plane += u_stride; + prev_u_plane += u_stride; + *(unsigned short*)v_plane = *(unsigned short*)prev_v_plane; + v_plane += v_stride; + prev_v_plane += v_stride; + } +} + +// This function copies the 8x8 block from the prev_*_planes to the +// current *_planes. +inline void copy_8x8_block( + unsigned char *y_plane, + unsigned char *u_plane, + unsigned char *v_plane, + unsigned char *prev_y_plane, + unsigned char *prev_u_plane, + unsigned char *prev_v_plane, + unsigned int y_stride, + unsigned int u_stride, + unsigned int v_stride) +{ + int i; + + // copy over the luminance components (8 rows, 2 uints each) + for (i = 0; i < 8; i++) + { + ((unsigned int *)y_plane)[0] = ((unsigned int *)prev_y_plane)[0]; + ((unsigned int *)y_plane)[1] = ((unsigned int *)prev_y_plane)[1]; + y_plane += y_stride; + prev_y_plane += y_stride; + } + + // copy the chrominance values + for (i = 0; i < 4; i++) + { + *(unsigned int*)u_plane = *(unsigned int*)prev_u_plane; + u_plane += u_stride; + prev_u_plane += u_stride; + *(unsigned int*)v_plane = *(unsigned int*)prev_v_plane; + v_plane += v_stride; + prev_v_plane += v_stride; + } +} + +// This function creates storage space for the vector codebooks. +void *roq_decode_video_init(void) +{ + roqvideo_info *info = + (roqvideo_info *)calloc(sizeof(roqvideo_info), 1); + + info->prev_frame = NULL; + + return info; +} + +#define EMPTY_ROQ_CODEWORD 0xFFFF0000 + +#define FETCH_NEXT_CODE() \ + if (current_roq_codeword == EMPTY_ROQ_CODEWORD) \ + { \ + if (stream_ptr + 2 > encoded_size) \ + { \ + mp_msg(MSGT_DECVIDEO, MSGL_WARN, \ + "RoQ video: stream pointer just went out of bounds (1)\n"); \ + return; \ + } \ + current_roq_codeword = (0x0000FFFF) | \ + (encoded[stream_ptr + 0] << 16) | \ + (encoded[stream_ptr + 1] << 24); \ + stream_ptr += 2; \ + } \ + roq_code = ((current_roq_codeword >> 30) & 0x03); \ + current_roq_codeword <<= 2; + +#define FETCH_NEXT_ARGUMENT() \ + if (stream_ptr + 1 > encoded_size) \ + { \ + mp_msg(MSGT_DECVIDEO, MSGL_WARN, \ + "RoQ video: stream pointer just went out of bounds (2)\n"); \ + return; \ + } \ + argument = encoded[stream_ptr++]; + +#define CHECK_PREV_FRAME() \ + if (!info->prev_frame) \ + { \ + mp_msg(MSGT_DECVIDEO, MSGL_WARN, \ + "RoQ video: can't handle motion vector when there's no previous frame\n"); \ + return; \ + } + +void roq_decode_video(void *context, unsigned char *encoded, + int encoded_size, mp_image_t *mpi) +{ + roqvideo_info *info = (roqvideo_info *)context; + + int stream_ptr = 0; + int i, j; + int chunk_length; + int v2_count; + int v4_count; + + int roq_code; + unsigned int current_roq_codeword = EMPTY_ROQ_CODEWORD; + unsigned char argument = 0; + char mean_motion_x; + char mean_motion_y; + int mx, my; // for calculating the motion vector + + int mblock_x = 0; + int mblock_y = 0; + int quad8_x, quad8_y; // for pointing to 8x8 blocks in a macroblock + int quad4_x, quad4_y; // for pointing to 4x4 blocks in an 8x8 block + + unsigned char *y_plane; + unsigned char *u_plane; + unsigned char *v_plane; + unsigned char *prev_y_plane; + unsigned char *prev_u_plane; + unsigned char *prev_v_plane; + unsigned int y_stride = mpi->stride[0]; + unsigned int u_stride = mpi->stride[1]; + unsigned int v_stride = mpi->stride[2]; + + roq_v4_codebook v4; + + // make sure the encoded chunk is of minimal acceptable length + if (encoded_size < 8) + { + mp_msg(MSGT_DECVIDEO, MSGL_WARN, + "RoQ video: chunk isn't even 8 bytes long (minimum acceptable length)\n"); + return; + } + + // make sure the resolution checks out + if ((mpi->width % 16 != 0) || (mpi->height % 16 != 0)) + { + mp_msg(MSGT_DECVIDEO, MSGL_WARN, + "RoQ video resolution: %d x %d; expected dimensions divisible by 16\n"); + return; + } + + if (LE_16(&encoded[stream_ptr]) == RoQ_QUAD_CODEBOOK) + { + stream_ptr += 2; + chunk_length = LE_32(&encoded[stream_ptr]); + stream_ptr += 4; + v4_count = encoded[stream_ptr++]; + v2_count = encoded[stream_ptr++]; + if (v2_count == 0) + v2_count = 256; + if ((v4_count == 0) && (v2_count * 6 < chunk_length)) + v4_count = 256; + + // make sure the lengths agree with each other + if (((v2_count * 6) + (v4_count * 4)) != chunk_length) + { + mp_msg(MSGT_DECVIDEO, MSGL_WARN, + "RoQ video: encountered quad codebook chunk with weird lengths (1)\n"); + return; + } + if ((v2_count * 6) > (encoded_size - stream_ptr)) + { + mp_msg(MSGT_DECVIDEO, MSGL_WARN, + "RoQ video: encountered quad codebook chunk with weird lengths (2)\n"); + return; + } + + // load the 2x2 vectors + for (i = 0; i < v2_count; i++) + { + info->v2[i].y0 = encoded[stream_ptr++]; + info->v2[i].y1 = encoded[stream_ptr++]; + info->v2[i].y2 = encoded[stream_ptr++]; + info->v2[i].y3 = encoded[stream_ptr++]; + info->v2[i].u = encoded[stream_ptr++]; + info->v2[i].v = encoded[stream_ptr++]; + prep_v2(&info->v2[i]); + } + + if ((v4_count * 4) > (encoded_size - stream_ptr)) + { + mp_msg(MSGT_DECVIDEO, MSGL_WARN, + "RoQ video: encountered quad codebook chunk with weird lengths (3)\n"); + return; + } + + // load the 4x4 vectors + for (i = 0; i < v4_count; i++) + { + info->v4[i].v2_index[0] = encoded[stream_ptr++]; + info->v4[i].v2_index[1] = encoded[stream_ptr++]; + info->v4[i].v2_index[2] = encoded[stream_ptr++]; + info->v4[i].v2_index[3] = encoded[stream_ptr++]; + } + } + + if (LE_16(&encoded[stream_ptr]) == RoQ_QUAD_VQ) + { + stream_ptr += 2; + chunk_length = LE_32(&encoded[stream_ptr]); + stream_ptr += 4; + mean_motion_y = encoded[stream_ptr++]; + mean_motion_x = encoded[stream_ptr++]; + + // start by copying entire previous frame + if (info->prev_frame) + { + memcpy(mpi->planes[0], info->prev_frame->planes[0], + mpi->width * mpi->height); + memcpy(mpi->planes[1], info->prev_frame->planes[1], + (mpi->width * mpi->height) / 4); + memcpy(mpi->planes[2], info->prev_frame->planes[2], + (mpi->width * mpi->height) / 4); + } + + // iterate through the 16x16 macroblocks + for (mblock_y = 0; mblock_y < mpi->height; mblock_y += 16) + { + for (mblock_x = 0; mblock_x < mpi->width; mblock_x += 16) + { + // iterate through the 4 quadrants of the macroblock + for (i = 0; i < 4; i++) + { + quad8_x = mblock_x; + quad8_y = mblock_y; + if (i & 0x01) quad8_x += 8; + if (i & 0x02) quad8_y += 8; + + // set up the planes + y_plane = mpi->planes[0] + quad8_y * y_stride + quad8_x; + u_plane = mpi->planes[1] + (quad8_y / 2) * u_stride + (quad8_x / 2); + v_plane = mpi->planes[2] + (quad8_y / 2) * v_stride + (quad8_x / 2); + + // decide how to handle this 8x8 quad + FETCH_NEXT_CODE(); + switch(roq_code) + { + // 8x8 block is the same as in the previous frame; + // skip it + case 0: + break; + + // 8x8 block is painted with an 8x8 block from the last frame + // (i.e., motion compensation) + case 1: + CHECK_PREV_FRAME(); + + // prepare the pointers to the planes in the previous frame + FETCH_NEXT_ARGUMENT(); // argument contains motion vectors + + // figure out the motion vectors + mx = quad8_x + 8 - (argument >> 4) - mean_motion_x; + my = quad8_y + 8 - (argument & 0x0F) - mean_motion_y; + + prev_y_plane = info->prev_frame->planes[0] + + my * y_stride + mx; + prev_u_plane = info->prev_frame->planes[1] + + (my / 2) * u_stride + (mx + 1) / 2; + prev_v_plane = info->prev_frame->planes[2] + + (my / 2) * v_stride + (mx + 1) / 2; + +// sanity check before rendering + copy_8x8_block( + y_plane, + u_plane, + v_plane, + prev_y_plane, + prev_u_plane, + prev_v_plane, + y_stride, + u_stride, + v_stride + ); + break; + + // 8x8 block is painted with a doublesized 4x4 vector + case 2: + FETCH_NEXT_ARGUMENT(); + v4 = info->v4[argument]; + +// sanity check before rendering + // iterate through 4 4x4 blocks + for (j = 0; j < 4; j++) + { + quad4_x = quad8_x; + quad4_y = quad8_y; + if (j & 0x01) quad4_x += 4; + if (j & 0x02) quad4_y += 4; + + // set up the planes + y_plane = mpi->planes[0] + quad4_y * y_stride + quad4_x; + u_plane = mpi->planes[1] + + (quad4_y / 2) * u_stride + (quad4_x / 2); + v_plane = mpi->planes[2] + + (quad4_y / 2) * v_stride + (quad4_x / 2); + + paint_v2double_block( + y_plane, + u_plane, + v_plane, + &info->v2[v4.v2_index[j]], + y_stride, + u_stride, + v_stride + ); + } + break; + + // 8x8 block is broken down into 4 4x4 blocks and painted using + // 4 different codes. + case 3: + // iterate through 4 4x4 blocks + for (j = 0; j < 4; j++) + { + quad4_x = quad8_x; + quad4_y = quad8_y; + if (j & 0x01) quad4_x += 4; + if (j & 0x02) quad4_y += 4; + + // set up the planes + y_plane = mpi->planes[0] + quad4_y * y_stride + quad4_x; + u_plane = mpi->planes[1] + + (quad4_y / 2) * u_stride + (quad4_x / 2); + v_plane = mpi->planes[2] + + (quad4_y / 2) * v_stride + (quad4_x / 2); + + // decide how to handle this 4x4 quad + FETCH_NEXT_CODE(); + switch(roq_code) + { + // 4x4 block is the same as in the previous frame; + // skip it + case 0: + break; + + // 4x4 block is motion compensated from the previous frame + case 1: + CHECK_PREV_FRAME(); + // prepare the pointers to the planes in the previous frame + FETCH_NEXT_ARGUMENT(); // argument contains motion vectors + + // figure out the motion vectors + mx = quad4_x + 8 - (argument >> 4) - mean_motion_x; + my = quad4_y + 8 - (argument & 0x0F) - mean_motion_y; + + prev_y_plane = info->prev_frame->planes[0] + + my * y_stride + mx; + prev_u_plane = info->prev_frame->planes[1] + + (my / 2) * u_stride + (mx + 1) / 2; + prev_v_plane = info->prev_frame->planes[2] + + (my / 2) * u_stride + (mx + 1) / 2; + +// sanity check before rendering + copy_4x4_block( + y_plane, + u_plane, + v_plane, + prev_y_plane, + prev_u_plane, + prev_v_plane, + y_stride, + u_stride, + v_stride + ); + break; + + // 4x4 block is copied directly from v4 vector table + case 2: + FETCH_NEXT_ARGUMENT(); + v4 = info->v4[argument]; + + paint_v4_block( + y_plane, + u_plane, + v_plane, + y_stride, + u_stride, + v_stride, + &info->v2[v4.v2_index[0]], + &info->v2[v4.v2_index[1]], + &info->v2[v4.v2_index[2]], + &info->v2[v4.v2_index[3]]); + break; + + // 4x4 block is built from 4 2x2 vectors + case 3: + if (stream_ptr + 4 > encoded_size) + { + mp_msg(MSGT_DECVIDEO, MSGL_WARN, + "RoQ video: stream pointer just went out of bounds (2)\n"); + return; + } + paint_v4_block( + y_plane, + u_plane, + v_plane, + y_stride, + u_stride, + v_stride, + &info->v2[encoded[stream_ptr + 0]], + &info->v2[encoded[stream_ptr + 1]], + &info->v2[encoded[stream_ptr + 2]], + &info->v2[encoded[stream_ptr + 3]]); + stream_ptr += 4; + break; + } + } + break; + } + } + } + } + } + + // one last sanity check on the way out + // (apparently, it's not unusual to have 2 bytes left over after decode) + if (stream_ptr < encoded_size - 2) + { + mp_msg(MSGT_DECVIDEO, MSGL_WARN, + "RoQ video: completed frame decode with bytes left over (%d < %d)\n", + stream_ptr, encoded_size); + } + + // save the current frame as the previous frame for the next iteration + info->prev_frame = mpi; +} + +// Initialize the RoQ audio decoder, which is to say, initialize the table +// of squares. +void *roq_decode_audio_init(void) +{ + short *square_array; + short square; + int i; + + square_array = (short *)malloc(256 * sizeof(short)); + if (!square_array) + return NULL; + + for (i = 0; i < 128; i++) + { + square = i * i; + square_array[i] = square; + square_array[i + 128] = -square; + } + + return square_array; +} + +int roq_decode_audio( + unsigned short *output, + unsigned char *input, + int encoded_size, + int channels, + void *context) +{ + short *square_array = (short *)context; + int i; + int predictor[2]; + int channel_number = 0; + + // prepare the initial predictors + if (channels == 1) + predictor[0] = LE_16(&input[0]); + else + { + predictor[0] = input[1] << 8; + predictor[1] = input[0] << 8; + } + SE_16BIT(predictor[0]); + SE_16BIT(predictor[1]); + + // decode the samples + for (i = 2; i < encoded_size; i++) + { + predictor[channel_number] += square_array[input[i]]; + CLAMP_S16(predictor[channel_number]); + output[i - 2] = predictor[channel_number]; + + // toggle channel + channel_number ^= channels - 1; + } + + // return the number of samples decoded + return (encoded_size - 2); +} diff -r fd85802f755b -r 628c85c15c7b libmpcodecs/native/roqav.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libmpcodecs/native/roqav.h Sat Apr 13 18:03:02 2002 +0000 @@ -0,0 +1,12 @@ +#ifndef ROQAV_H +#define ROQAV_H + +void *roq_decode_video_init(void); +void roq_decode_video(void *context, unsigned char *encoded, + int encoded_size, mp_image_t *mpi); + +void *roq_decode_audio_init(void); +int roq_decode_audio(unsigned short *output, unsigned char *input, + int encoded_size, int channels, void *context); + +#endif // ROQAV_H diff -r fd85802f755b -r 628c85c15c7b lzoconf.h --- a/lzoconf.h Sat Apr 13 17:44:14 2002 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,386 +0,0 @@ -/* lzoconf.h -- configuration for the LZO real-time data compression library - - This file is part of the LZO real-time data compression library. - - Copyright (C) 2000 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 1999 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 1998 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 1997 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 1996 Markus Franz Xaver Johannes Oberhumer - - The LZO library is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of - the License, or (at your option) any later version. - - The LZO library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with the LZO library; see the file COPYING. - If not, write to the Free Software Foundation, Inc., - 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - - Markus F.X.J. Oberhumer - - http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html - */ - - -#ifndef __LZOCONF_H -#define __LZOCONF_H - -#define LZO_VERSION 0x1070 -#define LZO_VERSION_STRING "1.07" -#define LZO_VERSION_DATE "Oct 18 2000" - -/* internal Autoconf configuration file - only used when building LZO */ -#if defined(LZO_HAVE_CONFIG_H) -# include -#endif -#include - -#ifdef __cplusplus -extern "C" { -#endif - - -/*********************************************************************** -// LZO requires a conforming -************************************************************************/ - -#if !defined(CHAR_BIT) || (CHAR_BIT != 8) -# error "invalid CHAR_BIT" -#endif -#if !defined(UCHAR_MAX) || !defined(UINT_MAX) || !defined(ULONG_MAX) -# error "check your compiler installation" -#endif -#if (USHRT_MAX < 1) || (UINT_MAX < 1) || (ULONG_MAX < 1) -# error "your limits.h macros are broken" -#endif - -/* workaround a cpp bug under hpux 10.20 */ -#define LZO_0xffffffffL 4294967295ul - - -/*********************************************************************** -// architecture defines -************************************************************************/ - -#if !defined(__LZO_WIN) && !defined(__LZO_DOS) && !defined(__LZO_OS2) -# if defined(__WINDOWS__) || defined(_WINDOWS) || defined(_Windows) -# define __LZO_WIN -# elif defined(__WIN32__) || defined(_WIN32) || defined(WIN32) -# define __LZO_WIN -# elif defined(__NT__) || defined(__NT_DLL__) || defined(__WINDOWS_386__) -# define __LZO_WIN -# elif defined(__DOS__) || defined(__MSDOS__) || defined(MSDOS) -# define __LZO_DOS -# elif defined(__OS2__) || defined(__OS2V2__) || defined(OS2) -# define __LZO_OS2 -# elif defined(__palmos__) -# define __LZO_PALMOS -# elif defined(__TOS__) || defined(__atarist__) -# define __LZO_TOS -# endif -#endif - -#if (UINT_MAX < LZO_0xffffffffL) -# if defined(__LZO_WIN) -# define __LZO_WIN16 -# elif defined(__LZO_DOS) -# define __LZO_DOS16 -# elif defined(__LZO_PALMOS) -# define __LZO_PALMOS16 -# elif defined(__LZO_TOS) -# define __LZO_TOS16 -# elif defined(__C166__) -# else -# error "16-bit target not supported - contact me for porting hints" -# endif -#endif - -#if !defined(__LZO_i386) -# if defined(__LZO_DOS) || defined(__LZO_WIN16) -# define __LZO_i386 -# elif defined(__i386__) || defined(__386__) || defined(_M_IX86) -# define __LZO_i386 -# endif -#endif - -#if defined(__LZO_STRICT_16BIT) -# if (UINT_MAX < LZO_0xffffffffL) -# include -# endif -#endif - -/* memory checkers */ -#if !defined(__LZO_CHECKER) -# if defined(__BOUNDS_CHECKING_ON) -# define __LZO_CHECKER -# elif defined(__CHECKER__) -# define __LZO_CHECKER -# elif defined(__INSURE__) -# define __LZO_CHECKER -# elif defined(__PURIFY__) -# define __LZO_CHECKER -# endif -#endif - - -/*********************************************************************** -// integral and pointer types -************************************************************************/ - -/* Integral types with 32 bits or more */ -#if !defined(LZO_UINT32_MAX) -# if (UINT_MAX >= LZO_0xffffffffL) - typedef unsigned int lzo_uint32; - typedef int lzo_int32; -# define LZO_UINT32_MAX UINT_MAX -# define LZO_INT32_MAX INT_MAX -# define LZO_INT32_MIN INT_MIN -# elif (ULONG_MAX >= LZO_0xffffffffL) - typedef unsigned long lzo_uint32; - typedef long lzo_int32; -# define LZO_UINT32_MAX ULONG_MAX -# define LZO_INT32_MAX LONG_MAX -# define LZO_INT32_MIN LONG_MIN -# else -# error "lzo_uint32" -# endif -#endif - -/* lzo_uint is used like size_t */ -#if !defined(LZO_UINT_MAX) -# if (UINT_MAX >= LZO_0xffffffffL) - typedef unsigned int lzo_uint; - typedef int lzo_int; -# define LZO_UINT_MAX UINT_MAX -# define LZO_INT_MAX INT_MAX -# define LZO_INT_MIN INT_MIN -# elif (ULONG_MAX >= LZO_0xffffffffL) - typedef unsigned long lzo_uint; - typedef long lzo_int; -# define LZO_UINT_MAX ULONG_MAX -# define LZO_INT_MAX LONG_MAX -# define LZO_INT_MIN LONG_MIN -# else -# error "lzo_uint" -# endif -#endif - - -/* Memory model that allows to access memory at offsets of lzo_uint. */ -#if !defined(__LZO_MMODEL) -# if (LZO_UINT_MAX <= UINT_MAX) -# define __LZO_MMODEL -# elif defined(__LZO_DOS16) || defined(__LZO_WIN16) -# define __LZO_MMODEL __huge -# define LZO_999_UNSUPPORTED -# elif defined(__LZO_PALMOS16) || defined(__LZO_TOS16) -# define __LZO_MMODEL -# else -# error "__LZO_MMODEL" -# endif -#endif - -/* no typedef here because of const-pointer issues */ -#define lzo_byte unsigned char __LZO_MMODEL -#define lzo_bytep unsigned char __LZO_MMODEL * -#define lzo_charp char __LZO_MMODEL * -#define lzo_voidp void __LZO_MMODEL * -#define lzo_shortp short __LZO_MMODEL * -#define lzo_ushortp unsigned short __LZO_MMODEL * -#define lzo_uint32p lzo_uint32 __LZO_MMODEL * -#define lzo_int32p lzo_int32 __LZO_MMODEL * -#define lzo_uintp lzo_uint __LZO_MMODEL * -#define lzo_intp lzo_int __LZO_MMODEL * -#define lzo_voidpp lzo_voidp __LZO_MMODEL * -#define lzo_bytepp lzo_bytep __LZO_MMODEL * - -typedef int lzo_bool; - -#ifndef lzo_sizeof_dict_t -# define lzo_sizeof_dict_t sizeof(lzo_bytep) -#endif - - -/*********************************************************************** -// function types -************************************************************************/ - -/* linkage */ -#if !defined(__LZO_EXTERN_C) -# ifdef __cplusplus -# define __LZO_EXTERN_C extern "C" -# else -# define __LZO_EXTERN_C extern -# endif -#endif - -/* calling conventions */ -#if !defined(__LZO_CDECL) -# if defined(__LZO_DOS16) || defined(__LZO_WIN16) -# define __LZO_CDECL __far __cdecl -# elif defined(__LZO_i386) && defined(_MSC_VER) -# define __LZO_CDECL __cdecl -# elif defined(__LZO_i386) && defined(__WATCOMC__) -# define __LZO_CDECL __near __cdecl -# else -# define __LZO_CDECL -# endif -#endif -#if !defined(__LZO_ENTRY) -# define __LZO_ENTRY __LZO_CDECL -#endif - -/* DLL export information */ -#if !defined(__LZO_EXPORT1) -# define __LZO_EXPORT1 -#endif -#if !defined(__LZO_EXPORT2) -# define __LZO_EXPORT2 -#endif - -/* calling convention for C functions */ -#if !defined(LZO_PUBLIC) -# define LZO_PUBLIC(_rettype) __LZO_EXPORT1 _rettype __LZO_EXPORT2 __LZO_ENTRY -#endif -#if !defined(LZO_EXTERN) -# define LZO_EXTERN(_rettype) __LZO_EXTERN_C LZO_PUBLIC(_rettype) -#endif -#if !defined(LZO_PRIVATE) -# define LZO_PRIVATE(_rettype) static _rettype __LZO_ENTRY -#endif - -/* cdecl calling convention for assembler functions */ -#if !defined(LZO_PUBLIC_CDECL) -# define LZO_PUBLIC_CDECL(_rettype) \ - __LZO_EXPORT1 _rettype __LZO_EXPORT2 __LZO_CDECL -#endif -#if !defined(LZO_EXTERN_CDECL) -# define LZO_EXTERN_CDECL(_rettype) __LZO_EXTERN_C LZO_PUBLIC_CDECL(_rettype) -#endif - - -typedef int -(__LZO_ENTRY *lzo_compress_t) ( const lzo_byte *src, lzo_uint src_len, - lzo_byte *dst, lzo_uint *dst_len, - lzo_voidp wrkmem ); - -typedef int -(__LZO_ENTRY *lzo_decompress_t) ( const lzo_byte *src, lzo_uint src_len, - lzo_byte *dst, lzo_uint *dst_len, - lzo_voidp wrkmem ); - -typedef int -(__LZO_ENTRY *lzo_optimize_t) ( lzo_byte *src, lzo_uint src_len, - lzo_byte *dst, lzo_uint *dst_len, - lzo_voidp wrkmem ); - -typedef int -(__LZO_ENTRY *lzo_compress_dict_t)(const lzo_byte *src, lzo_uint src_len, - lzo_byte *dst, lzo_uint *dst_len, - lzo_voidp wrkmem, - const lzo_byte *dict, lzo_uint dict_len ); - -typedef int -(__LZO_ENTRY *lzo_decompress_dict_t)(const lzo_byte *src, lzo_uint src_len, - lzo_byte *dst, lzo_uint *dst_len, - lzo_voidp wrkmem, - const lzo_byte *dict, lzo_uint dict_len ); - - -/* a progress indicator callback function */ -typedef void (__LZO_ENTRY *lzo_progress_callback_t) (lzo_uint, lzo_uint); - - -/*********************************************************************** -// error codes and prototypes -************************************************************************/ - -/* Error codes for the compression/decompression functions. Negative - * values are errors, positive values will be used for special but - * normal events. - */ -#define LZO_E_OK 0 -#define LZO_E_ERROR (-1) -#define LZO_E_OUT_OF_MEMORY (-2) /* not used right now */ -#define LZO_E_NOT_COMPRESSIBLE (-3) /* not used right now */ -#define LZO_E_INPUT_OVERRUN (-4) -#define LZO_E_OUTPUT_OVERRUN (-5) -#define LZO_E_LOOKBEHIND_OVERRUN (-6) -#define LZO_E_EOF_NOT_FOUND (-7) -#define LZO_E_INPUT_NOT_CONSUMED (-8) - - -/* lzo_init() should be the first function you call. - * Check the return code ! - * - * lzo_init() is a macro to allow checking that the library and the - * compiler's view of various types are consistent. - */ -#define lzo_init() __lzo_init2(LZO_VERSION,(int)sizeof(short),(int)sizeof(int),\ - (int)sizeof(long),(int)sizeof(lzo_uint32),(int)sizeof(lzo_uint),\ - (int)lzo_sizeof_dict_t,(int)sizeof(char *),(int)sizeof(lzo_voidp),\ - (int)sizeof(lzo_compress_t)) -LZO_EXTERN(int) __lzo_init2(unsigned,int,int,int,int,int,int,int,int,int); - -/* version functions (useful for shared libraries) */ -LZO_EXTERN(unsigned) lzo_version(void); -LZO_EXTERN(const char *) lzo_version_string(void); -LZO_EXTERN(const char *) lzo_version_date(void); -LZO_EXTERN(const lzo_charp) _lzo_version_string(void); -LZO_EXTERN(const lzo_charp) _lzo_version_date(void); - -/* string functions */ -LZO_EXTERN(int) -lzo_memcmp(const lzo_voidp _s1, const lzo_voidp _s2, lzo_uint _len); -LZO_EXTERN(lzo_voidp) -lzo_memcpy(lzo_voidp _dest, const lzo_voidp _src, lzo_uint _len); -LZO_EXTERN(lzo_voidp) -lzo_memmove(lzo_voidp _dest, const lzo_voidp _src, lzo_uint _len); -LZO_EXTERN(lzo_voidp) -lzo_memset(lzo_voidp _s, int _c, lzo_uint _len); - -/* checksum functions */ -LZO_EXTERN(lzo_uint32) -lzo_adler32(lzo_uint32 _adler, const lzo_byte *_buf, lzo_uint _len); -LZO_EXTERN(lzo_uint32) -lzo_crc32(lzo_uint32 _c, const lzo_byte *_buf, lzo_uint _len); - -/* memory allocation functions */ -LZO_EXTERN(lzo_bytep) lzo_alloc(lzo_uint _nelems, lzo_uint _size); -LZO_EXTERN(lzo_bytep) lzo_malloc(lzo_uint _size); -LZO_EXTERN(void) lzo_free(lzo_voidp _ptr); - -typedef lzo_bytep (__LZO_ENTRY *lzo_alloc_hook_t) (lzo_uint, lzo_uint); -typedef void (__LZO_ENTRY *lzo_free_hook_t) (lzo_voidp); - -extern lzo_alloc_hook_t lzo_alloc_hook; -extern lzo_free_hook_t lzo_free_hook; - -/* misc. */ -LZO_EXTERN(lzo_bool) lzo_assert(int _expr); -LZO_EXTERN(int) _lzo_config_check(void); -typedef union { lzo_bytep p; lzo_uint u; } __lzo_pu_u; -typedef union { lzo_bytep p; lzo_uint32 u32; } __lzo_pu32_u; - -/* align a char pointer on a boundary that is a multiple of `size' */ -LZO_EXTERN(unsigned) __lzo_align_gap(const lzo_voidp _ptr, lzo_uint _size); -#define LZO_PTR_ALIGN_UP(_ptr,_size) \ - ((_ptr) + (lzo_uint) __lzo_align_gap((const lzo_voidp)(_ptr),(lzo_uint)(_size))) - -/* deprecated - only for backward compatibility */ -#define LZO_ALIGN(_ptr,_size) LZO_PTR_ALIGN_UP(_ptr,_size) - - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* already included */ - diff -r fd85802f755b -r 628c85c15c7b minilzo.c --- a/minilzo.c Sat Apr 13 17:44:14 2002 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2848 +0,0 @@ -/* minilzo.c -- mini subset of the LZO real-time data compression library - - This file is part of the LZO real-time data compression library. - - Copyright (C) 2000 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 1999 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 1998 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 1997 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 1996 Markus Franz Xaver Johannes Oberhumer - - The LZO library is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of - the License, or (at your option) any later version. - - The LZO library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with the LZO library; see the file COPYING. - If not, write to the Free Software Foundation, Inc., - 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - - Markus F.X.J. Oberhumer - - http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html - */ - -/* - * NOTE: - * the full LZO package can be found at - * http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html - */ - -#define __LZO_IN_MINILZO - -#ifdef MINILZO_HAVE_CONFIG_H -# include -#endif - -#undef LZO_HAVE_CONFIG_H -#include "minilzo.h" - -#if !defined(MINILZO_VERSION) || (MINILZO_VERSION != 0x1070) -# error "version mismatch in miniLZO source files" -#endif - -#ifdef MINILZO_HAVE_CONFIG_H -# define LZO_HAVE_CONFIG_H -#endif - -#if !defined(LZO_NO_SYS_TYPES_H) -# include -#endif -#include - -#ifndef __LZO_CONF_H -#define __LZO_CONF_H - -#if !defined(__LZO_IN_MINILZO) -# ifndef __LZOCONF_H -# include -# endif -#endif - -#if defined(__BOUNDS_CHECKING_ON) -# include -#else -# define BOUNDS_CHECKING_OFF_DURING(stmt) stmt -# define BOUNDS_CHECKING_OFF_IN_EXPR(expr) (expr) -#endif - -#if !defined(LZO_HAVE_CONFIG_H) -# include -# include -# if !defined(NO_STDLIB_H) -# include -# endif -# define HAVE_MEMCMP -# define HAVE_MEMCPY -# define HAVE_MEMMOVE -# define HAVE_MEMSET -#else -# include -# if defined(STDC_HEADERS) -# include -# include -# endif -# if defined(HAVE_STDDEF_H) -# include -# endif -# if defined(HAVE_MEMORY_H) -# include -# endif -#endif - -#if defined(__LZO_DOS16) || defined(__LZO_WIN16) -# define HAVE_MALLOC_H -# define HAVE_HALLOC -#endif - -#undef NDEBUG -#if !defined(LZO_DEBUG) -# define NDEBUG -#endif -#if defined(LZO_DEBUG) || !defined(NDEBUG) -# if !defined(NO_STDIO_H) -# include -# endif -#endif -#include - -#if !defined(LZO_UNUSED) -# define LZO_UNUSED(parm) (parm = parm) -#endif - -#if !defined(__inline__) && !defined(__GNUC__) -# if defined(__cplusplus) -# define __inline__ inline -# else -# define __inline__ -# endif -#endif - -#if defined(NO_MEMCMP) -# undef HAVE_MEMCMP -#endif - -#if !defined(HAVE_MEMCMP) -# undef memcmp -# define memcmp lzo_memcmp -#endif -#if !defined(HAVE_MEMCPY) -# undef memcpy -# define memcpy lzo_memcpy -#endif -#if !defined(HAVE_MEMMOVE) -# undef memmove -# define memmove lzo_memmove -#endif -#if !defined(HAVE_MEMSET) -# undef memset -# define memset lzo_memset -#endif - -#if 1 -# define LZO_BYTE(x) ((unsigned char) (x)) -#else -# define LZO_BYTE(x) ((unsigned char) ((x) & 0xff)) -#endif -#if 0 -# define LZO_USHORT(x) ((unsigned short) (x)) -#else -# define LZO_USHORT(x) ((unsigned short) ((x) & 0xffff)) -#endif - -#define LZO_MAX(a,b) ((a) >= (b) ? (a) : (b)) -#define LZO_MIN(a,b) ((a) <= (b) ? (a) : (b)) -#define LZO_MAX3(a,b,c) ((a) >= (b) ? LZO_MAX(a,c) : LZO_MAX(b,c)) -#define LZO_MIN3(a,b,c) ((a) <= (b) ? LZO_MIN(a,c) : LZO_MIN(b,c)) - -#define lzo_sizeof(type) ((lzo_uint) (sizeof(type))) - -#define LZO_HIGH(array) ((lzo_uint) (sizeof(array)/sizeof(*(array)))) - -#define LZO_SIZE(bits) (1u << (bits)) -#define LZO_MASK(bits) (LZO_SIZE(bits) - 1) - -#define LZO_LSIZE(bits) (1ul << (bits)) -#define LZO_LMASK(bits) (LZO_LSIZE(bits) - 1) - -#define LZO_USIZE(bits) ((lzo_uint) 1 << (bits)) -#define LZO_UMASK(bits) (LZO_USIZE(bits) - 1) - -#define LZO_STYPE_MAX(b) (((1l << (8*(b)-2)) - 1l) + (1l << (8*(b)-2))) -#define LZO_UTYPE_MAX(b) (((1ul << (8*(b)-1)) - 1ul) + (1ul << (8*(b)-1))) - -#if !defined(SIZEOF_UNSIGNED) -# if (UINT_MAX == 0xffff) -# define SIZEOF_UNSIGNED 2 -# elif (UINT_MAX == LZO_0xffffffffL) -# define SIZEOF_UNSIGNED 4 -# elif (UINT_MAX >= LZO_0xffffffffL) -# define SIZEOF_UNSIGNED 8 -# else -# error SIZEOF_UNSIGNED -# endif -#endif - -#if !defined(SIZEOF_UNSIGNED_LONG) -# if (ULONG_MAX == LZO_0xffffffffL) -# define SIZEOF_UNSIGNED_LONG 4 -# elif (ULONG_MAX >= LZO_0xffffffffL) -# define SIZEOF_UNSIGNED_LONG 8 -# else -# error SIZEOF_UNSIGNED_LONG -# endif -#endif - -#if !defined(SIZEOF_SIZE_T) -# define SIZEOF_SIZE_T SIZEOF_UNSIGNED -#endif -#if !defined(SIZE_T_MAX) -# define SIZE_T_MAX LZO_UTYPE_MAX(SIZEOF_SIZE_T) -#endif - -#if 1 && defined(__LZO_i386) && (UINT_MAX == LZO_0xffffffffL) -# if !defined(LZO_UNALIGNED_OK_2) && (USHRT_MAX == 0xffff) -# define LZO_UNALIGNED_OK_2 -# endif -# if !defined(LZO_UNALIGNED_OK_4) && (LZO_UINT32_MAX == LZO_0xffffffffL) -# define LZO_UNALIGNED_OK_4 -# endif -#endif - -#if defined(LZO_UNALIGNED_OK_2) || defined(LZO_UNALIGNED_OK_4) -# if !defined(LZO_UNALIGNED_OK) -# define LZO_UNALIGNED_OK -# endif -#endif - -#if defined(__LZO_NO_UNALIGNED) -# undef LZO_UNALIGNED_OK -# undef LZO_UNALIGNED_OK_2 -# undef LZO_UNALIGNED_OK_4 -#endif - -#if defined(LZO_UNALIGNED_OK_2) && (USHRT_MAX != 0xffff) -# error "LZO_UNALIGNED_OK_2 must not be defined on this system" -#endif -#if defined(LZO_UNALIGNED_OK_4) && (LZO_UINT32_MAX != LZO_0xffffffffL) -# error "LZO_UNALIGNED_OK_4 must not be defined on this system" -#endif - -#if defined(__LZO_NO_ALIGNED) -# undef LZO_ALIGNED_OK_4 -#endif - -#if defined(LZO_ALIGNED_OK_4) && (LZO_UINT32_MAX != LZO_0xffffffffL) -# error "LZO_ALIGNED_OK_4 must not be defined on this system" -#endif - -#define LZO_LITTLE_ENDIAN 1234 -#define LZO_BIG_ENDIAN 4321 -#define LZO_PDP_ENDIAN 3412 - -#if !defined(LZO_BYTE_ORDER) -# if defined(MFX_BYTE_ORDER) -# define LZO_BYTE_ORDER MFX_BYTE_ORDER -# elif defined(__LZO_i386) -# define LZO_BYTE_ORDER LZO_LITTLE_ENDIAN -# elif defined(BYTE_ORDER) -# define LZO_BYTE_ORDER BYTE_ORDER -# elif defined(__BYTE_ORDER) -# define LZO_BYTE_ORDER __BYTE_ORDER -# endif -#endif - -#if defined(LZO_BYTE_ORDER) -# if (LZO_BYTE_ORDER != LZO_LITTLE_ENDIAN) && \ - (LZO_BYTE_ORDER != LZO_BIG_ENDIAN) -# error "invalid LZO_BYTE_ORDER" -# endif -#endif - -#if defined(LZO_UNALIGNED_OK) && !defined(LZO_BYTE_ORDER) -# error "LZO_BYTE_ORDER is not defined" -#endif - -#define LZO_OPTIMIZE_GNUC_i386_IS_BUGGY - -#if defined(NDEBUG) && !defined(LZO_DEBUG) && !defined(__LZO_CHECKER) -# if defined(__GNUC__) && defined(__i386__) -# if !defined(LZO_OPTIMIZE_GNUC_i386_IS_BUGGY) -# define LZO_OPTIMIZE_GNUC_i386 -# endif -# endif -#endif - -__LZO_EXTERN_C int __lzo_init_done; -__LZO_EXTERN_C const lzo_byte __lzo_copyright[]; -LZO_EXTERN(const lzo_byte *) lzo_copyright(void); -__LZO_EXTERN_C const lzo_uint32 _lzo_crc32_table[256]; - -#define _LZO_STRINGIZE(x) #x -#define _LZO_MEXPAND(x) _LZO_STRINGIZE(x) - -#define _LZO_CONCAT2(a,b) a ## b -#define _LZO_CONCAT3(a,b,c) a ## b ## c -#define _LZO_CONCAT4(a,b,c,d) a ## b ## c ## d -#define _LZO_CONCAT5(a,b,c,d,e) a ## b ## c ## d ## e - -#define _LZO_ECONCAT2(a,b) _LZO_CONCAT2(a,b) -#define _LZO_ECONCAT3(a,b,c) _LZO_CONCAT3(a,b,c) -#define _LZO_ECONCAT4(a,b,c,d) _LZO_CONCAT4(a,b,c,d) -#define _LZO_ECONCAT5(a,b,c,d,e) _LZO_CONCAT5(a,b,c,d,e) - -#if 0 - -#define __LZO_IS_COMPRESS_QUERY(i,il,o,ol,w) ((lzo_voidp)(o) == (w)) -#define __LZO_QUERY_COMPRESS(i,il,o,ol,w,n,s) \ - (*ol = (n)*(s), LZO_E_OK) - -#define __LZO_IS_DECOMPRESS_QUERY(i,il,o,ol,w) ((lzo_voidp)(o) == (w)) -#define __LZO_QUERY_DECOMPRESS(i,il,o,ol,w,n,s) \ - (*ol = (n)*(s), LZO_E_OK) - -#define __LZO_IS_OPTIMIZE_QUERY(i,il,o,ol,w) ((lzo_voidp)(o) == (w)) -#define __LZO_QUERY_OPTIMIZE(i,il,o,ol,w,n,s) \ - (*ol = (n)*(s), LZO_E_OK) - -#endif - -#ifndef __LZO_PTR_H -#define __LZO_PTR_H - -#ifdef __cplusplus -extern "C" { -#endif - -#if defined(__LZO_DOS16) || defined(__LZO_WIN16) -# include -# if 1 && defined(__WATCOMC__) -# include - __LZO_EXTERN_C unsigned char _HShift; -# define __LZO_HShift _HShift -# elif 1 && defined(_MSC_VER) - __LZO_EXTERN_C unsigned short __near _AHSHIFT; -# define __LZO_HShift ((unsigned) &_AHSHIFT) -# elif defined(__LZO_WIN16) -# define __LZO_HShift 3 -# else -# define __LZO_HShift 12 -# endif -# if !defined(_FP_SEG) && defined(FP_SEG) -# define _FP_SEG FP_SEG -# endif -# if !defined(_FP_OFF) && defined(FP_OFF) -# define _FP_OFF FP_OFF -# endif -#endif - -#if (UINT_MAX >= LZO_0xffffffffL) - typedef ptrdiff_t lzo_ptrdiff_t; -#else - typedef long lzo_ptrdiff_t; -#endif - -#if !defined(__LZO_HAVE_PTR_T) -# if defined(lzo_ptr_t) -# define __LZO_HAVE_PTR_T -# endif -#endif -#if !defined(__LZO_HAVE_PTR_T) -# if defined(SIZEOF_CHAR_P) && defined(SIZEOF_UNSIGNED_LONG) -# if (SIZEOF_CHAR_P == SIZEOF_UNSIGNED_LONG) - typedef unsigned long lzo_ptr_t; - typedef long lzo_sptr_t; -# define __LZO_HAVE_PTR_T -# endif -# endif -#endif -#if !defined(__LZO_HAVE_PTR_T) -# if defined(SIZEOF_CHAR_P) && defined(SIZEOF_UNSIGNED) -# if (SIZEOF_CHAR_P == SIZEOF_UNSIGNED) - typedef unsigned int lzo_ptr_t; - typedef int lzo_sptr_t; -# define __LZO_HAVE_PTR_T -# endif -# endif -#endif -#if !defined(__LZO_HAVE_PTR_T) -# if defined(SIZEOF_CHAR_P) && defined(SIZEOF_UNSIGNED_SHORT) -# if (SIZEOF_CHAR_P == SIZEOF_UNSIGNED_SHORT) - typedef unsigned short lzo_ptr_t; - typedef short lzo_sptr_t; -# define __LZO_HAVE_PTR_T -# endif -# endif -#endif -#if !defined(__LZO_HAVE_PTR_T) -# if defined(LZO_HAVE_CONFIG_H) || defined(SIZEOF_CHAR_P) -# error "no suitable type for lzo_ptr_t" -# else - typedef unsigned long lzo_ptr_t; - typedef long lzo_sptr_t; -# define __LZO_HAVE_PTR_T -# endif -#endif - -#if defined(__LZO_DOS16) || defined(__LZO_WIN16) -#define PTR(a) ((lzo_bytep) (a)) -#define PTR_ALIGNED_4(a) ((_FP_OFF(a) & 3) == 0) -#define PTR_ALIGNED2_4(a,b) (((_FP_OFF(a) | _FP_OFF(b)) & 3) == 0) -#else -#define PTR(a) ((lzo_ptr_t) (a)) -#define PTR_LINEAR(a) PTR(a) -#define PTR_ALIGNED_4(a) ((PTR_LINEAR(a) & 3) == 0) -#define PTR_ALIGNED_8(a) ((PTR_LINEAR(a) & 7) == 0) -#define PTR_ALIGNED2_4(a,b) (((PTR_LINEAR(a) | PTR_LINEAR(b)) & 3) == 0) -#define PTR_ALIGNED2_8(a,b) (((PTR_LINEAR(a) | PTR_LINEAR(b)) & 7) == 0) -#endif - -#define PTR_LT(a,b) (PTR(a) < PTR(b)) -#define PTR_GE(a,b) (PTR(a) >= PTR(b)) -#define PTR_DIFF(a,b) ((lzo_ptrdiff_t) (PTR(a) - PTR(b))) - -LZO_EXTERN(lzo_ptr_t) -__lzo_ptr_linear(const lzo_voidp ptr); - -typedef union -{ - char a_char; - unsigned char a_uchar; - short a_short; - unsigned short a_ushort; - int a_int; - unsigned int a_uint; - long a_long; - unsigned long a_ulong; - lzo_int a_lzo_int; - lzo_uint a_lzo_uint; - lzo_int32 a_lzo_int32; - lzo_uint32 a_lzo_uint32; - ptrdiff_t a_ptrdiff_t; - lzo_ptrdiff_t a_lzo_ptrdiff_t; - lzo_ptr_t a_lzo_ptr_t; - char * a_charp; - lzo_bytep a_lzo_bytep; - lzo_bytepp a_lzo_bytepp; -} -lzo_align_t; - -#ifdef __cplusplus -} -#endif - -#endif - -#define LZO_DETERMINISTIC - -#define LZO_DICT_USE_PTR -#if defined(__LZO_DOS16) || defined(__LZO_WIN16) || defined(__LZO_STRICT_16BIT) -# undef LZO_DICT_USE_PTR -#endif - -#if defined(LZO_DICT_USE_PTR) -# define lzo_dict_t const lzo_bytep -# define lzo_dict_p lzo_dict_t __LZO_MMODEL * -#else -# define lzo_dict_t lzo_uint -# define lzo_dict_p lzo_dict_t __LZO_MMODEL * -#endif - -#if !defined(lzo_moff_t) -#define lzo_moff_t lzo_uint -#endif - -#endif - -LZO_PUBLIC(lzo_ptr_t) -__lzo_ptr_linear(const lzo_voidp ptr) -{ - lzo_ptr_t p; - -#if defined(__LZO_DOS16) || defined(__LZO_WIN16) - p = (((lzo_ptr_t)(_FP_SEG(ptr))) << (16 - __LZO_HShift)) + (_FP_OFF(ptr)); -#else - p = PTR_LINEAR(ptr); -#endif - - return p; -} - -LZO_PUBLIC(unsigned) -__lzo_align_gap(const lzo_voidp ptr, lzo_uint size) -{ - lzo_ptr_t p, s, n; - - assert(size > 0); - - p = __lzo_ptr_linear(ptr); - s = (lzo_ptr_t) (size - 1); -#if 0 - assert((size & (size - 1)) == 0); - n = ((p + s) & ~s) - p; -#else - n = (((p + s) / size) * size) - p; -#endif - - assert((long)n >= 0); - assert(n <= s); - - return (unsigned)n; -} - -#ifndef __LZO_UTIL_H -#define __LZO_UTIL_H - -#ifndef __LZO_CONF_H -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -#if 1 && defined(HAVE_MEMCPY) -#if !defined(__LZO_DOS16) && !defined(__LZO_WIN16) - -#define MEMCPY8_DS(dest,src,len) \ - memcpy(dest,src,len); \ - dest += len; \ - src += len - -#endif -#endif - -#if 0 && !defined(MEMCPY8_DS) - -#define MEMCPY8_DS(dest,src,len) \ - { do { \ - *dest++ = *src++; \ - *dest++ = *src++; \ - *dest++ = *src++; \ - *dest++ = *src++; \ - *dest++ = *src++; \ - *dest++ = *src++; \ - *dest++ = *src++; \ - *dest++ = *src++; \ - len -= 8; \ - } while (len > 0); } - -#endif - -#if !defined(MEMCPY8_DS) - -#define MEMCPY8_DS(dest,src,len) \ - { register lzo_uint __l = (len) / 8; \ - do { \ - *dest++ = *src++; \ - *dest++ = *src++; \ - *dest++ = *src++; \ - *dest++ = *src++; \ - *dest++ = *src++; \ - *dest++ = *src++; \ - *dest++ = *src++; \ - *dest++ = *src++; \ - } while (--__l > 0); } - -#endif - -#define MEMCPY_DS(dest,src,len) \ - do *dest++ = *src++; \ - while (--len > 0) - -#define MEMMOVE_DS(dest,src,len) \ - do *dest++ = *src++; \ - while (--len > 0) - -#if 0 && defined(LZO_OPTIMIZE_GNUC_i386) - -#define BZERO8_PTR(s,l,n) \ -__asm__ __volatile__( \ - "movl %0,%%eax \n" \ - "movl %1,%%edi \n" \ - "movl %2,%%ecx \n" \ - "cld \n" \ - "rep \n" \ - "stosl %%eax,(%%edi) \n" \ - : \ - :"g" (0),"g" (s),"g" (n) \ - :"eax","edi","ecx", "memory", "cc" \ -) - -#elif (LZO_UINT_MAX <= SIZE_T_MAX) && defined(HAVE_MEMSET) - -#if 1 -#define BZERO8_PTR(s,l,n) memset((s),0,(lzo_uint)(l)*(n)) -#else -#define BZERO8_PTR(s,l,n) memset((lzo_voidp)(s),0,(lzo_uint)(l)*(n)) -#endif - -#else - -#define BZERO8_PTR(s,l,n) \ - lzo_memset((lzo_voidp)(s),0,(lzo_uint)(l)*(n)) - -#endif - -#if 0 -#if defined(__GNUC__) && defined(__i386__) - -unsigned char lzo_rotr8(unsigned char value, int shift); -extern __inline__ unsigned char lzo_rotr8(unsigned char value, int shift) -{ - unsigned char result; - - __asm__ __volatile__ ("movb %b1, %b0; rorb %b2, %b0" - : "=a"(result) : "g"(value), "c"(shift)); - return result; -} - -unsigned short lzo_rotr16(unsigned short value, int shift); -extern __inline__ unsigned short lzo_rotr16(unsigned short value, int shift) -{ - unsigned short result; - - __asm__ __volatile__ ("movw %b1, %b0; rorw %b2, %b0" - : "=a"(result) : "g"(value), "c"(shift)); - return result; -} - -#endif -#endif - -#ifdef __cplusplus -} -#endif - -#endif - -LZO_PUBLIC(lzo_bool) -lzo_assert(int expr) -{ - return (expr) ? 1 : 0; -} - -/* If you use the LZO library in a product, you *must* keep this - * copyright string in the executable of your product. - */ - -const lzo_byte __lzo_copyright[] = -#if !defined(__LZO_IN_MINLZO) - LZO_VERSION_STRING; -#else - "\n\n\n" - "LZO real-time data compression library.\n" - "Copyright (C) 1996, 1997, 1998, 1999, 2000 Markus Franz Xaver Johannes Oberhumer\n" - "\n" - "http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html\n" - "\n" - "LZO version: v" LZO_VERSION_STRING ", " LZO_VERSION_DATE "\n" - "LZO build date: " __DATE__ " " __TIME__ "\n\n" - "LZO special compilation options:\n" -#ifdef __cplusplus - " __cplusplus\n" -#endif -#if defined(__PIC__) - " __PIC__\n" -#elif defined(__pic__) - " __pic__\n" -#endif -#if (UINT_MAX < LZO_0xffffffffL) - " 16BIT\n" -#endif -#if defined(__LZO_STRICT_16BIT) - " __LZO_STRICT_16BIT\n" -#endif -#if (UINT_MAX > LZO_0xffffffffL) - " UINT_MAX=" _LZO_MEXPAND(UINT_MAX) "\n" -#endif -#if (ULONG_MAX > LZO_0xffffffffL) - " ULONG_MAX=" _LZO_MEXPAND(ULONG_MAX) "\n" -#endif -#if defined(LZO_BYTE_ORDER) - " LZO_BYTE_ORDER=" _LZO_MEXPAND(LZO_BYTE_ORDER) "\n" -#endif -#if defined(LZO_UNALIGNED_OK_2) - " LZO_UNALIGNED_OK_2\n" -#endif -#if defined(LZO_UNALIGNED_OK_4) - " LZO_UNALIGNED_OK_4\n" -#endif -#if defined(LZO_ALIGNED_OK_4) - " LZO_ALIGNED_OK_4\n" -#endif -#if defined(LZO_DICT_USE_PTR) - " LZO_DICT_USE_PTR\n" -#endif -#if defined(__LZO_QUERY_COMPRESS) - " __LZO_QUERY_COMPRESS\n" -#endif -#if defined(__LZO_QUERY_DECOMPRESS) - " __LZO_QUERY_DECOMPRESS\n" -#endif -#if defined(__LZO_IN_MINILZO) - " __LZO_IN_MINILZO\n" -#endif - "\n\n" - "$Id: LZO " LZO_VERSION_STRING " built " __DATE__ " " __TIME__ -#if defined(__GNUC__) && defined(__VERSION__) - " by gcc " __VERSION__ -#elif defined(__BORLANDC__) - " by Borland C " _LZO_MEXPAND(__BORLANDC__) -#elif defined(_MSC_VER) - " by Microsoft C " _LZO_MEXPAND(_MSC_VER) -#elif defined(__PUREC__) - " by Pure C " _LZO_MEXPAND(__PUREC__) -#elif defined(__SC__) - " by Symantec C " _LZO_MEXPAND(__SC__) -#elif defined(__TURBOC__) - " by Turbo C " _LZO_MEXPAND(__TURBOC__) -#elif defined(__WATCOMC__) - " by Watcom C " _LZO_MEXPAND(__WATCOMC__) -#endif - " $\n" - "$Copyright: LZO (C) 1996, 1997, 1998, 1999, 2000 Markus Franz Xaver Johannes Oberhumer $\n"; -#endif - -LZO_PUBLIC(const lzo_byte *) -lzo_copyright(void) -{ - return __lzo_copyright; -} - -LZO_PUBLIC(unsigned) -lzo_version(void) -{ - return LZO_VERSION; -} - -LZO_PUBLIC(const char *) -lzo_version_string(void) -{ - return LZO_VERSION_STRING; -} - -LZO_PUBLIC(const char *) -lzo_version_date(void) -{ - return LZO_VERSION_DATE; -} - -LZO_PUBLIC(const lzo_charp) -_lzo_version_string(void) -{ - return LZO_VERSION_STRING; -} - -LZO_PUBLIC(const lzo_charp) -_lzo_version_date(void) -{ - return LZO_VERSION_DATE; -} - -#define LZO_BASE 65521u -#define LZO_NMAX 5552 - -#define LZO_DO1(buf,i) {s1 += buf[i]; s2 += s1;} -#define LZO_DO2(buf,i) LZO_DO1(buf,i); LZO_DO1(buf,i+1); -#define LZO_DO4(buf,i) LZO_DO2(buf,i); LZO_DO2(buf,i+2); -#define LZO_DO8(buf,i) LZO_DO4(buf,i); LZO_DO4(buf,i+4); -#define LZO_DO16(buf,i) LZO_DO8(buf,i); LZO_DO8(buf,i+8); - -LZO_PUBLIC(lzo_uint32) -lzo_adler32(lzo_uint32 adler, const lzo_byte *buf, lzo_uint len) -{ - lzo_uint32 s1 = adler & 0xffff; - lzo_uint32 s2 = (adler >> 16) & 0xffff; - int k; - - if (buf == NULL) - return 1; - - while (len > 0) - { - k = len < LZO_NMAX ? (int) len : LZO_NMAX; - len -= k; - if (k >= 16) do - { - LZO_DO16(buf,0); - buf += 16; - k -= 16; - } while (k >= 16); - if (k != 0) do - { - s1 += *buf++; - s2 += s1; - } while (--k > 0); - s1 %= LZO_BASE; - s2 %= LZO_BASE; - } - return (s2 << 16) | s1; -} - -LZO_PUBLIC(int) -lzo_memcmp(const lzo_voidp s1, const lzo_voidp s2, lzo_uint len) -{ -#if (LZO_UINT_MAX <= SIZE_T_MAX) && defined(HAVE_MEMCMP) - return memcmp(s1,s2,len); -#else - const lzo_byte *p1 = (const lzo_byte *) s1; - const lzo_byte *p2 = (const lzo_byte *) s2; - int d; - - if (len > 0) do - { - d = *p1 - *p2; - if (d != 0) - return d; - p1++; - p2++; - } - while (--len > 0); - return 0; -#endif -} - -LZO_PUBLIC(lzo_voidp) -lzo_memcpy(lzo_voidp dest, const lzo_voidp src, lzo_uint len) -{ -#if (LZO_UINT_MAX <= SIZE_T_MAX) && defined(HAVE_MEMCPY) - return memcpy(dest,src,len); -#else - lzo_byte *p1 = (lzo_byte *) dest; - const lzo_byte *p2 = (const lzo_byte *) src; - - if (len <= 0 || p1 == p2) - return dest; - do - *p1++ = *p2++; - while (--len > 0); - return dest; -#endif -} - -LZO_PUBLIC(lzo_voidp) -lzo_memmove(lzo_voidp dest, const lzo_voidp src, lzo_uint len) -{ -#if (LZO_UINT_MAX <= SIZE_T_MAX) && defined(HAVE_MEMMOVE) - return memmove(dest,src,len); -#else - lzo_byte *p1 = (lzo_byte *) dest; - const lzo_byte *p2 = (const lzo_byte *) src; - - if (len <= 0 || p1 == p2) - return dest; - - if (p1 < p2) - { - do - *p1++ = *p2++; - while (--len > 0); - } - else - { - p1 += len; - p2 += len; - do - *--p1 = *--p2; - while (--len > 0); - } - return dest; -#endif -} - -LZO_PUBLIC(lzo_voidp) -lzo_memset(lzo_voidp s, int c, lzo_uint len) -{ -#if (LZO_UINT_MAX <= SIZE_T_MAX) && defined(HAVE_MEMSET) - return memset(s,c,len); -#else - lzo_byte *p = (lzo_byte *) s; - - if (len > 0) do - *p++ = LZO_BYTE(c); - while (--len > 0); - return s; -#endif -} - -#include - -#if 0 -# define IS_SIGNED(type) (((type) (1ul << (8 * sizeof(type) - 1))) < 0) -# define IS_UNSIGNED(type) (((type) (1ul << (8 * sizeof(type) - 1))) > 0) -#else -# define IS_SIGNED(type) (((type) (-1)) < ((type) 0)) -# define IS_UNSIGNED(type) (((type) (-1)) > ((type) 0)) -#endif - -static lzo_bool schedule_insns_bug(void); -static lzo_bool strength_reduce_bug(int *); - -#if 0 || defined(LZO_DEBUG) -static lzo_bool __lzo_assert_fail(const char *s, unsigned line) -{ -#if defined(__palmos__) - printf("LZO assertion failed in line %u: '%s'\n",line,s); -#else - fprintf(stderr,"LZO assertion failed in line %u: '%s'\n",line,s); -#endif - return 0; -} -# define __lzo_assert(x) ((x) ? 1 : __lzo_assert_fail(#x,__LINE__)) -#else -# define __lzo_assert(x) ((x) ? 1 : 0) -#endif - -static lzo_bool basic_integral_check(void) -{ - lzo_bool r = 1; - lzo_bool sanity; - - r &= __lzo_assert(CHAR_BIT == 8); - r &= __lzo_assert(sizeof(char) == 1); - r &= __lzo_assert(sizeof(short) >= 2); - r &= __lzo_assert(sizeof(long) >= 4); - r &= __lzo_assert(sizeof(int) >= sizeof(short)); - r &= __lzo_assert(sizeof(long) >= sizeof(int)); - - r &= __lzo_assert(sizeof(lzo_uint32) >= 4); - r &= __lzo_assert(sizeof(lzo_uint32) >= sizeof(unsigned)); -#if defined(__LZO_STRICT_16BIT) - r &= __lzo_assert(sizeof(lzo_uint) == 2); -#else - r &= __lzo_assert(sizeof(lzo_uint) >= 4); - r &= __lzo_assert(sizeof(lzo_uint) >= sizeof(unsigned)); -#endif - -#if defined(SIZEOF_UNSIGNED) - r &= __lzo_assert(SIZEOF_UNSIGNED == sizeof(unsigned)); -#endif -#if defined(SIZEOF_UNSIGNED_LONG) - r &= __lzo_assert(SIZEOF_UNSIGNED_LONG == sizeof(unsigned long)); -#endif -#if defined(SIZEOF_UNSIGNED_SHORT) - r &= __lzo_assert(SIZEOF_UNSIGNED_SHORT == sizeof(unsigned short)); -#endif -#if !defined(__LZO_IN_MINILZO) -#if defined(SIZEOF_SIZE_T) - r &= __lzo_assert(SIZEOF_SIZE_T == sizeof(size_t)); -#endif -#endif - - sanity = IS_UNSIGNED(unsigned short) && IS_UNSIGNED(unsigned) && - IS_UNSIGNED(unsigned long) && - IS_SIGNED(short) && IS_SIGNED(int) && IS_SIGNED(long); - if (sanity) - { - r &= __lzo_assert(IS_UNSIGNED(lzo_uint32)); - r &= __lzo_assert(IS_UNSIGNED(lzo_uint)); - r &= __lzo_assert(IS_SIGNED(lzo_int32)); - r &= __lzo_assert(IS_SIGNED(lzo_int)); - - r &= __lzo_assert(INT_MAX == LZO_STYPE_MAX(sizeof(int))); - r &= __lzo_assert(UINT_MAX == LZO_UTYPE_MAX(sizeof(unsigned))); - r &= __lzo_assert(LONG_MAX == LZO_STYPE_MAX(sizeof(long))); - r &= __lzo_assert(ULONG_MAX == LZO_UTYPE_MAX(sizeof(unsigned long))); - r &= __lzo_assert(SHRT_MAX == LZO_STYPE_MAX(sizeof(short))); - r &= __lzo_assert(USHRT_MAX == LZO_UTYPE_MAX(sizeof(unsigned short))); - r &= __lzo_assert(LZO_UINT32_MAX == LZO_UTYPE_MAX(sizeof(lzo_uint32))); - r &= __lzo_assert(LZO_UINT_MAX == LZO_UTYPE_MAX(sizeof(lzo_uint))); -#if !defined(__LZO_IN_MINILZO) - r &= __lzo_assert(SIZE_T_MAX == LZO_UTYPE_MAX(sizeof(size_t))); -#endif - } - -#if 0 - r &= __lzo_assert(LZO_BYTE(257) == 1); - r &= __lzo_assert(LZO_USHORT(65537L) == 1); -#endif - - return r; -} - -static lzo_bool basic_ptr_check(void) -{ - lzo_bool r = 1; - lzo_bool sanity; - - r &= __lzo_assert(sizeof(char *) >= sizeof(int)); - r &= __lzo_assert(sizeof(lzo_byte *) >= sizeof(char *)); - - r &= __lzo_assert(sizeof(lzo_voidp) == sizeof(lzo_byte *)); - r &= __lzo_assert(sizeof(lzo_voidp) == sizeof(lzo_voidpp)); - r &= __lzo_assert(sizeof(lzo_voidp) == sizeof(lzo_bytepp)); - r &= __lzo_assert(sizeof(lzo_voidp) >= sizeof(lzo_uint)); - - r &= __lzo_assert(sizeof(lzo_ptr_t) == sizeof(lzo_voidp)); - r &= __lzo_assert(sizeof(lzo_ptr_t) >= sizeof(lzo_uint)); - - r &= __lzo_assert(sizeof(lzo_ptrdiff_t) >= 4); - r &= __lzo_assert(sizeof(lzo_ptrdiff_t) >= sizeof(ptrdiff_t)); - -#if defined(SIZEOF_CHAR_P) - r &= __lzo_assert(SIZEOF_CHAR_P == sizeof(char *)); -#endif -#if defined(SIZEOF_PTRDIFF_T) - r &= __lzo_assert(SIZEOF_PTRDIFF_T == sizeof(ptrdiff_t)); -#endif - - sanity = IS_UNSIGNED(unsigned short) && IS_UNSIGNED(unsigned) && - IS_UNSIGNED(unsigned long) && - IS_SIGNED(short) && IS_SIGNED(int) && IS_SIGNED(long); - if (sanity) - { - r &= __lzo_assert(IS_UNSIGNED(lzo_ptr_t)); - r &= __lzo_assert(IS_UNSIGNED(lzo_moff_t)); - r &= __lzo_assert(IS_SIGNED(lzo_ptrdiff_t)); - r &= __lzo_assert(IS_SIGNED(lzo_sptr_t)); - } - - return r; -} - -static lzo_bool ptr_check(void) -{ - lzo_bool r = 1; - int i; - char _wrkmem[10 * sizeof(lzo_byte *) + sizeof(lzo_align_t)]; - lzo_byte *wrkmem; - lzo_bytepp dict; - unsigned char x[4 * sizeof(lzo_align_t)]; - long d; - lzo_align_t a; - - for (i = 0; i < (int) sizeof(x); i++) - x[i] = LZO_BYTE(i); - - wrkmem = LZO_PTR_ALIGN_UP((lzo_byte *)_wrkmem,sizeof(lzo_align_t)); - dict = (lzo_bytepp) wrkmem; - - d = (long) ((const lzo_bytep) dict - (const lzo_bytep) _wrkmem); - r &= __lzo_assert(d >= 0); - r &= __lzo_assert(d < (long) sizeof(lzo_align_t)); - - memset(&a,0xff,sizeof(a)); - r &= __lzo_assert(a.a_ushort == USHRT_MAX); - r &= __lzo_assert(a.a_uint == UINT_MAX); - r &= __lzo_assert(a.a_ulong == ULONG_MAX); - r &= __lzo_assert(a.a_lzo_uint == LZO_UINT_MAX); - - if (r == 1) - { - for (i = 0; i < 8; i++) - r &= __lzo_assert((const lzo_voidp) (&dict[i]) == (const lzo_voidp) (&wrkmem[i * sizeof(lzo_byte *)])); - } - - memset(&a,0,sizeof(a)); - r &= __lzo_assert(a.a_charp == NULL); - r &= __lzo_assert(a.a_lzo_bytep == NULL); - //r &= __lzo_assert(NULL == 0); - if (r == 1) - { - for (i = 0; i < 10; i++) - dict[i] = wrkmem; - BZERO8_PTR(dict+1,sizeof(dict[0]),8); - r &= __lzo_assert(dict[0] == wrkmem); - for (i = 1; i < 9; i++) - r &= __lzo_assert(dict[i] == NULL); - r &= __lzo_assert(dict[9] == wrkmem); - } - - if (r == 1) - { - unsigned k = 1; - const unsigned n = (unsigned) sizeof(lzo_uint32); - lzo_byte *p0; - lzo_byte *p1; - - k += __lzo_align_gap(&x[k],n); - p0 = (lzo_bytep) &x[k]; -#if defined(PTR_LINEAR) - r &= __lzo_assert((PTR_LINEAR(p0) & (n-1)) == 0); -#else - r &= __lzo_assert(n == 4); - r &= __lzo_assert(PTR_ALIGNED_4(p0)); -#endif - - r &= __lzo_assert(k >= 1); - p1 = (lzo_bytep) &x[1]; - r &= __lzo_assert(PTR_GE(p0,p1)); - - r &= __lzo_assert(k < 1+n); - p1 = (lzo_bytep) &x[1+n]; - r &= __lzo_assert(PTR_LT(p0,p1)); - - if (r == 1) - { - lzo_uint32 v0 = * (lzo_uint32 *) &x[k]; - lzo_uint32 v1 = * (lzo_uint32 *) &x[k+n]; - - r &= __lzo_assert(v0 > 0); - r &= __lzo_assert(v1 > 0); - } - } - - return r; -} - -LZO_PUBLIC(int) -_lzo_config_check(void) -{ - lzo_bool r = 1; - int i; - union { - lzo_uint32 a; - unsigned short b; - lzo_uint32 aa[4]; - unsigned char x[4*sizeof(lzo_align_t)]; - } u; - -#if 0 - r &= __lzo_assert((const void *)&u == (const void *)&u.a); - r &= __lzo_assert((const void *)&u == (const void *)&u.b); - r &= __lzo_assert((const void *)&u == (const void *)&u.x[0]); - r &= __lzo_assert((const void *)&u == (const void *)&u.aa[0]); -#endif - - r &= basic_integral_check(); - r &= basic_ptr_check(); - if (r != 1) - return LZO_E_ERROR; - - u.a = 0; u.b = 0; - for (i = 0; i < (int) sizeof(u.x); i++) - u.x[i] = LZO_BYTE(i); - -#if 0 - r &= __lzo_assert( (int) (unsigned char) ((char) -1) == 255); -#endif - -#if defined(LZO_BYTE_ORDER) - if (r == 1) - { -# if (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN) - lzo_uint32 a = (lzo_uint32) (u.a & LZO_0xffffffffL); - unsigned short b = (unsigned short) (u.b & 0xffff); - r &= __lzo_assert(a == 0x03020100L); - r &= __lzo_assert(b == 0x0100); -# elif (LZO_BYTE_ORDER == LZO_BIG_ENDIAN) - lzo_uint32 a = u.a >> (8 * sizeof(u.a) - 32); - unsigned short b = u.b >> (8 * sizeof(u.b) - 16); - r &= __lzo_assert(a == 0x00010203L); - r &= __lzo_assert(b == 0x0001); -# else -# error invalid LZO_BYTE_ORDER -# endif - } -#endif - -#if defined(LZO_UNALIGNED_OK_2) - r &= __lzo_assert(sizeof(short) == 2); - if (r == 1) - { - unsigned short b[4]; - - for (i = 0; i < 4; i++) - b[i] = * (const unsigned short *) &u.x[i]; - -# if (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN) - r &= __lzo_assert(b[0] == 0x0100); - r &= __lzo_assert(b[1] == 0x0201); - r &= __lzo_assert(b[2] == 0x0302); - r &= __lzo_assert(b[3] == 0x0403); -# elif (LZO_BYTE_ORDER == LZO_BIG_ENDIAN) - r &= __lzo_assert(b[0] == 0x0001); - r &= __lzo_assert(b[1] == 0x0102); - r &= __lzo_assert(b[2] == 0x0203); - r &= __lzo_assert(b[3] == 0x0304); -# endif - } -#endif - -#if defined(LZO_UNALIGNED_OK_4) - r &= __lzo_assert(sizeof(lzo_uint32) == 4); - if (r == 1) - { - lzo_uint32 a[4]; - - for (i = 0; i < 4; i++) - a[i] = * (const lzo_uint32 *) &u.x[i]; - -# if (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN) - r &= __lzo_assert(a[0] == 0x03020100L); - r &= __lzo_assert(a[1] == 0x04030201L); - r &= __lzo_assert(a[2] == 0x05040302L); - r &= __lzo_assert(a[3] == 0x06050403L); -# elif (LZO_BYTE_ORDER == LZO_BIG_ENDIAN) - r &= __lzo_assert(a[0] == 0x00010203L); - r &= __lzo_assert(a[1] == 0x01020304L); - r &= __lzo_assert(a[2] == 0x02030405L); - r &= __lzo_assert(a[3] == 0x03040506L); -# endif - } -#endif - -#if defined(LZO_ALIGNED_OK_4) - r &= __lzo_assert(sizeof(lzo_uint32) == 4); -#endif - - r &= __lzo_assert(lzo_sizeof_dict_t == sizeof(lzo_dict_t)); - -#if defined(__LZO_IN_MINLZO) - if (r == 1) - { - lzo_uint32 adler; - adler = lzo_adler32(0, NULL, 0); - adler = lzo_adler32(adler, lzo_copyright(), 200); - r &= __lzo_assert(adler == 0x7ea34377L); - } -#endif - - if (r == 1) - { - r &= __lzo_assert(!schedule_insns_bug()); - } - - if (r == 1) - { - static int x[3]; - static unsigned xn = 3; - register unsigned j; - - for (j = 0; j < xn; j++) - x[j] = (int)j - 3; - r &= __lzo_assert(!strength_reduce_bug(x)); - } - - if (r == 1) - { - r &= ptr_check(); - } - - return r == 1 ? LZO_E_OK : LZO_E_ERROR; -} - -static lzo_bool schedule_insns_bug(void) -{ -#if defined(__LZO_CHECKER) - return 0; -#else - const int clone[] = {1, 2, 0}; - const int *q; - q = clone; - return (*q) ? 0 : 1; -#endif -} - -static lzo_bool strength_reduce_bug(int *x) -{ - return x[0] != -3 || x[1] != -2 || x[2] != -1; -} - -int __lzo_init_done = 0; - -LZO_PUBLIC(int) -__lzo_init2(unsigned v, int s1, int s2, int s3, int s4, int s5, - int s6, int s7, int s8, int s9) -{ - int r; - - __lzo_init_done = 1; - - if (v == 0) - return LZO_E_ERROR; - - r = (s1 == -1 || s1 == (int) sizeof(short)) && - (s2 == -1 || s2 == (int) sizeof(int)) && - (s3 == -1 || s3 == (int) sizeof(long)) && - (s4 == -1 || s4 == (int) sizeof(lzo_uint32)) && - (s5 == -1 || s5 == (int) sizeof(lzo_uint)) && - (s6 == -1 || s6 == (int) lzo_sizeof_dict_t) && - (s7 == -1 || s7 == (int) sizeof(char *)) && - (s8 == -1 || s8 == (int) sizeof(lzo_voidp)) && - (s9 == -1 || s9 == (int) sizeof(lzo_compress_t)); - if (!r) - return LZO_E_ERROR; - - r = _lzo_config_check(); - if (r != LZO_E_OK) - return r; - - return r; -} - -#if !defined(__LZO_IN_MINILZO) - -LZO_EXTERN(int) -__lzo_init(unsigned v,int s1,int s2,int s3,int s4,int s5,int s6,int s7); - -LZO_PUBLIC(int) -__lzo_init(unsigned v,int s1,int s2,int s3,int s4,int s5,int s6,int s7) -{ - if (v == 0 || v > 0x1010) - return LZO_E_ERROR; - return __lzo_init2(v,s1,s2,s3,s4,s5,-1,-1,s6,s7); -} - -#endif - -#define do_compress _lzo1x_1_do_compress - -#define LZO_NEED_DICT_H -#define D_BITS 14 -#define D_INDEX1(d,p) d = DM((0x21*DX3(p,5,5,6)) >> 5) -#define D_INDEX2(d,p) d = (d & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f) - -#ifndef __LZO_CONFIG1X_H -#define __LZO_CONFIG1X_H - -#if !defined(LZO1X) && !defined(LZO1Y) && !defined(LZO1Z) -# define LZO1X -#endif - -#if !defined(__LZO_IN_MINILZO) -#include -#endif - -#define LZO_EOF_CODE -#undef LZO_DETERMINISTIC - -#define M1_MAX_OFFSET 0x0400 -#ifndef M2_MAX_OFFSET -#define M2_MAX_OFFSET 0x0800 -#endif -#define M3_MAX_OFFSET 0x4000 -#define M4_MAX_OFFSET 0xbfff - -#define MX_MAX_OFFSET (M1_MAX_OFFSET + M2_MAX_OFFSET) - -#define M1_MIN_LEN 2 -#define M1_MAX_LEN 2 -#define M2_MIN_LEN 3 -#ifndef M2_MAX_LEN -#define M2_MAX_LEN 8 -#endif -#define M3_MIN_LEN 3 -#define M3_MAX_LEN 33 -#define M4_MIN_LEN 3 -#define M4_MAX_LEN 9 - -#define M1_MARKER 0 -#define M2_MARKER 64 -#define M3_MARKER 32 -#define M4_MARKER 16 - -#ifndef MIN_LOOKAHEAD -#define MIN_LOOKAHEAD (M2_MAX_LEN + 1) -#endif - -#if defined(LZO_NEED_DICT_H) - -#ifndef LZO_HASH -#define LZO_HASH LZO_HASH_LZO_INCREMENTAL_B -#endif -#define DL_MIN_LEN M2_MIN_LEN - -#ifndef __LZO_DICT_H -#define __LZO_DICT_H - -#ifdef __cplusplus -extern "C" { -#endif - -#if !defined(D_BITS) && defined(DBITS) -# define D_BITS DBITS -#endif -#if !defined(D_BITS) -# error D_BITS is not defined -#endif -#if (D_BITS < 16) -# define D_SIZE LZO_SIZE(D_BITS) -# define D_MASK LZO_MASK(D_BITS) -#else -# define D_SIZE LZO_USIZE(D_BITS) -# define D_MASK LZO_UMASK(D_BITS) -#endif -#define D_HIGH ((D_MASK >> 1) + 1) - -#if !defined(DD_BITS) -# define DD_BITS 0 -#endif -#define DD_SIZE LZO_SIZE(DD_BITS) -#define DD_MASK LZO_MASK(DD_BITS) - -#if !defined(DL_BITS) -# define DL_BITS (D_BITS - DD_BITS) -#endif -#if (DL_BITS < 16) -# define DL_SIZE LZO_SIZE(DL_BITS) -# define DL_MASK LZO_MASK(DL_BITS) -#else -# define DL_SIZE LZO_USIZE(DL_BITS) -# define DL_MASK LZO_UMASK(DL_BITS) -#endif - -#if (D_BITS != DL_BITS + DD_BITS) -# error D_BITS does not match -#endif -#if (D_BITS < 8 || D_BITS > 18) -# error invalid D_BITS -#endif -#if (DL_BITS < 8 || DL_BITS > 20) -# error invalid DL_BITS -#endif -#if (DD_BITS < 0 || DD_BITS > 6) -# error invalid DD_BITS -#endif - -#if !defined(DL_MIN_LEN) -# define DL_MIN_LEN 3 -#endif -#if !defined(DL_SHIFT) -# define DL_SHIFT ((DL_BITS + (DL_MIN_LEN - 1)) / DL_MIN_LEN) -#endif - -#define LZO_HASH_GZIP 1 -#define LZO_HASH_GZIP_INCREMENTAL 2 -#define LZO_HASH_LZO_INCREMENTAL_A 3 -#define LZO_HASH_LZO_INCREMENTAL_B 4 - -#if !defined(LZO_HASH) -# error choose a hashing strategy -#endif - -#if (DL_MIN_LEN == 3) -# define _DV2_A(p,shift1,shift2) \ - (((( (lzo_uint32)((p)[0]) << shift1) ^ (p)[1]) << shift2) ^ (p)[2]) -# define _DV2_B(p,shift1,shift2) \ - (((( (lzo_uint32)((p)[2]) << shift1) ^ (p)[1]) << shift2) ^ (p)[0]) -# define _DV3_B(p,shift1,shift2,shift3) \ - ((_DV2_B((p)+1,shift1,shift2) << (shift3)) ^ (p)[0]) -#elif (DL_MIN_LEN == 2) -# define _DV2_A(p,shift1,shift2) \ - (( (lzo_uint32)(p[0]) << shift1) ^ p[1]) -# define _DV2_B(p,shift1,shift2) \ - (( (lzo_uint32)(p[1]) << shift1) ^ p[2]) -#else -# error invalid DL_MIN_LEN -#endif -#define _DV_A(p,shift) _DV2_A(p,shift,shift) -#define _DV_B(p,shift) _DV2_B(p,shift,shift) -#define DA2(p,s1,s2) \ - (((((lzo_uint32)((p)[2]) << (s2)) + (p)[1]) << (s1)) + (p)[0]) -#define DS2(p,s1,s2) \ - (((((lzo_uint32)((p)[2]) << (s2)) - (p)[1]) << (s1)) - (p)[0]) -#define DX2(p,s1,s2) \ - (((((lzo_uint32)((p)[2]) << (s2)) ^ (p)[1]) << (s1)) ^ (p)[0]) -#define DA3(p,s1,s2,s3) ((DA2((p)+1,s2,s3) << (s1)) + (p)[0]) -#define DS3(p,s1,s2,s3) ((DS2((p)+1,s2,s3) << (s1)) - (p)[0]) -#define DX3(p,s1,s2,s3) ((DX2((p)+1,s2,s3) << (s1)) ^ (p)[0]) -#define DMS(v,s) ((lzo_uint) (((v) & (D_MASK >> (s))) << (s))) -#define DM(v) DMS(v,0) - -#if (LZO_HASH == LZO_HASH_GZIP) -# define _DINDEX(dv,p) (_DV_A((p),DL_SHIFT)) - -#elif (LZO_HASH == LZO_HASH_GZIP_INCREMENTAL) -# define __LZO_HASH_INCREMENTAL -# define DVAL_FIRST(dv,p) dv = _DV_A((p),DL_SHIFT) -# define DVAL_NEXT(dv,p) dv = (((dv) << DL_SHIFT) ^ p[2]) -# define _DINDEX(dv,p) (dv) -# define DVAL_LOOKAHEAD DL_MIN_LEN - -#elif (LZO_HASH == LZO_HASH_LZO_INCREMENTAL_A) -# define __LZO_HASH_INCREMENTAL -# define DVAL_FIRST(dv,p) dv = _DV_A((p),5) -# define DVAL_NEXT(dv,p) \ - dv ^= (lzo_uint32)(p[-1]) << (2*5); dv = (((dv) << 5) ^ p[2]) -# define _DINDEX(dv,p) ((0x9f5f * (dv)) >> 5) -# define DVAL_LOOKAHEAD DL_MIN_LEN - -#elif (LZO_HASH == LZO_HASH_LZO_INCREMENTAL_B) -# define __LZO_HASH_INCREMENTAL -# define DVAL_FIRST(dv,p) dv = _DV_B((p),5) -# define DVAL_NEXT(dv,p) \ - dv ^= p[-1]; dv = (((dv) >> 5) ^ ((lzo_uint32)(p[2]) << (2*5))) -# define _DINDEX(dv,p) ((0x9f5f * (dv)) >> 5) -# define DVAL_LOOKAHEAD DL_MIN_LEN - -#else -# error choose a hashing strategy -#endif - -#ifndef DINDEX -#define DINDEX(dv,p) ((lzo_uint)((_DINDEX(dv,p)) & DL_MASK) << DD_BITS) -#endif -#if !defined(DINDEX1) && defined(D_INDEX1) -#define DINDEX1 D_INDEX1 -#endif -#if !defined(DINDEX2) && defined(D_INDEX2) -#define DINDEX2 D_INDEX2 -#endif - -#if !defined(__LZO_HASH_INCREMENTAL) -# define DVAL_FIRST(dv,p) ((void) 0) -# define DVAL_NEXT(dv,p) ((void) 0) -# define DVAL_LOOKAHEAD 0 -#endif - -#if !defined(DVAL_ASSERT) -#if defined(__LZO_HASH_INCREMENTAL) && !defined(NDEBUG) -static void DVAL_ASSERT(lzo_uint32 dv, const lzo_byte *p) -{ - lzo_uint32 df; - DVAL_FIRST(df,(p)); - assert(DINDEX(dv,p) == DINDEX(df,p)); -} -#else -# define DVAL_ASSERT(dv,p) ((void) 0) -#endif -#endif - -#if defined(LZO_DICT_USE_PTR) -# define DENTRY(p,in) (p) -# define GINDEX(m_pos,m_off,dict,dindex,in) m_pos = dict[dindex] -#else -# define DENTRY(p,in) ((lzo_uint) ((p)-(in))) -# define GINDEX(m_pos,m_off,dict,dindex,in) m_off = dict[dindex] -#endif - -#if (DD_BITS == 0) - -# define UPDATE_D(dict,drun,dv,p,in) dict[ DINDEX(dv,p) ] = DENTRY(p,in) -# define UPDATE_I(dict,drun,index,p,in) dict[index] = DENTRY(p,in) -# define UPDATE_P(ptr,drun,p,in) (ptr)[0] = DENTRY(p,in) - -#else - -# define UPDATE_D(dict,drun,dv,p,in) \ - dict[ DINDEX(dv,p) + drun++ ] = DENTRY(p,in); drun &= DD_MASK -# define UPDATE_I(dict,drun,index,p,in) \ - dict[ (index) + drun++ ] = DENTRY(p,in); drun &= DD_MASK -# define UPDATE_P(ptr,drun,p,in) \ - (ptr) [ drun++ ] = DENTRY(p,in); drun &= DD_MASK - -#endif - -#if defined(LZO_DICT_USE_PTR) - -#define LZO_CHECK_MPOS_DET(m_pos,m_off,in,ip,max_offset) \ - (m_pos == NULL || (m_off = (lzo_moff_t) (ip - m_pos)) > max_offset) - -#define LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,max_offset) \ - (BOUNDS_CHECKING_OFF_IN_EXPR( \ - (PTR_LT(m_pos,in) || \ - (m_off = (lzo_moff_t) PTR_DIFF(ip,m_pos)) <= 0 || \ - m_off > max_offset) )) - -#else - -#define LZO_CHECK_MPOS_DET(m_pos,m_off,in,ip,max_offset) \ - (m_off == 0 || \ - ((m_off = (lzo_moff_t) ((ip)-(in)) - m_off) > max_offset) || \ - (m_pos = (ip) - (m_off), 0) ) - -#define LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,max_offset) \ - ((lzo_moff_t) ((ip)-(in)) <= m_off || \ - ((m_off = (lzo_moff_t) ((ip)-(in)) - m_off) > max_offset) || \ - (m_pos = (ip) - (m_off), 0) ) - -#endif - -#if defined(LZO_DETERMINISTIC) -# define LZO_CHECK_MPOS LZO_CHECK_MPOS_DET -#else -# define LZO_CHECK_MPOS LZO_CHECK_MPOS_NON_DET -#endif - -#ifdef __cplusplus -} -#endif - -#endif - -#endif - -#endif - -#define DO_COMPRESS lzo1x_1_compress - -static -lzo_uint do_compress ( const lzo_byte *in , lzo_uint in_len, - lzo_byte *out, lzo_uint *out_len, - lzo_voidp wrkmem ) -{ -#if 0 && defined(__GNUC__) && defined(__i386__) - register const lzo_byte *ip __asm__("%esi"); -#else - register const lzo_byte *ip; -#endif - lzo_byte *op; - const lzo_byte * const in_end = in + in_len; - const lzo_byte * const ip_end = in + in_len - M2_MAX_LEN - 5; - const lzo_byte *ii; - lzo_dict_p const dict = (lzo_dict_p) wrkmem; - - op = out; - ip = in; - ii = ip; - - ip += 4; - for (;;) - { -#if 0 && defined(__GNUC__) && defined(__i386__) - register const lzo_byte *m_pos __asm__("%edi"); -#else - register const lzo_byte *m_pos; -#endif - lzo_moff_t m_off; - lzo_uint m_len; - lzo_uint dindex; - - DINDEX1(dindex,ip); - GINDEX(m_pos,m_off,dict,dindex,in); - if (LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,M4_MAX_OFFSET)) - goto literal; -#if 1 - if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) - goto try_match; - DINDEX2(dindex,ip); -#endif - GINDEX(m_pos,m_off,dict,dindex,in); - if (LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,M4_MAX_OFFSET)) - goto literal; - if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) - goto try_match; - goto literal; - -try_match: -#if 1 && defined(LZO_UNALIGNED_OK_2) - if (* (const lzo_ushortp) m_pos != * (const lzo_ushortp) ip) -#else - if (m_pos[0] != ip[0] || m_pos[1] != ip[1]) -#endif - { - } - else - { - if (m_pos[2] == ip[2]) - { -#if 0 - if (m_off <= M2_MAX_OFFSET) - goto match; - if (lit <= 3) - goto match; - if (lit == 3) - { - assert(op - 2 > out); op[-2] |= LZO_BYTE(3); - *op++ = *ii++; *op++ = *ii++; *op++ = *ii++; - goto code_match; - } - if (m_pos[3] == ip[3]) -#endif - goto match; - } - else - { -#if 0 -#if 0 - if (m_off <= M1_MAX_OFFSET && lit > 0 && lit <= 3) -#else - if (m_off <= M1_MAX_OFFSET && lit == 3) -#endif - { - register lzo_uint t; - - t = lit; - assert(op - 2 > out); op[-2] |= LZO_BYTE(t); - do *op++ = *ii++; while (--t > 0); - assert(ii == ip); - m_off -= 1; - *op++ = LZO_BYTE(M1_MARKER | ((m_off & 3) << 2)); - *op++ = LZO_BYTE(m_off >> 2); - ip += 2; - goto match_done; - } -#endif - } - } - -literal: - UPDATE_I(dict,0,dindex,ip,in); - ++ip; - if (ip >= ip_end) - break; - continue; - -match: - UPDATE_I(dict,0,dindex,ip,in); - if (ip - ii > 0) - { - register lzo_uint t = ip - ii; - - if (t <= 3) - { - assert(op - 2 > out); - op[-2] |= LZO_BYTE(t); - } - else if (t <= 18) - *op++ = LZO_BYTE(t - 3); - else - { - register lzo_uint tt = t - 18; - - *op++ = 0; - while (tt > 255) - { - tt -= 255; - *op++ = 0; - } - assert(tt > 0); - *op++ = LZO_BYTE(tt); - } - do *op++ = *ii++; while (--t > 0); - } - - assert(ii == ip); - ip += 3; - if (m_pos[3] != *ip++ || m_pos[4] != *ip++ || m_pos[5] != *ip++ || - m_pos[6] != *ip++ || m_pos[7] != *ip++ || m_pos[8] != *ip++ -#ifdef LZO1Y - || m_pos[ 9] != *ip++ || m_pos[10] != *ip++ || m_pos[11] != *ip++ - || m_pos[12] != *ip++ || m_pos[13] != *ip++ || m_pos[14] != *ip++ -#endif - ) - { - --ip; - m_len = ip - ii; - assert(m_len >= 3); assert(m_len <= M2_MAX_LEN); - - if (m_off <= M2_MAX_OFFSET) - { - m_off -= 1; -#if defined(LZO1X) - *op++ = LZO_BYTE(((m_len - 1) << 5) | ((m_off & 7) << 2)); - *op++ = LZO_BYTE(m_off >> 3); -#elif defined(LZO1Y) - *op++ = LZO_BYTE(((m_len + 1) << 4) | ((m_off & 3) << 2)); - *op++ = LZO_BYTE(m_off >> 2); -#endif - } - else if (m_off <= M3_MAX_OFFSET) - { - m_off -= 1; - *op++ = LZO_BYTE(M3_MARKER | (m_len - 2)); - goto m3_m4_offset; - } - else -#if defined(LZO1X) - { - m_off -= 0x4000; - assert(m_off > 0); assert(m_off <= 0x7fff); - *op++ = LZO_BYTE(M4_MARKER | - ((m_off & 0x4000) >> 11) | (m_len - 2)); - goto m3_m4_offset; - } -#elif defined(LZO1Y) - goto m4_match; -#endif - } - else - { - { - const lzo_byte *end = in_end; - const lzo_byte *m = m_pos + M2_MAX_LEN + 1; - while (ip < end && *m == *ip) - m++, ip++; - m_len = (ip - ii); - } - assert(m_len > M2_MAX_LEN); - - if (m_off <= M3_MAX_OFFSET) - { - m_off -= 1; - if (m_len <= 33) - *op++ = LZO_BYTE(M3_MARKER | (m_len - 2)); - else - { - m_len -= 33; - *op++ = M3_MARKER | 0; - goto m3_m4_len; - } - } - else - { -#if defined(LZO1Y) -m4_match: -#endif - m_off -= 0x4000; - assert(m_off > 0); assert(m_off <= 0x7fff); - if (m_len <= M4_MAX_LEN) - *op++ = LZO_BYTE(M4_MARKER | - ((m_off & 0x4000) >> 11) | (m_len - 2)); - else - { - m_len -= M4_MAX_LEN; - *op++ = LZO_BYTE(M4_MARKER | ((m_off & 0x4000) >> 11)); -m3_m4_len: - while (m_len > 255) - { - m_len -= 255; - *op++ = 0; - } - assert(m_len > 0); - *op++ = LZO_BYTE(m_len); - } - } - -m3_m4_offset: - *op++ = LZO_BYTE((m_off & 63) << 2); - *op++ = LZO_BYTE(m_off >> 6); - } - -#if 0 -match_done: -#endif - ii = ip; - if (ip >= ip_end) - break; - } - - *out_len = op - out; - return (lzo_uint) (in_end - ii); -} - -LZO_PUBLIC(int) -DO_COMPRESS ( const lzo_byte *in , lzo_uint in_len, - lzo_byte *out, lzo_uint *out_len, - lzo_voidp wrkmem ) -{ - lzo_byte *op = out; - lzo_uint t; - -#if defined(__LZO_QUERY_COMPRESS) - if (__LZO_IS_COMPRESS_QUERY(in,in_len,out,out_len,wrkmem)) - return __LZO_QUERY_COMPRESS(in,in_len,out,out_len,wrkmem,D_SIZE,lzo_sizeof(lzo_dict_t)); -#endif - - if (in_len <= M2_MAX_LEN + 5) - t = in_len; - else - { - t = do_compress(in,in_len,op,out_len,wrkmem); - op += *out_len; - } - - if (t > 0) - { - const lzo_byte *ii = in + in_len - t; - - if (op == out && t <= 238) - *op++ = LZO_BYTE(17 + t); - else if (t <= 3) - op[-2] |= LZO_BYTE(t); - else if (t <= 18) - *op++ = LZO_BYTE(t - 3); - else - { - lzo_uint tt = t - 18; - - *op++ = 0; - while (tt > 255) - { - tt -= 255; - *op++ = 0; - } - assert(tt > 0); - *op++ = LZO_BYTE(tt); - } - do *op++ = *ii++; while (--t > 0); - } - - *op++ = M4_MARKER | 1; - *op++ = 0; - *op++ = 0; - - *out_len = op - out; - return LZO_E_OK; -} - -#undef do_compress -#undef DO_COMPRESS -#undef LZO_HASH - -#undef LZO_TEST_DECOMPRESS_OVERRUN -#undef LZO_TEST_DECOMPRESS_OVERRUN_INPUT -#undef LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT -#undef LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND -#undef DO_DECOMPRESS -#define DO_DECOMPRESS lzo1x_decompress - -#if defined(LZO_TEST_DECOMPRESS_OVERRUN) -# if !defined(LZO_TEST_DECOMPRESS_OVERRUN_INPUT) -# define LZO_TEST_DECOMPRESS_OVERRUN_INPUT 2 -# endif -# if !defined(LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT) -# define LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT 2 -# endif -# if !defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND) -# define LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND -# endif -#endif - -#undef TEST_IP -#undef TEST_OP -#undef TEST_LOOKBEHIND -#undef NEED_IP -#undef NEED_OP -#undef HAVE_TEST_IP -#undef HAVE_TEST_OP -#undef HAVE_NEED_IP -#undef HAVE_NEED_OP -#undef HAVE_ANY_IP -#undef HAVE_ANY_OP - -#if defined(LZO_TEST_DECOMPRESS_OVERRUN_INPUT) -# if (LZO_TEST_DECOMPRESS_OVERRUN_INPUT >= 1) -# define TEST_IP (ip < ip_end) -# endif -# if (LZO_TEST_DECOMPRESS_OVERRUN_INPUT >= 2) -# define NEED_IP(x) \ - if ((lzo_uint)(ip_end - ip) < (lzo_uint)(x)) goto input_overrun -# endif -#endif - -#if defined(LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT) -# if (LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT >= 1) -# define TEST_OP (op <= op_end) -# endif -# if (LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT >= 2) -# undef TEST_OP -# define NEED_OP(x) \ - if ((lzo_uint)(op_end - op) < (lzo_uint)(x)) goto output_overrun -# endif -#endif - -#if defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND) -# define TEST_LOOKBEHIND(m_pos,out) if (m_pos < out) goto lookbehind_overrun -#else -# define TEST_LOOKBEHIND(m_pos,op) ((void) 0) -#endif - -#if !defined(LZO_EOF_CODE) && !defined(TEST_IP) -# define TEST_IP (ip < ip_end) -#endif - -#if defined(TEST_IP) -# define HAVE_TEST_IP -#else -# define TEST_IP 1 -#endif -#if defined(TEST_OP) -# define HAVE_TEST_OP -#else -# define TEST_OP 1 -#endif - -#if defined(NEED_IP) -# define HAVE_NEED_IP -#else -# define NEED_IP(x) ((void) 0) -#endif -#if defined(NEED_OP) -# define HAVE_NEED_OP -#else -# define NEED_OP(x) ((void) 0) -#endif - -#if defined(HAVE_TEST_IP) || defined(HAVE_NEED_IP) -# define HAVE_ANY_IP -#endif -#if defined(HAVE_TEST_OP) || defined(HAVE_NEED_OP) -# define HAVE_ANY_OP -#endif - -#if defined(DO_DECOMPRESS) -LZO_PUBLIC(int) -DO_DECOMPRESS ( const lzo_byte *in , lzo_uint in_len, - lzo_byte *out, lzo_uint *out_len, - lzo_voidp wrkmem ) -#endif -{ - register lzo_byte *op; - register const lzo_byte *ip; - register lzo_uint t; -#if defined(COPY_DICT) - lzo_uint m_off; - const lzo_byte *dict_end; -#else - register const lzo_byte *m_pos; -#endif - - const lzo_byte * const ip_end = in + in_len; -#if defined(HAVE_ANY_OP) - lzo_byte * const op_end = out + *out_len; -#endif -#if defined(LZO1Z) - lzo_uint last_m_off = 0; -#endif - - LZO_UNUSED(wrkmem); - -#if defined(__LZO_QUERY_DECOMPRESS) - if (__LZO_IS_DECOMPRESS_QUERY(in,in_len,out,out_len,wrkmem)) - return __LZO_QUERY_DECOMPRESS(in,in_len,out,out_len,wrkmem,0,0); -#endif - -#if defined(COPY_DICT) - if (dict) - { - if (dict_len > M4_MAX_OFFSET) - { - dict += dict_len - M4_MAX_OFFSET; - dict_len = M4_MAX_OFFSET; - } - dict_end = dict + dict_len; - } - else - { - dict_len = 0; - dict_end = NULL; - } -#endif - - *out_len = 0; - - op = out; - ip = in; - - if (*ip > 17) - { - t = *ip++ - 17; - if (t < 4) - goto match_next; - assert(t > 0); NEED_OP(t); NEED_IP(t+1); - do *op++ = *ip++; while (--t > 0); - goto first_literal_run; - } - - while (TEST_IP && TEST_OP) - { - t = *ip++; - if (t >= 16) - goto match; - if (t == 0) - { - NEED_IP(1); - while (*ip == 0) - { - t += 255; - ip++; - NEED_IP(1); - } - t += 15 + *ip++; - } - assert(t > 0); NEED_OP(t+3); NEED_IP(t+4); -#if defined(LZO_UNALIGNED_OK_4) || defined(LZO_ALIGNED_OK_4) -#if !defined(LZO_UNALIGNED_OK_4) - if (PTR_ALIGNED2_4(op,ip)) - { -#endif - * (lzo_uint32p) op = * (const lzo_uint32p) ip; - op += 4; ip += 4; - if (--t > 0) - { - if (t >= 4) - { - do { - * (lzo_uint32p) op = * (const lzo_uint32p) ip; - op += 4; ip += 4; t -= 4; - } while (t >= 4); - if (t > 0) do *op++ = *ip++; while (--t > 0); - } - else - do *op++ = *ip++; while (--t > 0); - } -#if !defined(LZO_UNALIGNED_OK_4) - } - else -#endif -#endif -#if !defined(LZO_UNALIGNED_OK_4) - { - *op++ = *ip++; *op++ = *ip++; *op++ = *ip++; - do *op++ = *ip++; while (--t > 0); - } -#endif - -first_literal_run: - - t = *ip++; - if (t >= 16) - goto match; -#if defined(COPY_DICT) -#if defined(LZO1Z) - m_off = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2); - last_m_off = m_off; -#else - m_off = (1 + M2_MAX_OFFSET) + (t >> 2) + (*ip++ << 2); -#endif - NEED_OP(3); - t = 3; COPY_DICT(t,m_off) -#else -#if defined(LZO1Z) - t = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2); - m_pos = op - t; - last_m_off = t; -#else - m_pos = op - (1 + M2_MAX_OFFSET); - m_pos -= t >> 2; - m_pos -= *ip++ << 2; -#endif - TEST_LOOKBEHIND(m_pos,out); NEED_OP(3); - *op++ = *m_pos++; *op++ = *m_pos++; *op++ = *m_pos; -#endif - goto match_done; - - while (TEST_IP && TEST_OP) - { -match: - if (t >= 64) - { -#if defined(COPY_DICT) -#if defined(LZO1X) - m_off = 1 + ((t >> 2) & 7) + (*ip++ << 3); - t = (t >> 5) - 1; -#elif defined(LZO1Y) - m_off = 1 + ((t >> 2) & 3) + (*ip++ << 2); - t = (t >> 4) - 3; -#elif defined(LZO1Z) - m_off = t & 0x1f; - if (m_off >= 0x1c) - m_off = last_m_off; - else - { - m_off = 1 + (m_off << 6) + (*ip++ >> 2); - last_m_off = m_off; - } - t = (t >> 5) - 1; -#endif -#else -#if defined(LZO1X) - m_pos = op - 1; - m_pos -= (t >> 2) & 7; - m_pos -= *ip++ << 3; - t = (t >> 5) - 1; -#elif defined(LZO1Y) - m_pos = op - 1; - m_pos -= (t >> 2) & 3; - m_pos -= *ip++ << 2; - t = (t >> 4) - 3; -#elif defined(LZO1Z) - { - lzo_uint off = t & 0x1f; - m_pos = op; - if (off >= 0x1c) - { - assert(last_m_off > 0); - m_pos -= last_m_off; - } - else - { - off = 1 + (off << 6) + (*ip++ >> 2); - m_pos -= off; - last_m_off = off; - } - } - t = (t >> 5) - 1; -#endif - TEST_LOOKBEHIND(m_pos,out); assert(t > 0); NEED_OP(t+3-1); - goto copy_match; -#endif - } - else if (t >= 32) - { - t &= 31; - if (t == 0) - { - NEED_IP(1); - while (*ip == 0) - { - t += 255; - ip++; - NEED_IP(1); - } - t += 31 + *ip++; - } -#if defined(COPY_DICT) -#if defined(LZO1Z) - m_off = 1 + (ip[0] << 6) + (ip[1] >> 2); - last_m_off = m_off; -#else - m_off = 1 + (ip[0] >> 2) + (ip[1] << 6); -#endif -#else -#if defined(LZO1Z) - { - lzo_uint off = 1 + (ip[0] << 6) + (ip[1] >> 2); - m_pos = op - off; - last_m_off = off; - } -#elif defined(LZO_UNALIGNED_OK_2) && (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN) - m_pos = op - 1; - m_pos -= (* (const lzo_ushortp) ip) >> 2; -#else - m_pos = op - 1; - m_pos -= (ip[0] >> 2) + (ip[1] << 6); -#endif -#endif - ip += 2; - } - else if (t >= 16) - { -#if defined(COPY_DICT) - m_off = (t & 8) << 11; -#else - m_pos = op; - m_pos -= (t & 8) << 11; -#endif - t &= 7; - if (t == 0) - { - NEED_IP(1); - while (*ip == 0) - { - t += 255; - ip++; - NEED_IP(1); - } - t += 7 + *ip++; - } -#if defined(COPY_DICT) -#if defined(LZO1Z) - m_off += (ip[0] << 6) + (ip[1] >> 2); -#else - m_off += (ip[0] >> 2) + (ip[1] << 6); -#endif - ip += 2; - if (m_off == 0) - goto eof_found; - m_off += 0x4000; -#if defined(LZO1Z) - last_m_off = m_off; -#endif -#else -#if defined(LZO1Z) - m_pos -= (ip[0] << 6) + (ip[1] >> 2); -#elif defined(LZO_UNALIGNED_OK_2) && (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN) - m_pos -= (* (const lzo_ushortp) ip) >> 2; -#else - m_pos -= (ip[0] >> 2) + (ip[1] << 6); -#endif - ip += 2; - if (m_pos == op) - goto eof_found; - m_pos -= 0x4000; -#if defined(LZO1Z) - last_m_off = op - m_pos; -#endif -#endif - } - else - { -#if defined(COPY_DICT) -#if defined(LZO1Z) - m_off = 1 + (t << 6) + (*ip++ >> 2); - last_m_off = m_off; -#else - m_off = 1 + (t >> 2) + (*ip++ << 2); -#endif - NEED_OP(2); - t = 2; COPY_DICT(t,m_off) -#else -#if defined(LZO1Z) - t = 1 + (t << 6) + (*ip++ >> 2); - m_pos = op - t; - last_m_off = t; -#else - m_pos = op - 1; - m_pos -= t >> 2; - m_pos -= *ip++ << 2; -#endif - TEST_LOOKBEHIND(m_pos,out); NEED_OP(2); - *op++ = *m_pos++; *op++ = *m_pos; -#endif - goto match_done; - } - -#if defined(COPY_DICT) - - NEED_OP(t+3-1); - t += 3-1; COPY_DICT(t,m_off) - -#else - - TEST_LOOKBEHIND(m_pos,out); assert(t > 0); NEED_OP(t+3-1); -#if defined(LZO_UNALIGNED_OK_4) || defined(LZO_ALIGNED_OK_4) -#if !defined(LZO_UNALIGNED_OK_4) - if (t >= 2 * 4 - (3 - 1) && PTR_ALIGNED2_4(op,m_pos)) - { - assert((op - m_pos) >= 4); -#else - if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) - { -#endif - * (lzo_uint32p) op = * (const lzo_uint32p) m_pos; - op += 4; m_pos += 4; t -= 4 - (3 - 1); - do { - * (lzo_uint32p) op = * (const lzo_uint32p) m_pos; - op += 4; m_pos += 4; t -= 4; - } while (t >= 4); - if (t > 0) do *op++ = *m_pos++; while (--t > 0); - } - else -#endif - { -copy_match: - *op++ = *m_pos++; *op++ = *m_pos++; - do *op++ = *m_pos++; while (--t > 0); - } - -#endif - -match_done: -#if defined(LZO1Z) - t = ip[-1] & 3; -#else - t = ip[-2] & 3; -#endif - if (t == 0) - break; - -match_next: - assert(t > 0); NEED_OP(t); NEED_IP(t+1); - do *op++ = *ip++; while (--t > 0); - t = *ip++; - } - } - -#if defined(HAVE_TEST_IP) || defined(HAVE_TEST_OP) - *out_len = op - out; - return LZO_E_EOF_NOT_FOUND; -#endif - -eof_found: - assert(t == 1); - *out_len = op - out; - return (ip == ip_end ? LZO_E_OK : - (ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN)); - -#if defined(HAVE_NEED_IP) -input_overrun: - *out_len = op - out; - return LZO_E_INPUT_OVERRUN; -#endif - -#if defined(HAVE_NEED_OP) -output_overrun: - *out_len = op - out; - return LZO_E_OUTPUT_OVERRUN; -#endif - -#if defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND) -lookbehind_overrun: - *out_len = op - out; - return LZO_E_LOOKBEHIND_OVERRUN; -#endif -} - -#define LZO_TEST_DECOMPRESS_OVERRUN -#undef DO_DECOMPRESS -#define DO_DECOMPRESS lzo1x_decompress_safe - -#if defined(LZO_TEST_DECOMPRESS_OVERRUN) -# if !defined(LZO_TEST_DECOMPRESS_OVERRUN_INPUT) -# define LZO_TEST_DECOMPRESS_OVERRUN_INPUT 2 -# endif -# if !defined(LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT) -# define LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT 2 -# endif -# if !defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND) -# define LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND -# endif -#endif - -#undef TEST_IP -#undef TEST_OP -#undef TEST_LOOKBEHIND -#undef NEED_IP -#undef NEED_OP -#undef HAVE_TEST_IP -#undef HAVE_TEST_OP -#undef HAVE_NEED_IP -#undef HAVE_NEED_OP -#undef HAVE_ANY_IP -#undef HAVE_ANY_OP - -#if defined(LZO_TEST_DECOMPRESS_OVERRUN_INPUT) -# if (LZO_TEST_DECOMPRESS_OVERRUN_INPUT >= 1) -# define TEST_IP (ip < ip_end) -# endif -# if (LZO_TEST_DECOMPRESS_OVERRUN_INPUT >= 2) -# define NEED_IP(x) \ - if ((lzo_uint)(ip_end - ip) < (lzo_uint)(x)) goto input_overrun -# endif -#endif - -#if defined(LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT) -# if (LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT >= 1) -# define TEST_OP (op <= op_end) -# endif -# if (LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT >= 2) -# undef TEST_OP -# define NEED_OP(x) \ - if ((lzo_uint)(op_end - op) < (lzo_uint)(x)) goto output_overrun -# endif -#endif - -#if defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND) -# define TEST_LOOKBEHIND(m_pos,out) if (m_pos < out) goto lookbehind_overrun -#else -# define TEST_LOOKBEHIND(m_pos,op) ((void) 0) -#endif - -#if !defined(LZO_EOF_CODE) && !defined(TEST_IP) -# define TEST_IP (ip < ip_end) -#endif - -#if defined(TEST_IP) -# define HAVE_TEST_IP -#else -# define TEST_IP 1 -#endif -#if defined(TEST_OP) -# define HAVE_TEST_OP -#else -# define TEST_OP 1 -#endif - -#if defined(NEED_IP) -# define HAVE_NEED_IP -#else -# define NEED_IP(x) ((void) 0) -#endif -#if defined(NEED_OP) -# define HAVE_NEED_OP -#else -# define NEED_OP(x) ((void) 0) -#endif - -#if defined(HAVE_TEST_IP) || defined(HAVE_NEED_IP) -# define HAVE_ANY_IP -#endif -#if defined(HAVE_TEST_OP) || defined(HAVE_NEED_OP) -# define HAVE_ANY_OP -#endif - -#if defined(DO_DECOMPRESS) -LZO_PUBLIC(int) -DO_DECOMPRESS ( const lzo_byte *in , lzo_uint in_len, - lzo_byte *out, lzo_uint *out_len, - lzo_voidp wrkmem ) -#endif -{ - register lzo_byte *op; - register const lzo_byte *ip; - register lzo_uint t; -#if defined(COPY_DICT) - lzo_uint m_off; - const lzo_byte *dict_end; -#else - register const lzo_byte *m_pos; -#endif - - const lzo_byte * const ip_end = in + in_len; -#if defined(HAVE_ANY_OP) - lzo_byte * const op_end = out + *out_len; -#endif -#if defined(LZO1Z) - lzo_uint last_m_off = 0; -#endif - - LZO_UNUSED(wrkmem); - -#if defined(__LZO_QUERY_DECOMPRESS) - if (__LZO_IS_DECOMPRESS_QUERY(in,in_len,out,out_len,wrkmem)) - return __LZO_QUERY_DECOMPRESS(in,in_len,out,out_len,wrkmem,0,0); -#endif - -#if defined(COPY_DICT) - if (dict) - { - if (dict_len > M4_MAX_OFFSET) - { - dict += dict_len - M4_MAX_OFFSET; - dict_len = M4_MAX_OFFSET; - } - dict_end = dict + dict_len; - } - else - { - dict_len = 0; - dict_end = NULL; - } -#endif - - *out_len = 0; - - op = out; - ip = in; - - if (*ip > 17) - { - t = *ip++ - 17; - if (t < 4) - goto match_next; - assert(t > 0); NEED_OP(t); NEED_IP(t+1); - do *op++ = *ip++; while (--t > 0); - goto first_literal_run; - } - - while (TEST_IP && TEST_OP) - { - t = *ip++; - if (t >= 16) - goto match; - if (t == 0) - { - NEED_IP(1); - while (*ip == 0) - { - t += 255; - ip++; - NEED_IP(1); - } - t += 15 + *ip++; - } - assert(t > 0); NEED_OP(t+3); NEED_IP(t+4); -#if defined(LZO_UNALIGNED_OK_4) || defined(LZO_ALIGNED_OK_4) -#if !defined(LZO_UNALIGNED_OK_4) - if (PTR_ALIGNED2_4(op,ip)) - { -#endif - * (lzo_uint32p) op = * (const lzo_uint32p) ip; - op += 4; ip += 4; - if (--t > 0) - { - if (t >= 4) - { - do { - * (lzo_uint32p) op = * (const lzo_uint32p) ip; - op += 4; ip += 4; t -= 4; - } while (t >= 4); - if (t > 0) do *op++ = *ip++; while (--t > 0); - } - else - do *op++ = *ip++; while (--t > 0); - } -#if !defined(LZO_UNALIGNED_OK_4) - } - else -#endif -#endif -#if !defined(LZO_UNALIGNED_OK_4) - { - *op++ = *ip++; *op++ = *ip++; *op++ = *ip++; - do *op++ = *ip++; while (--t > 0); - } -#endif - -first_literal_run: - - t = *ip++; - if (t >= 16) - goto match; -#if defined(COPY_DICT) -#if defined(LZO1Z) - m_off = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2); - last_m_off = m_off; -#else - m_off = (1 + M2_MAX_OFFSET) + (t >> 2) + (*ip++ << 2); -#endif - NEED_OP(3); - t = 3; COPY_DICT(t,m_off) -#else -#if defined(LZO1Z) - t = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2); - m_pos = op - t; - last_m_off = t; -#else - m_pos = op - (1 + M2_MAX_OFFSET); - m_pos -= t >> 2; - m_pos -= *ip++ << 2; -#endif - TEST_LOOKBEHIND(m_pos,out); NEED_OP(3); - *op++ = *m_pos++; *op++ = *m_pos++; *op++ = *m_pos; -#endif - goto match_done; - - while (TEST_IP && TEST_OP) - { -match: - if (t >= 64) - { -#if defined(COPY_DICT) -#if defined(LZO1X) - m_off = 1 + ((t >> 2) & 7) + (*ip++ << 3); - t = (t >> 5) - 1; -#elif defined(LZO1Y) - m_off = 1 + ((t >> 2) & 3) + (*ip++ << 2); - t = (t >> 4) - 3; -#elif defined(LZO1Z) - m_off = t & 0x1f; - if (m_off >= 0x1c) - m_off = last_m_off; - else - { - m_off = 1 + (m_off << 6) + (*ip++ >> 2); - last_m_off = m_off; - } - t = (t >> 5) - 1; -#endif -#else -#if defined(LZO1X) - m_pos = op - 1; - m_pos -= (t >> 2) & 7; - m_pos -= *ip++ << 3; - t = (t >> 5) - 1; -#elif defined(LZO1Y) - m_pos = op - 1; - m_pos -= (t >> 2) & 3; - m_pos -= *ip++ << 2; - t = (t >> 4) - 3; -#elif defined(LZO1Z) - { - lzo_uint off = t & 0x1f; - m_pos = op; - if (off >= 0x1c) - { - assert(last_m_off > 0); - m_pos -= last_m_off; - } - else - { - off = 1 + (off << 6) + (*ip++ >> 2); - m_pos -= off; - last_m_off = off; - } - } - t = (t >> 5) - 1; -#endif - TEST_LOOKBEHIND(m_pos,out); assert(t > 0); NEED_OP(t+3-1); - goto copy_match; -#endif - } - else if (t >= 32) - { - t &= 31; - if (t == 0) - { - NEED_IP(1); - while (*ip == 0) - { - t += 255; - ip++; - NEED_IP(1); - } - t += 31 + *ip++; - } -#if defined(COPY_DICT) -#if defined(LZO1Z) - m_off = 1 + (ip[0] << 6) + (ip[1] >> 2); - last_m_off = m_off; -#else - m_off = 1 + (ip[0] >> 2) + (ip[1] << 6); -#endif -#else -#if defined(LZO1Z) - { - lzo_uint off = 1 + (ip[0] << 6) + (ip[1] >> 2); - m_pos = op - off; - last_m_off = off; - } -#elif defined(LZO_UNALIGNED_OK_2) && (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN) - m_pos = op - 1; - m_pos -= (* (const lzo_ushortp) ip) >> 2; -#else - m_pos = op - 1; - m_pos -= (ip[0] >> 2) + (ip[1] << 6); -#endif -#endif - ip += 2; - } - else if (t >= 16) - { -#if defined(COPY_DICT) - m_off = (t & 8) << 11; -#else - m_pos = op; - m_pos -= (t & 8) << 11; -#endif - t &= 7; - if (t == 0) - { - NEED_IP(1); - while (*ip == 0) - { - t += 255; - ip++; - NEED_IP(1); - } - t += 7 + *ip++; - } -#if defined(COPY_DICT) -#if defined(LZO1Z) - m_off += (ip[0] << 6) + (ip[1] >> 2); -#else - m_off += (ip[0] >> 2) + (ip[1] << 6); -#endif - ip += 2; - if (m_off == 0) - goto eof_found; - m_off += 0x4000; -#if defined(LZO1Z) - last_m_off = m_off; -#endif -#else -#if defined(LZO1Z) - m_pos -= (ip[0] << 6) + (ip[1] >> 2); -#elif defined(LZO_UNALIGNED_OK_2) && (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN) - m_pos -= (* (const lzo_ushortp) ip) >> 2; -#else - m_pos -= (ip[0] >> 2) + (ip[1] << 6); -#endif - ip += 2; - if (m_pos == op) - goto eof_found; - m_pos -= 0x4000; -#if defined(LZO1Z) - last_m_off = op - m_pos; -#endif -#endif - } - else - { -#if defined(COPY_DICT) -#if defined(LZO1Z) - m_off = 1 + (t << 6) + (*ip++ >> 2); - last_m_off = m_off; -#else - m_off = 1 + (t >> 2) + (*ip++ << 2); -#endif - NEED_OP(2); - t = 2; COPY_DICT(t,m_off) -#else -#if defined(LZO1Z) - t = 1 + (t << 6) + (*ip++ >> 2); - m_pos = op - t; - last_m_off = t; -#else - m_pos = op - 1; - m_pos -= t >> 2; - m_pos -= *ip++ << 2; -#endif - TEST_LOOKBEHIND(m_pos,out); NEED_OP(2); - *op++ = *m_pos++; *op++ = *m_pos; -#endif - goto match_done; - } - -#if defined(COPY_DICT) - - NEED_OP(t+3-1); - t += 3-1; COPY_DICT(t,m_off) - -#else - - TEST_LOOKBEHIND(m_pos,out); assert(t > 0); NEED_OP(t+3-1); -#if defined(LZO_UNALIGNED_OK_4) || defined(LZO_ALIGNED_OK_4) -#if !defined(LZO_UNALIGNED_OK_4) - if (t >= 2 * 4 - (3 - 1) && PTR_ALIGNED2_4(op,m_pos)) - { - assert((op - m_pos) >= 4); -#else - if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) - { -#endif - * (lzo_uint32p) op = * (const lzo_uint32p) m_pos; - op += 4; m_pos += 4; t -= 4 - (3 - 1); - do { - * (lzo_uint32p) op = * (const lzo_uint32p) m_pos; - op += 4; m_pos += 4; t -= 4; - } while (t >= 4); - if (t > 0) do *op++ = *m_pos++; while (--t > 0); - } - else -#endif - { -copy_match: - *op++ = *m_pos++; *op++ = *m_pos++; - do *op++ = *m_pos++; while (--t > 0); - } - -#endif - -match_done: -#if defined(LZO1Z) - t = ip[-1] & 3; -#else - t = ip[-2] & 3; -#endif - if (t == 0) - break; - -match_next: - assert(t > 0); NEED_OP(t); NEED_IP(t+1); - do *op++ = *ip++; while (--t > 0); - t = *ip++; - } - } - -#if defined(HAVE_TEST_IP) || defined(HAVE_TEST_OP) - *out_len = op - out; - return LZO_E_EOF_NOT_FOUND; -#endif - -eof_found: - assert(t == 1); - *out_len = op - out; - return (ip == ip_end ? LZO_E_OK : - (ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN)); - -#if defined(HAVE_NEED_IP) -input_overrun: - *out_len = op - out; - return LZO_E_INPUT_OVERRUN; -#endif - -#if defined(HAVE_NEED_OP) -output_overrun: - *out_len = op - out; - return LZO_E_OUTPUT_OVERRUN; -#endif - -#if defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND) -lookbehind_overrun: - *out_len = op - out; - return LZO_E_LOOKBEHIND_OVERRUN; -#endif -} - -/***** End of minilzo.c *****/ - diff -r fd85802f755b -r 628c85c15c7b minilzo.h --- a/minilzo.h Sat Apr 13 17:44:14 2002 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,97 +0,0 @@ -/* minilzo.h -- mini subset of the LZO real-time data compression library - - This file is part of the LZO real-time data compression library. - - Copyright (C) 2000 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 1999 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 1998 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 1997 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 1996 Markus Franz Xaver Johannes Oberhumer - - The LZO library is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of - the License, or (at your option) any later version. - - The LZO library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with the LZO library; see the file COPYING. - If not, write to the Free Software Foundation, Inc., - 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - - Markus F.X.J. Oberhumer - - http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html - */ - -/* - * NOTE: - * the full LZO package can be found at - * http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html - */ - - -#ifndef __MINILZO_H -#define __MINILZO_H - -#define MINILZO_VERSION 0x1070 - -#ifdef __LZOCONF_H -# error "you cannot use both LZO and miniLZO" -#endif - -#undef LZO_HAVE_CONFIG_H -#include "lzoconf.h" - -#if !defined(LZO_VERSION) || (LZO_VERSION != MINILZO_VERSION) -# error "version mismatch in header files" -#endif - - -#ifdef __cplusplus -extern "C" { -#endif - - -/*********************************************************************** -// -************************************************************************/ - -/* Memory required for the wrkmem parameter. - * When the required size is 0, you can also pass a NULL pointer. - */ - -#define LZO1X_MEM_COMPRESS LZO1X_1_MEM_COMPRESS -#define LZO1X_1_MEM_COMPRESS ((lzo_uint32) (16384L * lzo_sizeof_dict_t)) -#define LZO1X_MEM_DECOMPRESS (0) - - -/* compression */ -LZO_EXTERN(int) -lzo1x_1_compress ( const lzo_byte *src, lzo_uint src_len, - lzo_byte *dst, lzo_uint *dst_len, - lzo_voidp wrkmem ); - -/* decompression */ -LZO_EXTERN(int) -lzo1x_decompress ( const lzo_byte *src, lzo_uint src_len, - lzo_byte *dst, lzo_uint *dst_len, - lzo_voidp wrkmem /* NOT USED */ ); - -/* safe decompression with overrun testing */ -LZO_EXTERN(int) -lzo1x_decompress_safe ( const lzo_byte *src, lzo_uint src_len, - lzo_byte *dst, lzo_uint *dst_len, - lzo_voidp wrkmem /* NOT USED */ ); - - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* already included */ - diff -r fd85802f755b -r 628c85c15c7b msvidc.c --- a/msvidc.c Sat Apr 13 17:44:14 2002 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,393 +0,0 @@ -/* - Microsoft Video 1 Decoder - - (C) 2001 Mike Melanson - - The description of the algorithm you can read here: - http://www.pcisys.net/~melanson/codecs/ - - 32bpp support (c) alex -*/ - -#include "config.h" -#include "bswap.h" - -#define LE_16(x) (le2me_16(*(unsigned short *)(x))) - -#define DECODE_BGR555_TO_BGR888(x) \ - x.c1_b = (x.c1 >> 7) & 0xF8; \ - x.c1_g = (x.c1 >> 2) & 0xF8; \ - x.c1_r = (x.c1 << 3) & 0xF8; \ - x.c2_b = (x.c2 >> 7) & 0xF8; \ - x.c2_g = (x.c2 >> 2) & 0xF8; \ - x.c2_r = (x.c2 << 3) & 0xF8; - -#define DECODE_PALETTE_TO_BGR888(x) \ - x.c1_b = palette_map[x.c1 * 4 + 2]; \ - x.c1_g = palette_map[x.c1 * 4 + 1]; \ - x.c1_r = palette_map[x.c1 * 4 + 0]; \ - x.c2_b = palette_map[x.c2 * 4 + 2]; \ - x.c2_g = palette_map[x.c2 * 4 + 1]; \ - x.c2_r = palette_map[x.c2 * 4 + 0]; - -struct -{ - unsigned short c1, c2; - unsigned char c1_r, c1_g, c1_b; - unsigned char c2_r, c2_g, c2_b; -} quad[2][2]; - -void AVI_Decode_Video1_16( - char *encoded, - int encoded_size, - char *decoded, - int width, - int height, - int bytes_per_pixel) -{ - int block_ptr, pixel_ptr; - int total_blocks; - int pixel_x, pixel_y; // pixel width and height iterators - int block_x, block_y; // block width and height iterators - int blocks_wide, blocks_high; // width and height in 4x4 blocks - int block_inc; - int row_dec; - - // decoding parameters - int stream_ptr; - unsigned char byte_a, byte_b; - unsigned short flags; - int skip_blocks; - - stream_ptr = 0; - skip_blocks = 0; - blocks_wide = width / 4; - blocks_high = height / 4; - total_blocks = blocks_wide * blocks_high; - block_inc = 4 * bytes_per_pixel; - row_dec = (width + 4) * bytes_per_pixel; - - for (block_y = blocks_high; block_y > 0; block_y--) - { - block_ptr = ((block_y * 4) - 1) * (width * bytes_per_pixel); - for (block_x = blocks_wide; block_x > 0; block_x--) - { - // check if this block should be skipped - if (skip_blocks) - { - block_ptr += block_inc; - skip_blocks--; - total_blocks--; - continue; - } - - pixel_ptr = block_ptr; - - // get the next two bytes in the encoded data stream - byte_a = encoded[stream_ptr++]; - byte_b = encoded[stream_ptr++]; - - // check if the decode is finished - if ((byte_a == 0) && (byte_b == 0) && (total_blocks == 0)) - return; - - // check if this is a skip code - else if ((byte_b & 0xFC) == 0x84) - { - // but don't count the current block - skip_blocks = ((byte_b - 0x84) << 8) + byte_a - 1; - } - - // check if this is in the 2- or 8-color classes - else if (byte_b < 0x80) - { - flags = (byte_b << 8) | byte_a; - - quad[0][0].c1 = LE_16(&encoded[stream_ptr]); - stream_ptr += 2; - quad[0][0].c2 = LE_16(&encoded[stream_ptr]); - stream_ptr += 2; - - DECODE_BGR555_TO_BGR888(quad[0][0]); - - if (quad[0][0].c1 & 0x8000) - { - // 8-color encoding - quad[1][0].c1 = LE_16(&encoded[stream_ptr]); - stream_ptr += 2; - quad[1][0].c2 = LE_16(&encoded[stream_ptr]); - stream_ptr += 2; - quad[0][1].c1 = LE_16(&encoded[stream_ptr]); - stream_ptr += 2; - quad[0][1].c2 = LE_16(&encoded[stream_ptr]); - stream_ptr += 2; - quad[1][1].c1 = LE_16(&encoded[stream_ptr]); - stream_ptr += 2; - quad[1][1].c2 = LE_16(&encoded[stream_ptr]); - stream_ptr += 2; - - DECODE_BGR555_TO_BGR888(quad[0][1]); - DECODE_BGR555_TO_BGR888(quad[1][0]); - DECODE_BGR555_TO_BGR888(quad[1][1]); - - for (pixel_y = 0; pixel_y < 4; pixel_y++) - { - for (pixel_x = 0; pixel_x < 4; pixel_x++) - { - if (flags & 1) - { - decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c1_r; - decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c1_g; - decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c1_b; - if (bytes_per_pixel == 4) /* 32bpp */ - pixel_ptr++; - } - else - { - decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c2_r; - decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c2_g; - decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c2_b; - if (bytes_per_pixel == 4) /* 32bpp */ - pixel_ptr++; - } - - // get the next flag ready to go - flags >>= 1; - } - pixel_ptr -= row_dec; - } - } - else - { - // 2-color encoding - for (pixel_y = 0; pixel_y < 4; pixel_y++) - { - for (pixel_x = 0; pixel_x < 4; pixel_x++) - { - if (flags & 1) - { - decoded[pixel_ptr++] = quad[0][0].c1_r; - decoded[pixel_ptr++] = quad[0][0].c1_g; - decoded[pixel_ptr++] = quad[0][0].c1_b; - if (bytes_per_pixel == 4) /* 32bpp */ - pixel_ptr++; - } - else - { - decoded[pixel_ptr++] = quad[0][0].c2_r; - decoded[pixel_ptr++] = quad[0][0].c2_g; - decoded[pixel_ptr++] = quad[0][0].c2_b; - if (bytes_per_pixel == 4) /* 32bpp */ - pixel_ptr++; - } - - // get the next flag ready to go - flags >>= 1; - } - pixel_ptr -= row_dec; - } - } - } - - // otherwise, it's a 1-color block - else - { - quad[0][0].c1 = (byte_b << 8) | byte_a; - DECODE_BGR555_TO_BGR888(quad[0][0]); - - for (pixel_y = 0; pixel_y < 4; pixel_y++) - { - for (pixel_x = 0; pixel_x < 4; pixel_x++) - { - decoded[pixel_ptr++] = quad[0][0].c1_r; - decoded[pixel_ptr++] = quad[0][0].c1_g; - decoded[pixel_ptr++] = quad[0][0].c1_b; - if (bytes_per_pixel == 4) /* 32bpp */ - pixel_ptr++; - } - pixel_ptr -= row_dec; - } - } - - block_ptr += block_inc; - total_blocks--; - } - } -} - -void AVI_Decode_Video1_8( - char *encoded, - int encoded_size, - char *decoded, - int width, - int height, - unsigned char *palette_map, - int bytes_per_pixel) -{ - int block_ptr, pixel_ptr; - int total_blocks; - int pixel_x, pixel_y; // pixel width and height iterators - int block_x, block_y; // block width and height iterators - int blocks_wide, blocks_high; // width and height in 4x4 blocks - int block_inc; - int row_dec; - - // decoding parameters - int stream_ptr; - unsigned char byte_a, byte_b; - unsigned short flags; - int skip_blocks; - - stream_ptr = 0; - skip_blocks = 0; - blocks_wide = width / 4; - blocks_high = height / 4; - total_blocks = blocks_wide * blocks_high; - block_inc = 4 * bytes_per_pixel; - row_dec = (width + 4) * bytes_per_pixel; - - for (block_y = blocks_high; block_y > 0; block_y--) - { - block_ptr = ((block_y * 4) - 1) * (width * bytes_per_pixel); - for (block_x = blocks_wide; block_x > 0; block_x--) - { - // check if this block should be skipped - if (skip_blocks) - { - block_ptr += block_inc; - skip_blocks--; - total_blocks--; - continue; - } - - pixel_ptr = block_ptr; - - // get the next two bytes in the encoded data stream - byte_a = encoded[stream_ptr++]; - byte_b = encoded[stream_ptr++]; - - // check if the decode is finished - if ((byte_a == 0) && (byte_b == 0) && (total_blocks == 0)) - return; - - // check if this is a skip code - else if ((byte_b & 0xFC) == 0x84) - { - // but don't count the current block - skip_blocks = ((byte_b - 0x84) << 8) + byte_a - 1; - } - - // check if this is a 2-color block - else if (byte_b < 0x80) - { - flags = (byte_b << 8) | byte_a; - - quad[0][0].c1 = (unsigned char)encoded[stream_ptr++]; - quad[0][0].c2 = (unsigned char)encoded[stream_ptr++]; - - DECODE_PALETTE_TO_BGR888(quad[0][0]); - - // 2-color encoding - for (pixel_y = 0; pixel_y < 4; pixel_y++) - { - for (pixel_x = 0; pixel_x < 4; pixel_x++) - { - if (flags & 1) - { - decoded[pixel_ptr++] = quad[0][0].c1_r; - decoded[pixel_ptr++] = quad[0][0].c1_g; - decoded[pixel_ptr++] = quad[0][0].c1_b; - if (bytes_per_pixel == 4) /* 32bpp */ - pixel_ptr++; - } - else - { - decoded[pixel_ptr++] = quad[0][0].c2_r; - decoded[pixel_ptr++] = quad[0][0].c2_g; - decoded[pixel_ptr++] = quad[0][0].c2_b; - if (bytes_per_pixel == 4) /* 32bpp */ - pixel_ptr++; - } - - // get the next flag ready to go - flags >>= 1; - } - pixel_ptr -= row_dec; - } - } - - // check if it's an 8-color block - else if (byte_b >= 0x90) - { - flags = (byte_b << 8) | byte_a; - - quad[0][0].c1 = (unsigned char)encoded[stream_ptr++]; - quad[0][0].c2 = (unsigned char)encoded[stream_ptr++]; - quad[1][0].c1 = (unsigned char)encoded[stream_ptr++]; - quad[1][0].c2 = (unsigned char)encoded[stream_ptr++]; - - quad[0][1].c1 = (unsigned char)encoded[stream_ptr++]; - quad[0][1].c2 = (unsigned char)encoded[stream_ptr++]; - quad[1][1].c1 = (unsigned char)encoded[stream_ptr++]; - quad[1][1].c2 = (unsigned char)encoded[stream_ptr++]; - - DECODE_PALETTE_TO_BGR888(quad[0][0]); - DECODE_PALETTE_TO_BGR888(quad[0][1]); - DECODE_PALETTE_TO_BGR888(quad[1][0]); - DECODE_PALETTE_TO_BGR888(quad[1][1]); - - for (pixel_y = 0; pixel_y < 4; pixel_y++) - { - for (pixel_x = 0; pixel_x < 4; pixel_x++) - { - if (flags & 1) - { - decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c1_r; - decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c1_g; - decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c1_b; - if (bytes_per_pixel == 4) /* 32bpp */ - pixel_ptr++; - } - else - { - decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c2_r; - decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c2_g; - decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c2_b; - if (bytes_per_pixel == 4) /* 32bpp */ - pixel_ptr++; - } - - // get the next flag ready to go - flags >>= 1; - } - pixel_ptr -= row_dec; - } - } - - // otherwise, it's a 1-color block - else - { - // init c2 along with c1 just so c2 is a known value for macro - quad[0][0].c1 = quad[0][0].c2 = byte_a; - DECODE_PALETTE_TO_BGR888(quad[0][0]); - - for (pixel_y = 0; pixel_y < 4; pixel_y++) - { - for (pixel_x = 0; pixel_x < 4; pixel_x++) - { - decoded[pixel_ptr++] = quad[0][0].c1_r; - decoded[pixel_ptr++] = quad[0][0].c1_g; - decoded[pixel_ptr++] = quad[0][0].c1_b; - if (bytes_per_pixel == 4) /* 32bpp */ - pixel_ptr++; - } - pixel_ptr -= row_dec; - } - } - - block_ptr += block_inc; - total_blocks--; - } - } -} - diff -r fd85802f755b -r 628c85c15c7b nuppelvideo.c --- a/nuppelvideo.c Sat Apr 13 17:44:14 2002 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,123 +0,0 @@ -/* - * NuppelVideo 0.05 file parser - * for MPlayer - * by Panagiotis Issaris - * - * Reworked by alex - */ - -#include -#include -#include - -#include "config.h" -#include "mp_msg.h" - -#include "fastmemcpy.h" - -#include "libmpdemux/nuppelvideo.h" -#include "RTjpegN.h" -#include "minilzo.h" - -#define KEEP_BUFFER - -void decode_nuv( unsigned char *encoded, int encoded_size, - unsigned char *decoded, int width, int height) -{ - int r; - unsigned int out_len; - struct rtframeheader *encodedh = ( struct rtframeheader* ) encoded; - static unsigned char *buffer = 0; /* for RTJpeg with LZO decompress */ -#ifdef KEEP_BUFFER - static unsigned char *previous_buffer = 0; /* to support Last-frame-copy */ -#endif - static int is_lzo_inited = 0; - -// printf("frametype: %c, comtype: %c, encoded_size: %d, width: %d, height: %d\n", -// encodedh->frametype, encodedh->comptype, encoded_size, width, height); - - switch(encodedh->frametype) - { - case 'D': /* additional data for compressors */ - { - /* tables are in encoded */ - if (encodedh->comptype == 'R') - { - RTjpeg_init_decompress ( encoded+12, width, height ); - mp_msg(MSGT_DECVIDEO, MSGL_V, "Found RTjpeg tables (size: %d, width: %d, height: %d)\n", - encoded_size-12, width, height); - } - break; - } - case 'V': - { -#ifdef KEEP_BUFFER - if (!previous_buffer) - previous_buffer = ( unsigned char * ) malloc ( width * height + ( width * height ) / 2 ); -#endif - - if (((encodedh->comptype == '2') || - (encodedh->comptype == '3')) && !is_lzo_inited) - { - /* frame using lzo, init lzo first if not inited */ - if ( lzo_init() != LZO_E_OK ) - { - mp_msg(MSGT_DECVIDEO, MSGL_ERR, "LZO init failed\n"); - return; - } - is_lzo_inited = 1; - } - - switch(encodedh->comptype) - { - case '0': /* raw YUV420 */ - memcpy(decoded, encoded + 12, width*height*3/2); - break; - case '1': /* RTJpeg */ - RTjpeg_decompressYUV420 ( ( __s8 * ) encoded + 12, decoded ); - break; - case '2': /* RTJpeg with LZO */ - if (!buffer) - buffer = ( unsigned char * ) malloc ( width * height + ( width * height ) / 2 ); - if (!buffer) - { - mp_msg(MSGT_DECVIDEO, MSGL_ERR, "Nuppelvideo: error decompressing\n"); - break; - } - r = lzo1x_decompress ( encoded + 12, encodedh->packetlength, buffer, &out_len, NULL ); - if ( r != LZO_E_OK ) - { - mp_msg(MSGT_DECVIDEO, MSGL_ERR, "Nuppelvideo: error decompressing\n"); - break; - } - RTjpeg_decompressYUV420 ( ( __s8 * ) buffer, decoded ); - break; - case '3': /* raw YUV420 with LZO */ - r = lzo1x_decompress ( encoded + 12, encodedh->packetlength, decoded, &out_len, NULL ); - if ( r != LZO_E_OK ) - { - mp_msg(MSGT_DECVIDEO, MSGL_ERR, "Nuppelvideo: error decompressing\n"); - break; - } - break; - case 'N': /* black frame */ - memset ( decoded, 0, width * height ); - memset ( decoded + width * height, 127, width * height / 2); - break; - case 'L': /* copy last frame */ -#ifdef KEEP_BUFFER - memcpy ( decoded, previous_buffer, width*height*3/2); -#endif - break; - } - -#ifdef KEEP_BUFFER - memcpy(previous_buffer, decoded, width*height*3/2); -#endif - break; - } - default: - mp_msg(MSGT_DECVIDEO, MSGL_V, "Nuppelvideo: unknwon frametype: %c\n", - encodedh->frametype); - } -} diff -r fd85802f755b -r 628c85c15c7b qtrle.c --- a/qtrle.c Sat Apr 13 17:44:14 2002 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,125 +0,0 @@ -/* - Quicktime Animation (RLE) Decoder for MPlayer - - (C) 2001 Mike Melanson -*/ - -#include "config.h" -#include "bswap.h" - -#define BE_16(x) (be2me_16(*(unsigned short *)(x))) -#define BE_32(x) (be2me_32(*(unsigned int *)(x))) - -// 256 RGB entries; 25% of these bytes will be unused, but it's faster -// to index 4-byte entries -static unsigned char palette[256 * 4]; - -void qt_decode_rle24( - unsigned char *encoded, - int encoded_size, - unsigned char *decoded, - int width, - int height, - int bytes_per_pixel) -{ - int stream_ptr; - int header; - int start_line; - int lines_to_change; - signed char rle_code; - int row_ptr, pixel_ptr; - int row_inc = bytes_per_pixel * width; - unsigned char r, g, b; - - // check if this frame is even supposed to change - if (encoded_size < 8) - return; - - // start after the chunk size - stream_ptr = 4; - - // fetch the header - header = BE_16(&encoded[stream_ptr]); - stream_ptr += 2; - - // if a header is present, fetch additional decoding parameters - if (header & 0x0008) - { - start_line = BE_16(&encoded[stream_ptr]); - stream_ptr += 4; - lines_to_change = BE_16(&encoded[stream_ptr]); - stream_ptr += 4; - } - else - { - start_line = 0; - lines_to_change = height; - } - - row_ptr = row_inc * start_line; - while (lines_to_change--) - { - pixel_ptr = row_ptr + ((encoded[stream_ptr++] - 1) * bytes_per_pixel); - - while ((rle_code = (signed char)encoded[stream_ptr++]) != -1) - { - if (rle_code == 0) - // there's another skip code in the stream - pixel_ptr += ((encoded[stream_ptr++] - 1) * bytes_per_pixel); - else if (rle_code < 0) - { - // decode the run length code - rle_code = -rle_code; - r = encoded[stream_ptr++]; - g = encoded[stream_ptr++]; - b = encoded[stream_ptr++]; - while (rle_code--) - { - decoded[pixel_ptr++] = b; - decoded[pixel_ptr++] = g; - decoded[pixel_ptr++] = r; - if (bytes_per_pixel == 4) - pixel_ptr++; - } - } - else - { - // copy pixels directly to output - while (rle_code--) - { - decoded[pixel_ptr++] = encoded[stream_ptr + 2]; - decoded[pixel_ptr++] = encoded[stream_ptr + 1]; - decoded[pixel_ptr++] = encoded[stream_ptr + 0]; - stream_ptr += 3; - if (bytes_per_pixel == 4) - pixel_ptr++; - } - } - } - - row_ptr += row_inc; - } -} - -void qt_decode_rle( - unsigned char *encoded, - int encoded_size, - unsigned char *decoded, - int width, - int height, - int encoded_bpp, - int bytes_per_pixel) -{ - switch (encoded_bpp) - { - case 24: - qt_decode_rle24( - encoded, - encoded_size, - decoded, - width, - height, - bytes_per_pixel); - break; - } -} diff -r fd85802f755b -r 628c85c15c7b qtrpza.c --- a/qtrpza.c Sat Apr 13 17:44:14 2002 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,264 +0,0 @@ -/* - * - * Apple Video (rpza) QuickTime Decoder for Mplayer - * (c) 2002 Roberto Togni - * - * Fourcc: rpza, azpr - * - * Some code comes from qtsmc.c by Mike Melanson - * - * A description of the decoding algorithm can be found here: - * http://www.pcisys.net/~melanson/codecs/ - */ - -#include "config.h" -#include "bswap.h" -#include "mp_msg.h" - -#define BE_16(x) (be2me_16(*(unsigned short *)(x))) -#define BE_32(x) (be2me_32(*(unsigned int *)(x))) - - -#define ADVANCE_BLOCK() \ -{ \ - pixel_ptr += block_x_inc; \ - if (pixel_ptr >= (width * bytes_per_pixel)) \ - { \ - pixel_ptr = 0; \ - row_ptr += block_y_inc * 4; \ - } \ - total_blocks--; \ - if (total_blocks < 0) \ - { \ - mp_msg(MSGT_DECVIDEO, MSGL_WARN, "block counter just went negative (this should not happen)\n"); \ - return; \ - } \ -} - -#define PAINT_CURRENT_PIXEL(r, g, b, color) \ -{ \ - if (bytes_per_pixel == 2) { \ - (*(unsigned short*)(&decoded[block_ptr])) = color & 0x7fff; \ - block_ptr += 2; \ - } else { \ - decoded[block_ptr++] = (b); \ - decoded[block_ptr++] = (g); \ - decoded[block_ptr++] = (r); \ - if (bytes_per_pixel == 4) /* 32bpp */ \ - block_ptr++; \ - } \ -} - -#define COLOR_FIX(col_out, col_in) (col_out) = ((col_in) << 3) | ((col_in) >> 2) - -#define COLOR_TO_RGB(r, g, b, color) \ -{ \ - if (bytes_per_pixel != 2) { \ - unsigned short tmp; \ - tmp = (color >> 10) & 0x1f; \ - COLOR_FIX (r, tmp); \ - tmp = (color >> 5) & 0x1f; \ - COLOR_FIX (g, tmp); \ - tmp = color & 0x1f; \ - COLOR_FIX (b, tmp); \ - } \ -} - -#define COLORAB_TO_RGB4(rgb4, color4, colorA, colorB) \ -{ \ - unsigned short ta, tb, tt; \ - if (bytes_per_pixel != 2) { \ - ta = (colorA >> 10) & 0x1f; \ - tb = (colorB >> 10) & 0x1f; \ - COLOR_FIX (rgb4[3][0], ta); \ - COLOR_FIX (rgb4[0][0], tb); \ - tt = (11 * ta + 21 * tb) >> 5; \ - COLOR_FIX (rgb4[1][0], tt); \ - tt = (21 * ta + 11 * tb) >> 5; \ - COLOR_FIX (rgb4[2][0], tt); \ - ta = (colorA >> 5) & 0x1f; \ - tb = (colorB >> 5) & 0x1f; \ - COLOR_FIX (rgb4[3][1], ta); \ - COLOR_FIX (rgb4[0][1], tb); \ - tt = (11 * ta + 21 * tb) >> 5; \ - COLOR_FIX (rgb4[1][1], tt); \ - tt = (21 * ta + 11 * tb) >> 5; \ - COLOR_FIX (rgb4[2][1], tt); \ - ta = colorA & 0x1f; \ - tb = colorB & 0x1f; \ - COLOR_FIX (rgb4[3][2], ta); \ - COLOR_FIX (rgb4[0][2], tb); \ - tt = (11 * ta + 21 * tb) >> 5; \ - COLOR_FIX (rgb4[1][2], tt); \ - tt = (21 * ta + 11 * tb) >> 5; \ - COLOR_FIX (rgb4[2][2], tt); \ - } else { \ - color4[3] = colorA; \ - color4[0] = colorB; \ - ta = (colorA >> 10) & 0x1f; \ - tb = (colorB >> 10) & 0x1f; \ - color4[1] = ((11 * ta + 21 * tb) << 5) & 0x7c00; \ - color4[2] = ((21 * ta + 11 * tb) << 5) & 0x7c00; \ - ta = (colorA >> 5) & 0x1f; \ - tb = (colorB >> 5) & 0x1f; \ - color4[1] |= (11 * ta + 21 * tb) & 0x3e0; \ - color4[2] |= (21 * ta + 11 * tb) & 0x3e0; \ - ta = colorA & 0x1f; \ - tb = colorB & 0x1f; \ - color4[1] |= (11 * ta + 21 * tb) >> 5; \ - color4[2] |= (21 * ta + 11 * tb) >> 5; \ - } \ -} - - - -/* - * rpza frame decoder - * - * Input values: - * - * *encoded: buffer of encoded data (chunk) - * encoded_size: length of encoded buffer - * *decoded: buffer where decoded data is written (image buffer) - * width: width of decoded frame in pixels - * height: height of decoded frame in pixels - * bytes_per_pixel: bytes/pixel in output image (color depth) - * - */ - -void qt_decode_rpza(char *encoded, int encoded_size, char *decoded, int width, - int height, int bytes_per_pixel) -{ - - int i; - int stream_ptr = 0; - int chunk_size; - unsigned char opcode; - int n_blocks; - unsigned short colorA, colorB; - unsigned char r, g, b; - unsigned char rgb4[4][3]; - unsigned short color4[4]; - unsigned char index, idx; - - int row_ptr = 0; - int pixel_ptr = 0; - int pixel_x, pixel_y; - int row_inc = bytes_per_pixel * (width - 4); - int max_height = row_inc * height; - int block_x_inc = bytes_per_pixel * 4; - int block_y_inc = bytes_per_pixel * width; - int block_ptr; - int total_blocks; - - - /* First byte is always 0xe1. Warn if it's different */ - if ((unsigned char)encoded[stream_ptr] != 0xe1) - mp_msg(MSGT_DECVIDEO, MSGL_WARN, - "First chunk byte is 0x%02x instead of 0x1e\n", - (unsigned char)encoded[stream_ptr]); - - /* Get chunk size, ingnoring first byte */ - chunk_size = BE_32(&encoded[stream_ptr]) & 0x00FFFFFF; - stream_ptr += 4; - - /* If length mismatch use size from MOV file and try to decode anyway */ - if (chunk_size != encoded_size) - mp_msg(MSGT_DECVIDEO, MSGL_WARN, "MOV chunk size != encoded chunk size; using MOV chunk size\n"); - - chunk_size = encoded_size; - - /* Number of 4x4 blocks in frame. */ - total_blocks = (width * height) / (4 * 4); - - /* Process chunk data */ - while (stream_ptr < chunk_size) { - opcode = encoded[stream_ptr++]; /* Get opcode */ - - n_blocks = (opcode & 0x1f) +1; /* Extract block counter from opcode */ - - /* If opcode MSbit is 0, we need more data to decide what to do */ - if ((opcode & 0x80) == 0) { - colorA = (opcode << 8) | ((unsigned char)encoded[stream_ptr++]); - opcode = 0; - if ((encoded[stream_ptr] & 0x80) != 0) { - /* Must behave as opcode 110xxxxx, using colorA computed above.*/ - /* Use fake opcode 0x20 to enter switch block at the right place */ - opcode = 0x20; - n_blocks = 1; - } - } - switch (opcode & 0xe0) { - /* Skip blocks */ - case 0x80: - while (n_blocks--) - ADVANCE_BLOCK(); - break; - - /* Fill blocks with one color */ - case 0xa0: - colorA = BE_16 (&encoded[stream_ptr]); - stream_ptr += 2; - COLOR_TO_RGB (r, g, b, colorA); - while (n_blocks--) { - block_ptr = row_ptr + pixel_ptr; - for (pixel_y = 0; pixel_y < 4; pixel_y++) { - for (pixel_x = 0; pixel_x < 4; pixel_x++){ - PAINT_CURRENT_PIXEL(r, g, b, colorA); - } - block_ptr += row_inc; - } - ADVANCE_BLOCK(); - } - break; - - /* Fill blocks with 4 colors */ - case 0xc0: - colorA = BE_16 (&encoded[stream_ptr]); - stream_ptr += 2; - case 0x20: - colorB = BE_16 (&encoded[stream_ptr]); - stream_ptr += 2; - COLORAB_TO_RGB4 (rgb4, color4, colorA, colorB); - while (n_blocks--) { - block_ptr = row_ptr + pixel_ptr; - for (pixel_y = 0; pixel_y < 4; pixel_y++) { - index = encoded[stream_ptr++]; - for (pixel_x = 0; pixel_x < 4; pixel_x++){ - idx = (index >> (2 * (3 - pixel_x))) & 0x03; - PAINT_CURRENT_PIXEL(rgb4[idx][0], rgb4[idx][1], rgb4[idx][2], color4[idx]); - } - block_ptr += row_inc; - } - ADVANCE_BLOCK(); - } - break; - - /* Fill block with 16 colors */ - case 0x00: - block_ptr = row_ptr + pixel_ptr; - for (pixel_y = 0; pixel_y < 4; pixel_y++) { - for (pixel_x = 0; pixel_x < 4; pixel_x++){ - /* We already have color of upper left pixel */ - if ((pixel_y != 0) || (pixel_x !=0)) { - colorA = BE_16 (&encoded[stream_ptr]); - stream_ptr += 2; - } - COLOR_TO_RGB (r, g, b, colorA); - PAINT_CURRENT_PIXEL(r, g, b, colorA); - } - block_ptr += row_inc; - } - ADVANCE_BLOCK(); - break; - - /* Unknown opcode */ - default: - mp_msg(MSGT_DECVIDEO, MSGL_HINT, "Unknown opcode %d in rpza chunk." - " Skip remaining %lu bytes of chunk data.\n", opcode, - chunk_size - stream_ptr); - return; - } /* Opcode switch */ - - } -} diff -r fd85802f755b -r 628c85c15c7b qtsmc.c --- a/qtsmc.c Sat Apr 13 17:44:14 2002 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,513 +0,0 @@ -/* - Apple Graphics (SMC) Decoder for MPlayer - by Mike Melanson - Special thanks for Roberto Togni for - tracking down the final, nagging bugs. - - The description of the decoding algorithm can be found here: - http://www.pcisys.net/~melanson/codecs/ -*/ - -#include -#include "config.h" -#include "bswap.h" -#include "mp_msg.h" - -#define BE_16(x) (be2me_16(*(unsigned short *)(x))) -#define BE_32(x) (be2me_32(*(unsigned int *)(x))) - -#define COLORS_PER_TABLE 256 -#define BYTES_PER_COLOR 4 - -#define CPAIR 2 -#define CQUAD 4 -#define COCTET 8 - -static unsigned char *color_pairs; -static unsigned char *color_quads; -static unsigned char *color_octets; - -static int color_pair_index; -static int color_quad_index; -static int color_octet_index; - -static int smc_initialized; - -// returns 0 if successfully initialized (enough memory was available), -// non-zero on failure -int qt_init_decode_smc(void) -{ - // be pessimistic to start - smc_initialized = 0; - - // allocate memory for the 3 palette tables - if ((color_pairs = (unsigned char *)malloc( - COLORS_PER_TABLE * BYTES_PER_COLOR * 2)) == 0) - return 1; - if ((color_quads = (unsigned char *)malloc( - COLORS_PER_TABLE * BYTES_PER_COLOR * 4)) == 0) - return 1; - if ((color_octets = (unsigned char *)malloc( - COLORS_PER_TABLE * BYTES_PER_COLOR * 8)) == 0) - return 1; - - // if execution got this far, initialization succeeded - smc_initialized = 1; - return 0; -} - -#define GET_BLOCK_COUNT \ - (opcode & 0x10) ? (1 + encoded[stream_ptr++]) : 1 + (opcode & 0x0F); -#define ADVANCE_BLOCK() \ -{ \ - pixel_ptr += block_x_inc; \ - if (pixel_ptr >= byte_width) \ - { \ - pixel_ptr = 0; \ - row_ptr += block_y_inc * 4; \ - } \ - total_blocks--; \ - if (total_blocks < 0) \ - { \ - mp_msg(MSGT_DECVIDEO, MSGL_WARN, "block counter just went negative (this should not happen)\n"); \ - return; \ - } \ -} - -void qt_decode_smc( - unsigned char *encoded, - int encoded_size, - unsigned char *decoded, - int pixel_width, - int pixel_height, - unsigned char *palette_map, - int bytes_per_pixel) -{ - int i; - int stream_ptr = 0; - int chunk_size; - unsigned char opcode; - int n_blocks; - unsigned int color_flags; - unsigned int color_flags_a; - unsigned int color_flags_b; - unsigned int flag_mask; - - int byte_width = pixel_width * bytes_per_pixel; // width of a row in bytes - int byte_height = pixel_height * byte_width; // max image size, basically - int row_ptr = 0; - int pixel_ptr = 0; - int pixel_x, pixel_y; - int row_inc = bytes_per_pixel * (pixel_width - 4); - int block_x_inc = bytes_per_pixel * 4; - int block_y_inc = bytes_per_pixel * pixel_width; - int block_ptr; - int prev_block_ptr; - int prev_block_ptr1, prev_block_ptr2; - int prev_block_flag; - int total_blocks; - int color_table_index; // indexes to color pair, quad, or octet tables - int color_index; // indexes into palette map - - if (!smc_initialized) - return; - - // reset color tables - color_pair_index = 0; - color_quad_index = 0; - color_octet_index = 0; - - chunk_size = BE_32(&encoded[stream_ptr]) & 0x00FFFFFF; - stream_ptr += 4; - if (chunk_size != encoded_size) - mp_msg(MSGT_DECVIDEO, MSGL_WARN, "MOV chunk size != encoded chunk size; using MOV chunk size\n"); - - chunk_size = encoded_size; - total_blocks = (pixel_width * pixel_height) / (4 * 4); - - // traverse through the blocks - while (total_blocks) - { - // sanity checks - // make sure stream ptr hasn't gone out of bounds - if (stream_ptr > chunk_size) - { - mp_msg(MSGT_DECVIDEO, MSGL_ERR, - "SMC decoder just went out of bounds (stream ptr = %d, chunk size = %d)\n", - stream_ptr, chunk_size); - return; - } - // make sure the row pointer hasn't gone wild - if (row_ptr >= byte_height) - { - mp_msg(MSGT_DECVIDEO, MSGL_ERR, - "SMC decoder just went out of bounds (row ptr = %d, height = %d)\n", - row_ptr, byte_height); - return; - } - - opcode = encoded[stream_ptr++]; - switch (opcode & 0xF0) - { - // skip n blocks - case 0x00: - case 0x10: - n_blocks = GET_BLOCK_COUNT; - while (n_blocks--) - ADVANCE_BLOCK(); - break; - - // repeat last block n times - case 0x20: - case 0x30: - n_blocks = GET_BLOCK_COUNT; - - // sanity check - if ((row_ptr == 0) && (pixel_ptr == 0)) - { - mp_msg(MSGT_DECVIDEO, MSGL_WARN, - "encountered repeat block opcode (%02X) but no blocks rendered yet\n", - opcode & 0xF0); - break; - } - - // figure out where the previous block started - if (pixel_ptr == 0) - prev_block_ptr1 = (row_ptr - block_y_inc * 4) + - byte_width - block_x_inc; - else - prev_block_ptr1 = row_ptr + pixel_ptr - block_x_inc; - - while (n_blocks--) - { - block_ptr = row_ptr + pixel_ptr; - prev_block_ptr = prev_block_ptr1; - for (pixel_y = 0; pixel_y < 4; pixel_y++) - { - for (pixel_x = 0; pixel_x < 4; pixel_x++) - { - decoded[block_ptr++] = decoded[prev_block_ptr++]; - decoded[block_ptr++] = decoded[prev_block_ptr++]; - decoded[block_ptr++] = decoded[prev_block_ptr++]; - if (bytes_per_pixel == 4) /* 32bpp */ - { - block_ptr++; - prev_block_ptr++; - } - } - block_ptr += row_inc; - prev_block_ptr += row_inc; - } - ADVANCE_BLOCK(); - } - break; - - // repeat previous pair of blocks n times - case 0x40: - case 0x50: - n_blocks = GET_BLOCK_COUNT; - n_blocks *= 2; - - // sanity check - if ((row_ptr == 0) && (pixel_ptr < 2 * block_x_inc)) - { - mp_msg(MSGT_DECVIDEO, MSGL_WARN, - "encountered repeat block opcode (%02X) but not enough blocks rendered yet\n", - opcode & 0xF0); - break; - } - - // figure out where the previous 2 blocks started - if (pixel_ptr == 0) - prev_block_ptr1 = (row_ptr - block_y_inc * 4) + - byte_width - block_x_inc * 2; - else if (pixel_ptr == block_x_inc) - prev_block_ptr1 = (row_ptr - block_y_inc * 4) + - byte_width - block_x_inc; - else - prev_block_ptr1 = row_ptr + pixel_ptr - block_x_inc * 2; - - if (pixel_ptr == 0) - prev_block_ptr2 = (row_ptr - block_y_inc * 4) + - (byte_width - block_x_inc); - else - prev_block_ptr2 = row_ptr + pixel_ptr - block_x_inc; - - prev_block_flag = 0; - while (n_blocks--) - { - block_ptr = row_ptr + pixel_ptr; - if (prev_block_flag) - prev_block_ptr = prev_block_ptr2; - else - prev_block_ptr = prev_block_ptr1; - prev_block_flag = !prev_block_flag; - - for (pixel_y = 0; pixel_y < 4; pixel_y++) - { - for (pixel_x = 0; pixel_x < 4; pixel_x++) - { - decoded[block_ptr++] = decoded[prev_block_ptr++]; - decoded[block_ptr++] = decoded[prev_block_ptr++]; - decoded[block_ptr++] = decoded[prev_block_ptr++]; - if (bytes_per_pixel == 4) /* 32bpp */ - { - block_ptr++; - prev_block_ptr++; - } - } - block_ptr += row_inc; - prev_block_ptr += row_inc; - } - ADVANCE_BLOCK(); - } - break; - - // 1-color block encoding - case 0x60: - case 0x70: - n_blocks = GET_BLOCK_COUNT; - color_index = encoded[stream_ptr++] * 4; - - while (n_blocks--) - { - block_ptr = row_ptr + pixel_ptr; - for (pixel_y = 0; pixel_y < 4; pixel_y++) - { - for (pixel_x = 0; pixel_x < 4; pixel_x++) - { - decoded[block_ptr++] = palette_map[color_index + 0]; - decoded[block_ptr++] = palette_map[color_index + 1]; - decoded[block_ptr++] = palette_map[color_index + 2]; - if (bytes_per_pixel == 4) /* 32bpp */ - block_ptr++; - } - block_ptr += row_inc; - } - ADVANCE_BLOCK(); - } - break; - - // 2-color block encoding - case 0x80: - case 0x90: - n_blocks = (opcode & 0x0F) + 1; - - // figure out which color pair to use to paint the 2-color block - if ((opcode & 0xF0) == 0x80) - { - // fetch the next 2 colors from bytestream and store in next - // available entry in the color pair table - for (i = 0; i < CPAIR; i++) - { - color_index = encoded[stream_ptr++] * BYTES_PER_COLOR; - color_table_index = CPAIR * BYTES_PER_COLOR * color_pair_index + - (i * BYTES_PER_COLOR); - color_pairs[color_table_index + 0] = palette_map[color_index + 0]; - color_pairs[color_table_index + 1] = palette_map[color_index + 1]; - color_pairs[color_table_index + 2] = palette_map[color_index + 2]; - } - // this is the base index to use for this block - color_table_index = CPAIR * BYTES_PER_COLOR * color_pair_index; - color_pair_index++; - if (color_pair_index == COLORS_PER_TABLE) - color_pair_index = 0; - } - else - color_table_index = CPAIR * BYTES_PER_COLOR * encoded[stream_ptr++]; - - while (n_blocks--) - { - color_flags = BE_16(&encoded[stream_ptr]); - stream_ptr += 2; - flag_mask = 0x8000; - block_ptr = row_ptr + pixel_ptr; - for (pixel_y = 0; pixel_y < 4; pixel_y++) - { - for (pixel_x = 0; pixel_x < 4; pixel_x++) - { - if (color_flags & flag_mask) - color_index = color_table_index + BYTES_PER_COLOR; - else - color_index = color_table_index; - flag_mask >>= 1; - - decoded[block_ptr++] = color_pairs[color_index + 0]; - decoded[block_ptr++] = color_pairs[color_index + 1]; - decoded[block_ptr++] = color_pairs[color_index + 2]; - if (bytes_per_pixel == 4) /* 32bpp */ - block_ptr++; - } - block_ptr += row_inc; - } - ADVANCE_BLOCK(); - } - break; - - // 4-color block encoding - case 0xA0: - case 0xB0: - n_blocks = (opcode & 0x0F) + 1; - - // figure out which color quad to use to paint the 4-color block - if ((opcode & 0xF0) == 0xA0) - { - // fetch the next 4 colors from bytestream and store in next - // available entry in the color quad table - for (i = 0; i < CQUAD; i++) - { - color_index = encoded[stream_ptr++] * BYTES_PER_COLOR; - color_table_index = CQUAD * BYTES_PER_COLOR * color_quad_index + - (i * BYTES_PER_COLOR); - color_quads[color_table_index + 0] = palette_map[color_index + 0]; - color_quads[color_table_index + 1] = palette_map[color_index + 1]; - color_quads[color_table_index + 2] = palette_map[color_index + 2]; - } - // this is the base index to use for this block - color_table_index = CQUAD * BYTES_PER_COLOR * color_quad_index; - color_quad_index++; - if (color_quad_index == COLORS_PER_TABLE) - color_quad_index = 0; - } - else - color_table_index = CQUAD * BYTES_PER_COLOR * encoded[stream_ptr++]; - - while (n_blocks--) - { - color_flags = BE_32(&encoded[stream_ptr]); - stream_ptr += 4; - // flag mask actually acts as a bit shift count here - flag_mask = 30; - block_ptr = row_ptr + pixel_ptr; - for (pixel_y = 0; pixel_y < 4; pixel_y++) - { - for (pixel_x = 0; pixel_x < 4; pixel_x++) - { - color_index = color_table_index + (BYTES_PER_COLOR * - ((color_flags >> flag_mask) & 0x03)); - flag_mask -= 2; - - decoded[block_ptr++] = color_quads[color_index + 0]; - decoded[block_ptr++] = color_quads[color_index + 1]; - decoded[block_ptr++] = color_quads[color_index + 2]; - if (bytes_per_pixel == 4) /* 32bpp */ - block_ptr++; - } - block_ptr += row_inc; - } - ADVANCE_BLOCK(); - } - break; - - // 8-color block encoding - case 0xC0: - case 0xD0: - n_blocks = (opcode & 0x0F) + 1; - - // figure out which color octet to use to paint the 8-color block - if ((opcode & 0xF0) == 0xC0) - { - // fetch the next 8 colors from bytestream and store in next - // available entry in the color octet table - for (i = 0; i < COCTET; i++) - { - color_index = encoded[stream_ptr++] * BYTES_PER_COLOR; - color_table_index = COCTET * BYTES_PER_COLOR * color_octet_index + - (i * BYTES_PER_COLOR); - color_octets[color_table_index + 0] = palette_map[color_index + 0]; - color_octets[color_table_index + 1] = palette_map[color_index + 1]; - color_octets[color_table_index + 2] = palette_map[color_index + 2]; - } - // this is the base index to use for this block - color_table_index = COCTET * BYTES_PER_COLOR * color_octet_index; - color_octet_index++; - if (color_octet_index == COLORS_PER_TABLE) - color_octet_index = 0; - } - else - color_table_index = COCTET * BYTES_PER_COLOR * encoded[stream_ptr++]; - - while (n_blocks--) - { - /* - For this input: - 01 23 45 67 89 AB - This is the output: - flags_a = xx012456, flags_b = xx89A37B - */ - // build the color flags - color_flags_a = color_flags_b = 0; - color_flags_a = - (encoded[stream_ptr + 0] << 16) | - ((encoded[stream_ptr + 1] & 0xF0) << 8) | - ((encoded[stream_ptr + 2] & 0xF0) << 4) | - ((encoded[stream_ptr + 2] & 0x0F) << 4) | - ((encoded[stream_ptr + 3] & 0xF0) >> 4); - color_flags_b = - (encoded[stream_ptr + 4] << 16) | - ((encoded[stream_ptr + 5] & 0xF0) << 8) | - ((encoded[stream_ptr + 1] & 0x0F) << 8) | - ((encoded[stream_ptr + 3] & 0x0F) << 4) | - (encoded[stream_ptr + 5] & 0x0F); - stream_ptr += 6; - - color_flags = color_flags_a; - // flag mask actually acts as a bit shift count here - flag_mask = 21; - block_ptr = row_ptr + pixel_ptr; - for (pixel_y = 0; pixel_y < 4; pixel_y++) - { - // reload flags at third row (iteration pixel_y == 2) - if (pixel_y == 2) - { - color_flags = color_flags_b; - flag_mask = 21; - } - for (pixel_x = 0; pixel_x < 4; pixel_x++) - { - color_index = color_table_index + (BYTES_PER_COLOR * - ((color_flags >> flag_mask) & 0x07)); - flag_mask -= 3; - - decoded[block_ptr++] = color_octets[color_index + 0]; - decoded[block_ptr++] = color_octets[color_index + 1]; - decoded[block_ptr++] = color_octets[color_index + 2]; - if (bytes_per_pixel == 4) /* 32bpp */ - block_ptr++; - } - block_ptr += row_inc; - } - ADVANCE_BLOCK(); - } - break; - - // 16-color block encoding (every pixel is a different color) - case 0xE0: - n_blocks = (opcode & 0x0F) + 1; - - while (n_blocks--) - { - block_ptr = row_ptr + pixel_ptr; - for (pixel_y = 0; pixel_y < 4; pixel_y++) - { - for (pixel_x = 0; pixel_x < 4; pixel_x++) - { - color_index = encoded[stream_ptr++] * BYTES_PER_COLOR; - decoded[block_ptr++] = palette_map[color_index + 0]; - decoded[block_ptr++] = palette_map[color_index + 1]; - decoded[block_ptr++] = palette_map[color_index + 2]; - if (bytes_per_pixel == 4) /* 32bpp */ - block_ptr++; - } - block_ptr += row_inc; - } - ADVANCE_BLOCK(); - } - break; - - case 0xF0: - mp_msg(MSGT_DECVIDEO, MSGL_HINT, "0xF0 opcode seen in SMC chunk (MPlayer developers would like to know)\n"); - break; - } - } -} diff -r fd85802f755b -r 628c85c15c7b roqav.c --- a/roqav.c Sat Apr 13 17:44:14 2002 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,670 +0,0 @@ -/* - RoQ A/V decoder for the MPlayer program - by Mike Melanson - based on Dr. Tim Ferguson's RoQ document and accompanying source - code found at: - http://www.csse.monash.edu.au/~timf/videocodec.html -*/ - -#include -#include -#include "config.h" -#include "bswap.h" -#include "mp_msg.h" -#include "mp_image.h" - -#define LE_16(x) (le2me_16(*(unsigned short *)(x))) -#define LE_32(x) (le2me_32(*(unsigned int *)(x))) - -#define CLAMP_S16(x) if (x < -32768) x = -32768; \ - else if (x > 32767) x = 32767; -#define SE_16BIT(x) if (x & 0x8000) x -= 0x10000; - -// RoQ chunk types -#define RoQ_INFO 0x1001 -#define RoQ_QUAD_CODEBOOK 0x1002 -#define RoQ_QUAD_VQ 0x1011 -#define RoQ_SOUND_MONO 0x1020 -#define RoQ_SOUND_STEREO 0x1021 - -#define MAX_ROQ_CODEBOOK_SIZE 256 - -// codebook entry for 2x2 vector -typedef struct -{ - // upper and lower luminance value pairs of 2x2 vector: [y0 y1], [y2 y3] - unsigned short v2_y_u; - unsigned short v2_y_l; - - // chrominance components - unsigned char u, v; - - // these variables are for rendering a doublesized 8x8 block; e.g.: - // v2_y_rows12 = [y0 y0 y1 y1] - // v2_y_rows34 = [y2 y2 y3 y3] - unsigned long v2d_y_rows_12; - unsigned long v2d_y_rows_34; - // ex: v2_u_row1 = [u u] - // v2_v_row2 = [v v] - unsigned short v2d_u_rows_12; - unsigned short v2d_v_rows_12; - - // maintain separate bytes for the luminance values as well - unsigned char y0, y1, y2, y3; -} roq_v2_codebook; - -// codebook entry for 4x4 vector -typedef struct -{ - unsigned char v2_index[4]; -} roq_v4_codebook; - -typedef struct -{ - roq_v2_codebook v2[MAX_ROQ_CODEBOOK_SIZE]; - roq_v4_codebook v4[MAX_ROQ_CODEBOOK_SIZE]; - mp_image_t *prev_frame; -} roqvideo_info; - - -// This function fills in the missing information for a v2 vector after -// loading the Y, U and V values. -inline void prep_v2(roq_v2_codebook *v2) -{ - v2->v2_y_u = be2me_16((v2->y0 << 8) | v2->y1); - v2->v2_y_l = be2me_16((v2->y2 << 8) | v2->y3); - - v2->v2d_y_rows_12 = be2me_32((v2->y0 << 24) | (v2->y0 << 16) | - (v2->y1 << 8) | v2->y1); - v2->v2d_y_rows_34 = be2me_32((v2->y2 << 24) | (v2->y2 << 16) | - (v2->y3 << 8) | v2->y3); - - // no reason to swap these for endianness since they're the same bytes - v2->v2d_u_rows_12 = (v2->u << 8) | v2->u; - v2->v2d_v_rows_12 = (v2->v << 8) | v2->v; -} - -inline void paint_v2double_block( - unsigned char *y_plane, - unsigned char *u_plane, - unsigned char *v_plane, - roq_v2_codebook *v2, - unsigned int y_stride, - unsigned int u_stride, - unsigned int v_stride) -{ - // render the luminance components - *(unsigned int *)y_plane = v2->v2d_y_rows_12; - y_plane += y_stride; - *(unsigned int *)y_plane = v2->v2d_y_rows_12; - y_plane += y_stride; - *(unsigned int *)y_plane = v2->v2d_y_rows_34; - y_plane += y_stride; - *(unsigned int *)y_plane = v2->v2d_y_rows_34; - - // render the color planes - *(unsigned short *)u_plane = v2->v2d_u_rows_12; - u_plane += u_stride; - *(unsigned short *)u_plane = v2->v2d_u_rows_12; - - *(unsigned short *)v_plane = v2->v2d_v_rows_12; - v_plane += v_stride; - *(unsigned short *)v_plane = v2->v2d_v_rows_12; -} - -inline void paint_v4_block( - unsigned char *y_plane, - unsigned char *u_plane, - unsigned char *v_plane, - unsigned int y_stride, - unsigned int u_stride, - unsigned int v_stride, - roq_v2_codebook *v2_a, - roq_v2_codebook *v2_b, - roq_v2_codebook *v2_c, - roq_v2_codebook *v2_d) -{ - // render luminance components - ((unsigned short *)y_plane)[0] = v2_a->v2_y_u; - ((unsigned short *)y_plane)[1] = v2_b->v2_y_u; - - y_plane += y_stride; - ((unsigned short *)y_plane)[0] = v2_a->v2_y_l; - ((unsigned short *)y_plane)[1] = v2_b->v2_y_l; - - y_plane += y_stride; - ((unsigned short *)y_plane)[0] = v2_c->v2_y_u; - ((unsigned short *)y_plane)[1] = v2_d->v2_y_u; - - y_plane += y_stride; - ((unsigned short *)y_plane)[0] = v2_c->v2_y_l; - ((unsigned short *)y_plane)[1] = v2_d->v2_y_l; - - // render the color planes - u_plane[0] = v2_a->u; - u_plane[1] = v2_b->u; - u_plane += u_stride; - u_plane[0] = v2_c->u; - u_plane[1] = v2_d->u; - - v_plane[0] = v2_a->v; - v_plane[1] = v2_b->v; - v_plane += v_stride; - v_plane[0] = v2_c->v; - v_plane[1] = v2_d->v; -} - -// This function copies the 4x4 block from the prev_*_planes to the -// current *_planes. -inline void copy_4x4_block( - unsigned char *y_plane, - unsigned char *u_plane, - unsigned char *v_plane, - unsigned char *prev_y_plane, - unsigned char *prev_u_plane, - unsigned char *prev_v_plane, - unsigned int y_stride, - unsigned int u_stride, - unsigned int v_stride) -{ - int i; - - // copy over the luminance components (4 rows, 1 uint each) - for (i = 0; i < 4; i++) - { - *(unsigned int *)y_plane = *(unsigned int *)prev_y_plane; - y_plane += y_stride; - prev_y_plane += y_stride; - } - - // copy the chrominance values - for (i = 0; i < 2; i++) - { - *(unsigned short*)u_plane = *(unsigned short*)prev_u_plane; - u_plane += u_stride; - prev_u_plane += u_stride; - *(unsigned short*)v_plane = *(unsigned short*)prev_v_plane; - v_plane += v_stride; - prev_v_plane += v_stride; - } -} - -// This function copies the 8x8 block from the prev_*_planes to the -// current *_planes. -inline void copy_8x8_block( - unsigned char *y_plane, - unsigned char *u_plane, - unsigned char *v_plane, - unsigned char *prev_y_plane, - unsigned char *prev_u_plane, - unsigned char *prev_v_plane, - unsigned int y_stride, - unsigned int u_stride, - unsigned int v_stride) -{ - int i; - - // copy over the luminance components (8 rows, 2 uints each) - for (i = 0; i < 8; i++) - { - ((unsigned int *)y_plane)[0] = ((unsigned int *)prev_y_plane)[0]; - ((unsigned int *)y_plane)[1] = ((unsigned int *)prev_y_plane)[1]; - y_plane += y_stride; - prev_y_plane += y_stride; - } - - // copy the chrominance values - for (i = 0; i < 4; i++) - { - *(unsigned int*)u_plane = *(unsigned int*)prev_u_plane; - u_plane += u_stride; - prev_u_plane += u_stride; - *(unsigned int*)v_plane = *(unsigned int*)prev_v_plane; - v_plane += v_stride; - prev_v_plane += v_stride; - } -} - -// This function creates storage space for the vector codebooks. -void *roq_decode_video_init(void) -{ - roqvideo_info *info = - (roqvideo_info *)calloc(sizeof(roqvideo_info), 1); - - info->prev_frame = NULL; - - return info; -} - -#define EMPTY_ROQ_CODEWORD 0xFFFF0000 - -#define FETCH_NEXT_CODE() \ - if (current_roq_codeword == EMPTY_ROQ_CODEWORD) \ - { \ - if (stream_ptr + 2 > encoded_size) \ - { \ - mp_msg(MSGT_DECVIDEO, MSGL_WARN, \ - "RoQ video: stream pointer just went out of bounds (1)\n"); \ - return; \ - } \ - current_roq_codeword = (0x0000FFFF) | \ - (encoded[stream_ptr + 0] << 16) | \ - (encoded[stream_ptr + 1] << 24); \ - stream_ptr += 2; \ - } \ - roq_code = ((current_roq_codeword >> 30) & 0x03); \ - current_roq_codeword <<= 2; - -#define FETCH_NEXT_ARGUMENT() \ - if (stream_ptr + 1 > encoded_size) \ - { \ - mp_msg(MSGT_DECVIDEO, MSGL_WARN, \ - "RoQ video: stream pointer just went out of bounds (2)\n"); \ - return; \ - } \ - argument = encoded[stream_ptr++]; - -#define CHECK_PREV_FRAME() \ - if (!info->prev_frame) \ - { \ - mp_msg(MSGT_DECVIDEO, MSGL_WARN, \ - "RoQ video: can't handle motion vector when there's no previous frame\n"); \ - return; \ - } - -void roq_decode_video(void *context, unsigned char *encoded, - int encoded_size, mp_image_t *mpi) -{ - roqvideo_info *info = (roqvideo_info *)context; - - int stream_ptr = 0; - int i, j; - int chunk_length; - int v2_count; - int v4_count; - - int roq_code; - unsigned int current_roq_codeword = EMPTY_ROQ_CODEWORD; - unsigned char argument = 0; - char mean_motion_x; - char mean_motion_y; - int mx, my; // for calculating the motion vector - - int mblock_x = 0; - int mblock_y = 0; - int quad8_x, quad8_y; // for pointing to 8x8 blocks in a macroblock - int quad4_x, quad4_y; // for pointing to 4x4 blocks in an 8x8 block - - unsigned char *y_plane; - unsigned char *u_plane; - unsigned char *v_plane; - unsigned char *prev_y_plane; - unsigned char *prev_u_plane; - unsigned char *prev_v_plane; - unsigned int y_stride = mpi->stride[0]; - unsigned int u_stride = mpi->stride[1]; - unsigned int v_stride = mpi->stride[2]; - - roq_v4_codebook v4; - - // make sure the encoded chunk is of minimal acceptable length - if (encoded_size < 8) - { - mp_msg(MSGT_DECVIDEO, MSGL_WARN, - "RoQ video: chunk isn't even 8 bytes long (minimum acceptable length)\n"); - return; - } - - // make sure the resolution checks out - if ((mpi->width % 16 != 0) || (mpi->height % 16 != 0)) - { - mp_msg(MSGT_DECVIDEO, MSGL_WARN, - "RoQ video resolution: %d x %d; expected dimensions divisible by 16\n"); - return; - } - - if (LE_16(&encoded[stream_ptr]) == RoQ_QUAD_CODEBOOK) - { - stream_ptr += 2; - chunk_length = LE_32(&encoded[stream_ptr]); - stream_ptr += 4; - v4_count = encoded[stream_ptr++]; - v2_count = encoded[stream_ptr++]; - if (v2_count == 0) - v2_count = 256; - if ((v4_count == 0) && (v2_count * 6 < chunk_length)) - v4_count = 256; - - // make sure the lengths agree with each other - if (((v2_count * 6) + (v4_count * 4)) != chunk_length) - { - mp_msg(MSGT_DECVIDEO, MSGL_WARN, - "RoQ video: encountered quad codebook chunk with weird lengths (1)\n"); - return; - } - if ((v2_count * 6) > (encoded_size - stream_ptr)) - { - mp_msg(MSGT_DECVIDEO, MSGL_WARN, - "RoQ video: encountered quad codebook chunk with weird lengths (2)\n"); - return; - } - - // load the 2x2 vectors - for (i = 0; i < v2_count; i++) - { - info->v2[i].y0 = encoded[stream_ptr++]; - info->v2[i].y1 = encoded[stream_ptr++]; - info->v2[i].y2 = encoded[stream_ptr++]; - info->v2[i].y3 = encoded[stream_ptr++]; - info->v2[i].u = encoded[stream_ptr++]; - info->v2[i].v = encoded[stream_ptr++]; - prep_v2(&info->v2[i]); - } - - if ((v4_count * 4) > (encoded_size - stream_ptr)) - { - mp_msg(MSGT_DECVIDEO, MSGL_WARN, - "RoQ video: encountered quad codebook chunk with weird lengths (3)\n"); - return; - } - - // load the 4x4 vectors - for (i = 0; i < v4_count; i++) - { - info->v4[i].v2_index[0] = encoded[stream_ptr++]; - info->v4[i].v2_index[1] = encoded[stream_ptr++]; - info->v4[i].v2_index[2] = encoded[stream_ptr++]; - info->v4[i].v2_index[3] = encoded[stream_ptr++]; - } - } - - if (LE_16(&encoded[stream_ptr]) == RoQ_QUAD_VQ) - { - stream_ptr += 2; - chunk_length = LE_32(&encoded[stream_ptr]); - stream_ptr += 4; - mean_motion_y = encoded[stream_ptr++]; - mean_motion_x = encoded[stream_ptr++]; - - // start by copying entire previous frame - if (info->prev_frame) - { - memcpy(mpi->planes[0], info->prev_frame->planes[0], - mpi->width * mpi->height); - memcpy(mpi->planes[1], info->prev_frame->planes[1], - (mpi->width * mpi->height) / 4); - memcpy(mpi->planes[2], info->prev_frame->planes[2], - (mpi->width * mpi->height) / 4); - } - - // iterate through the 16x16 macroblocks - for (mblock_y = 0; mblock_y < mpi->height; mblock_y += 16) - { - for (mblock_x = 0; mblock_x < mpi->width; mblock_x += 16) - { - // iterate through the 4 quadrants of the macroblock - for (i = 0; i < 4; i++) - { - quad8_x = mblock_x; - quad8_y = mblock_y; - if (i & 0x01) quad8_x += 8; - if (i & 0x02) quad8_y += 8; - - // set up the planes - y_plane = mpi->planes[0] + quad8_y * y_stride + quad8_x; - u_plane = mpi->planes[1] + (quad8_y / 2) * u_stride + (quad8_x / 2); - v_plane = mpi->planes[2] + (quad8_y / 2) * v_stride + (quad8_x / 2); - - // decide how to handle this 8x8 quad - FETCH_NEXT_CODE(); - switch(roq_code) - { - // 8x8 block is the same as in the previous frame; - // skip it - case 0: - break; - - // 8x8 block is painted with an 8x8 block from the last frame - // (i.e., motion compensation) - case 1: - CHECK_PREV_FRAME(); - - // prepare the pointers to the planes in the previous frame - FETCH_NEXT_ARGUMENT(); // argument contains motion vectors - - // figure out the motion vectors - mx = quad8_x + 8 - (argument >> 4) - mean_motion_x; - my = quad8_y + 8 - (argument & 0x0F) - mean_motion_y; - - prev_y_plane = info->prev_frame->planes[0] + - my * y_stride + mx; - prev_u_plane = info->prev_frame->planes[1] + - (my / 2) * u_stride + (mx + 1) / 2; - prev_v_plane = info->prev_frame->planes[2] + - (my / 2) * v_stride + (mx + 1) / 2; - -// sanity check before rendering - copy_8x8_block( - y_plane, - u_plane, - v_plane, - prev_y_plane, - prev_u_plane, - prev_v_plane, - y_stride, - u_stride, - v_stride - ); - break; - - // 8x8 block is painted with a doublesized 4x4 vector - case 2: - FETCH_NEXT_ARGUMENT(); - v4 = info->v4[argument]; - -// sanity check before rendering - // iterate through 4 4x4 blocks - for (j = 0; j < 4; j++) - { - quad4_x = quad8_x; - quad4_y = quad8_y; - if (j & 0x01) quad4_x += 4; - if (j & 0x02) quad4_y += 4; - - // set up the planes - y_plane = mpi->planes[0] + quad4_y * y_stride + quad4_x; - u_plane = mpi->planes[1] + - (quad4_y / 2) * u_stride + (quad4_x / 2); - v_plane = mpi->planes[2] + - (quad4_y / 2) * v_stride + (quad4_x / 2); - - paint_v2double_block( - y_plane, - u_plane, - v_plane, - &info->v2[v4.v2_index[j]], - y_stride, - u_stride, - v_stride - ); - } - break; - - // 8x8 block is broken down into 4 4x4 blocks and painted using - // 4 different codes. - case 3: - // iterate through 4 4x4 blocks - for (j = 0; j < 4; j++) - { - quad4_x = quad8_x; - quad4_y = quad8_y; - if (j & 0x01) quad4_x += 4; - if (j & 0x02) quad4_y += 4; - - // set up the planes - y_plane = mpi->planes[0] + quad4_y * y_stride + quad4_x; - u_plane = mpi->planes[1] + - (quad4_y / 2) * u_stride + (quad4_x / 2); - v_plane = mpi->planes[2] + - (quad4_y / 2) * v_stride + (quad4_x / 2); - - // decide how to handle this 4x4 quad - FETCH_NEXT_CODE(); - switch(roq_code) - { - // 4x4 block is the same as in the previous frame; - // skip it - case 0: - break; - - // 4x4 block is motion compensated from the previous frame - case 1: - CHECK_PREV_FRAME(); - // prepare the pointers to the planes in the previous frame - FETCH_NEXT_ARGUMENT(); // argument contains motion vectors - - // figure out the motion vectors - mx = quad4_x + 8 - (argument >> 4) - mean_motion_x; - my = quad4_y + 8 - (argument & 0x0F) - mean_motion_y; - - prev_y_plane = info->prev_frame->planes[0] + - my * y_stride + mx; - prev_u_plane = info->prev_frame->planes[1] + - (my / 2) * u_stride + (mx + 1) / 2; - prev_v_plane = info->prev_frame->planes[2] + - (my / 2) * u_stride + (mx + 1) / 2; - -// sanity check before rendering - copy_4x4_block( - y_plane, - u_plane, - v_plane, - prev_y_plane, - prev_u_plane, - prev_v_plane, - y_stride, - u_stride, - v_stride - ); - break; - - // 4x4 block is copied directly from v4 vector table - case 2: - FETCH_NEXT_ARGUMENT(); - v4 = info->v4[argument]; - - paint_v4_block( - y_plane, - u_plane, - v_plane, - y_stride, - u_stride, - v_stride, - &info->v2[v4.v2_index[0]], - &info->v2[v4.v2_index[1]], - &info->v2[v4.v2_index[2]], - &info->v2[v4.v2_index[3]]); - break; - - // 4x4 block is built from 4 2x2 vectors - case 3: - if (stream_ptr + 4 > encoded_size) - { - mp_msg(MSGT_DECVIDEO, MSGL_WARN, - "RoQ video: stream pointer just went out of bounds (2)\n"); - return; - } - paint_v4_block( - y_plane, - u_plane, - v_plane, - y_stride, - u_stride, - v_stride, - &info->v2[encoded[stream_ptr + 0]], - &info->v2[encoded[stream_ptr + 1]], - &info->v2[encoded[stream_ptr + 2]], - &info->v2[encoded[stream_ptr + 3]]); - stream_ptr += 4; - break; - } - } - break; - } - } - } - } - } - - // one last sanity check on the way out - // (apparently, it's not unusual to have 2 bytes left over after decode) - if (stream_ptr < encoded_size - 2) - { - mp_msg(MSGT_DECVIDEO, MSGL_WARN, - "RoQ video: completed frame decode with bytes left over (%d < %d)\n", - stream_ptr, encoded_size); - } - - // save the current frame as the previous frame for the next iteration - info->prev_frame = mpi; -} - -// Initialize the RoQ audio decoder, which is to say, initialize the table -// of squares. -void *roq_decode_audio_init(void) -{ - short *square_array; - short square; - int i; - - square_array = (short *)malloc(256 * sizeof(short)); - if (!square_array) - return NULL; - - for (i = 0; i < 128; i++) - { - square = i * i; - square_array[i] = square; - square_array[i + 128] = -square; - } - - return square_array; -} - -int roq_decode_audio( - unsigned short *output, - unsigned char *input, - int encoded_size, - int channels, - void *context) -{ - short *square_array = (short *)context; - int i; - int predictor[2]; - int channel_number = 0; - - // prepare the initial predictors - if (channels == 1) - predictor[0] = LE_16(&input[0]); - else - { - predictor[0] = input[1] << 8; - predictor[1] = input[0] << 8; - } - SE_16BIT(predictor[0]); - SE_16BIT(predictor[1]); - - // decode the samples - for (i = 2; i < encoded_size; i++) - { - predictor[channel_number] += square_array[input[i]]; - CLAMP_S16(predictor[channel_number]); - output[i - 2] = predictor[channel_number]; - - // toggle channel - channel_number ^= channels - 1; - } - - // return the number of samples decoded - return (encoded_size - 2); -} diff -r fd85802f755b -r 628c85c15c7b roqav.h --- a/roqav.h Sat Apr 13 17:44:14 2002 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,12 +0,0 @@ -#ifndef ROQAV_H -#define ROQAV_H - -void *roq_decode_video_init(void); -void roq_decode_video(void *context, unsigned char *encoded, - int encoded_size, mp_image_t *mpi); - -void *roq_decode_audio_init(void); -int roq_decode_audio(unsigned short *output, unsigned char *input, - int encoded_size, int channels, void *context); - -#endif // ROQAV_H