changeset 5602:628c85c15c7b

moved to libmpcodecs/native/
author arpi
date Sat, 13 Apr 2002 18:03:02 +0000
parents fd85802f755b
children 5f4485862a72
files RTjpegN.c RTjpegN.h alaw.h cinepak.c cyuv.c fli.c libmpcodecs/native/RTjpegN.c libmpcodecs/native/RTjpegN.h libmpcodecs/native/alaw.h libmpcodecs/native/cinepak.c libmpcodecs/native/cyuv.c libmpcodecs/native/fli.c libmpcodecs/native/lzoconf.h libmpcodecs/native/minilzo.c libmpcodecs/native/minilzo.h libmpcodecs/native/msvidc.c libmpcodecs/native/nuppelvideo.c libmpcodecs/native/qtrle.c libmpcodecs/native/qtrpza.c libmpcodecs/native/qtsmc.c libmpcodecs/native/roqav.c libmpcodecs/native/roqav.h lzoconf.h minilzo.c minilzo.h msvidc.c nuppelvideo.c qtrle.c qtrpza.c qtsmc.c roqav.c roqav.h
diffstat 32 files changed, 10760 insertions(+), 10760 deletions(-) [+]
line wrap: on
line diff
--- a/RTjpegN.c	Sat Apr 13 17:44:14 2002 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,3800 +0,0 @@
-/* 
-   RTjpeg (C) Justin Schoeman 1998 (justin@suntiger.ee.up.ac.za)
-   
-   With modifications by:
-   (c) 1998, 1999 by Joerg Walter <trouble@moes.pmnet.uni-oldenburg.de>
-   and
-   (c) 1999 by Wim Taymans <wim.taymans@tvd.be>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-    
-*/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "config.h"
-#ifdef HAVE_MMX
-#define MMX
-#endif
-
-#include "RTjpegN.h"
-
-#ifdef MMX
-#include "mmx.h"
-#endif
-
-//#define SHOWBLOCK 1
-#define BETTERCOMPRESSION 1
-
-static const unsigned char RTjpeg_ZZ[64]={
-0,
-8, 1,
-2, 9, 16,
-24, 17, 10, 3,
-4, 11, 18, 25, 32,
-40, 33, 26, 19, 12, 5,
-6, 13, 20, 27, 34, 41, 48,
-56, 49, 42, 35, 28, 21, 14, 7,
-15, 22, 29, 36, 43, 50, 57,
-58, 51, 44, 37, 30, 23,
-31, 38, 45, 52, 59,
-60, 53, 46, 39,
-47, 54, 61,
-62, 55,
-63 };
-
-static const __u64 RTjpeg_aan_tab[64]={
-4294967296ULL, 5957222912ULL, 5611718144ULL, 5050464768ULL, 4294967296ULL, 3374581504ULL, 2324432128ULL, 1184891264ULL, 
-5957222912ULL, 8263040512ULL, 7783580160ULL, 7005009920ULL, 5957222912ULL, 4680582144ULL, 3224107520ULL, 1643641088ULL, 
-5611718144ULL, 7783580160ULL, 7331904512ULL, 6598688768ULL, 5611718144ULL, 4408998912ULL, 3036936960ULL, 1548224000ULL, 
-5050464768ULL, 7005009920ULL, 6598688768ULL, 5938608128ULL, 5050464768ULL, 3968072960ULL, 2733115392ULL, 1393296000ULL, 
-4294967296ULL, 5957222912ULL, 5611718144ULL, 5050464768ULL, 4294967296ULL, 3374581504ULL, 2324432128ULL, 1184891264ULL, 
-3374581504ULL, 4680582144ULL, 4408998912ULL, 3968072960ULL, 3374581504ULL, 2651326208ULL, 1826357504ULL, 931136000ULL, 
-2324432128ULL, 3224107520ULL, 3036936960ULL, 2733115392ULL, 2324432128ULL, 1826357504ULL, 1258030336ULL, 641204288ULL, 
-1184891264ULL, 1643641088ULL, 1548224000ULL, 1393296000ULL, 1184891264ULL, 931136000ULL, 641204288ULL, 326894240ULL, 
-};
-
-#ifndef MMX
-static __s32 RTjpeg_ws[64+31];
-#endif
-__u8 RTjpeg_alldata[2*64+4*64+4*64+4*64+4*64+32];
-
-static __s16 *block; // rh
-static __s16 *RTjpeg_block;
-static __s32 *RTjpeg_lqt;
-static __s32 *RTjpeg_cqt;
-static __u32 *RTjpeg_liqt;
-static __u32 *RTjpeg_ciqt;
-
-static unsigned char RTjpeg_lb8;
-static unsigned char RTjpeg_cb8;
-static int RTjpeg_width, RTjpeg_height;
-static int RTjpeg_Ywidth, RTjpeg_Cwidth;
-static int RTjpeg_Ysize, RTjpeg_Csize;
-
-static __s16 *RTjpeg_old=NULL;
-
-#ifdef MMX
-mmx_t RTjpeg_lmask;
-mmx_t RTjpeg_cmask;
-#else
-__u16 RTjpeg_lmask;
-__u16 RTjpeg_cmask;
-#endif
-int RTjpeg_mtest=0;
-
-static const unsigned char RTjpeg_lum_quant_tbl[64] = {
-    16,  11,  10,  16,  24,  40,  51,  61,
-    12,  12,  14,  19,  26,  58,  60,  55,
-    14,  13,  16,  24,  40,  57,  69,  56,
-    14,  17,  22,  29,  51,  87,  80,  62,
-    18,  22,  37,  56,  68, 109, 103,  77,
-    24,  35,  55,  64,  81, 104, 113,  92,
-    49,  64,  78,  87, 103, 121, 120, 101,
-    72,  92,  95,  98, 112, 100, 103,  99
- };
-
-static const unsigned char RTjpeg_chrom_quant_tbl[64] = {
-    17,  18,  24,  47,  99,  99,  99,  99,
-    18,  21,  26,  66,  99,  99,  99,  99,
-    24,  26,  56,  99,  99,  99,  99,  99,
-    47,  66,  99,  99,  99,  99,  99,  99,
-    99,  99,  99,  99,  99,  99,  99,  99,
-    99,  99,  99,  99,  99,  99,  99,  99,
-    99,  99,  99,  99,  99,  99,  99,  99,
-    99,  99,  99,  99,  99,  99,  99,  99
- };
-
-#ifdef BETTERCOMPRESSION 
-
-/*--------------------------------------------------*/
-/*  better encoding, but needs a lot more cpu time  */
-/*  seems to be more effective than old method +lzo */
-/*  with this encoding lzo isn't efficient anymore  */
-/*  there is still more potential for better        */
-/*  encoding but that would need even more cputime  */
-/*  anyway your mileage may vary                    */
-/*                                                  */
-/*  written by Martin BIELY and Roman HOCHLEITNER   */
-/*--------------------------------------------------*/
-
-/* +++++++++++++++++++++++++++++++++++++++++++++++++++*/
-/* Block to Stream (encoding)                         */
-/*                                                    */
-
-int RTjpeg_b2s(__s16 *data, __s8 *strm, __u8 bt8)
-{
- register int ci, co=1;
- register __s16 ZZvalue;
- register unsigned char bitten;
- register unsigned char bitoff;
-
-#ifdef SHOWBLOCK
-
-  int ii;
-  for (ii=0; ii < 64; ii++) {
-    fprintf(stdout, "%d ", data[RTjpeg_ZZ[ii]]);
-  }
-  fprintf(stdout, "\n\n");
-
-#endif
-
-// *strm++ = 0x10;
-// *strm   = 0x00;
-//
-// return 2;
-
- // first byte allways written
- (__u8)strm[0]=
-      (__u8)(data[RTjpeg_ZZ[0]]>254) ? 254:((data[RTjpeg_ZZ[0]]<0)?0:data[RTjpeg_ZZ[0]]);
-
-
- ci=63;
- while (data[RTjpeg_ZZ[ci]]==0 && ci>0) ci--;
-
- bitten = ((unsigned char)ci) << 2;
-
- if (ci==0) {
-   (__u8)strm[1]= bitten; 
-   co = 2;
-   return (int)co;
- }
- 
- /* bitoff=0 because the high 6bit contain first non zero position */ 
- bitoff = 0;
- co = 1;
-
- for(; ci>0; ci--) {
-
-   ZZvalue = data[RTjpeg_ZZ[ci]];
-   
-   switch(ZZvalue) {
-   case 0:
-	break;
-   case 1:
-        bitten |= (0x01<<bitoff);
-	break;
-   case -1:
-        bitten |= (0x03<<bitoff);
-	break;
-   default:
-        bitten |= (0x02<<bitoff);
-	goto HERZWEH;
-	break;
-   }
-
-   if( bitoff == 0 ) {
-      (__u8)strm[co]= bitten; 
-      bitten = 0;
-      bitoff = 8; 
-      co++;
-   } /* "fall through" */
-   bitoff-=2; 
-
- }
- 
- /* ci must be 0 */
- if(bitoff != 6) {
-
-      (__u8)strm[co]= bitten; 
-      co++;
-     
- }
- goto BAUCHWEH;
-
-HERZWEH:
-/* ci cannot be 0 */
-/* correct bitoff to nibble boundaries */
-
- switch(bitoff){
- case 4:
- case 6:
-   bitoff = 0;
-   break;
- case 2:
- case 0:
-   (__u8)strm[co]= bitten; 
-   bitoff = 4;
-   co++;
-   bitten = 0; // clear half nibble values in bitten
-   break;
- default:
-   break;
- }
-
- for(; ci>0; ci--) {
-
-   ZZvalue = data[RTjpeg_ZZ[ci]];
-
-   if( (ZZvalue > 7) || (ZZvalue < -7) ) {
-        bitten |= (0x08<<bitoff);
-	goto HIRNWEH;
-   } 
-   
-   bitten |= (ZZvalue&0xf)<<bitoff;
-
-   if( bitoff == 0 ) {
-      (__u8)strm[co]= bitten; 
-      bitten = 0;
-      bitoff = 8;
-      co++;
-   } /* "fall thru" */
-   bitoff-=4;
- }
- 
- /* ci must be 0 */
- if( bitoff == 0 ) {
-    (__u8)strm[co]= bitten; 
-    co++;
- }  
- goto BAUCHWEH;
-
-HIRNWEH:
-
- (__u8)strm[co]= bitten; 
- co++;
-  
-
- /* bitting is over now we bite */
- for(; ci>0; ci--) {
-
-   ZZvalue = data[RTjpeg_ZZ[ci]];
-
-   if(ZZvalue>0) 
-   {
-     strm[co++]=(__s8)(ZZvalue>127)?127:ZZvalue;
-   } 
-   else 
-   {
-     strm[co++]=(__s8)(ZZvalue<-128)?-128:ZZvalue;
-   }
-
- }
- 
-
-BAUCHWEH:
-  /* we gotoo much now we are ill */
-#ifdef SHOWBLOCK
-{
-int i;
-fprintf(stdout, "\nco = '%d'\n", co);
- for (i=0; i < co+2; i++) {
-   fprintf(stdout, "%d ", strm[i]);
- }
-fprintf(stdout, "\n\n");
-}
-#endif
-
- return (int)co;
-}
-
-/* +++++++++++++++++++++++++++++++++++++++++++++++++++*/
-/* Stream to Block  (decoding)                        */
-/*                                                    */
-
-int RTjpeg_s2b(__s16 *data, __s8 *strm, __u8 bt8, __u32 *qtbl)
-{
- int ci;
- register int co;
- register int i;
- register unsigned char bitten;
- register unsigned char bitoff;
-
- /* first byte always read */
- i=RTjpeg_ZZ[0];
- data[i]=((__u8)strm[0])*qtbl[i];
-
- /* we start at the behind */ 
-
- bitten = ((unsigned char)strm[1]) >> 2;
- co = 63;
- for(; co > bitten; co--) {
-
-   data[RTjpeg_ZZ[co]] = 0;
-
- }
-
- if (co==0) {
-   ci = 2;
-   goto AUTOBAHN;
- }
-
- /* we have to read the last 2 bits of the second byte */
- ci=1;
- bitoff = 0;
-
- for(; co>0; co--) {
-
-  bitten  = ((unsigned char)strm[ci]) >> bitoff;
-  bitten &= 0x03;
-
-  i=RTjpeg_ZZ[co];
-
-  switch( bitten ) {
-  case 0x03:
-    data[i]= -qtbl[i];
-    break;
-  case 0x02:
-    goto FUSSWEG;
-    break;
-  case 0x01:
-    data[i]= qtbl[i];
-    break;
-  case 0x00:
-    data[i]= 0;
-    break;
-  default: 
-	
-  } 
-
-  if( bitoff == 0 ) {
-    bitoff = 8;
-    ci++;
-  }
-  bitoff -= 2;
- }
- /* co is 0 now */
- /* data is written properly */
-
- /* if bitoff!=6 then ci is the index, but should be the byte count, so we increment by 1 */
- if (bitoff!=6) ci++;
-
- goto AUTOBAHN;
- 
- 
-FUSSWEG:
-/* correct bitoff to nibble */
- switch(bitoff){
- case 4:
- case 6:
-   bitoff = 0;
-   break;
- case 2:
- case 0:
-   /* we have to read from the next byte */
-   ci++;
-   bitoff = 4;
-   break;
- default:
-   break;
- }
-
- for(; co>0; co--) {
-
-  bitten  = ((unsigned char)strm[ci]) >> bitoff;
-  bitten &= 0x0f;
-
-  i=RTjpeg_ZZ[co];
-
-  if( bitten == 0x08 ) {
-    goto STRASSE;
-  }
-
-  /* the compiler cannot do sign extension for signed nibbles */
-  if( bitten & 0x08 ) {
-    bitten |= 0xf0;
-  }
-  /* the unsigned char bitten now is a valid signed char */
-   
-  data[i]=((signed char)bitten)*qtbl[i];
-  
-  if( bitoff == 0 ) {
-    bitoff = 8;
-    ci++;
-  }
-  bitoff -= 4;
- }
- /* co is 0 */
-
- /* if bitoff!=4 then ci is the index, but should be the byte count, so we increment by 1 */
- if (bitoff!=4) ci++;
-
- goto AUTOBAHN;
-
-STRASSE:
-  ci++; 
- 
- for(; co>0; co--) {
-  i=RTjpeg_ZZ[co];
-  data[i]=strm[ci++]*qtbl[i];
- }
-
- /* ci now is the count, because it points to next element => no incrementing */
-
-AUTOBAHN:
-
-#ifdef SHOWBLOCK
-fprintf(stdout, "\nci = '%d'\n", ci);
- for (i=0; i < 64; i++) {
-   fprintf(stdout, "%d ", data[RTjpeg_ZZ[i]]);
- }
-fprintf(stdout, "\n\n");
-#endif
-
- return ci;
-}
-
-#else
-
-int RTjpeg_b2s(__s16 *data, __s8 *strm, __u8 bt8)
-{
- register int ci, co=1, tmp;
- register __s16 ZZvalue;
-
-#ifdef SHOWBLOCK
-
-  int ii;
-  for (ii=0; ii < 64; ii++) {
-    fprintf(stdout, "%d ", data[RTjpeg_ZZ[ii]]);
-  }
-  fprintf(stdout, "\n\n");
-
-#endif
-
- (__u8)strm[0]=(__u8)(data[RTjpeg_ZZ[0]]>254) ? 254:((data[RTjpeg_ZZ[0]]<0)?0:data[RTjpeg_ZZ[0]]);
-
- for(ci=1; ci<=bt8; ci++) 
- {
-	ZZvalue = data[RTjpeg_ZZ[ci]];
-
-   if(ZZvalue>0) 
-	{
-     strm[co++]=(__s8)(ZZvalue>127)?127:ZZvalue;
-   } 
-	else 
-	{
-     strm[co++]=(__s8)(ZZvalue<-128)?-128:ZZvalue;
-   }
- }
-
- for(; ci<64; ci++) 
- {
-  ZZvalue = data[RTjpeg_ZZ[ci]];
-
-  if(ZZvalue>0)
-  {
-   strm[co++]=(__s8)(ZZvalue>63)?63:ZZvalue;
-  } 
-  else if(ZZvalue<0)
-  {
-   strm[co++]=(__s8)(ZZvalue<-64)?-64:ZZvalue;
-  } 
-  else /* compress zeros */
-  {
-   tmp=ci;
-   do
-   {
-    ci++;
-   } 
-	while((ci<64)&&(data[RTjpeg_ZZ[ci]]==0));
-
-   strm[co++]=(__s8)(63+(ci-tmp));
-   ci--;
-  }
- }
- return (int)co;
-}
-
-int RTjpeg_s2b(__s16 *data, __s8 *strm, __u8 bt8, __u32 *qtbl)
-{
- int ci=1, co=1, tmp;
- register int i;
-
- i=RTjpeg_ZZ[0];
- data[i]=((__u8)strm[0])*qtbl[i];
-
- for(co=1; co<=bt8; co++)
- {
-  i=RTjpeg_ZZ[co];
-  data[i]=strm[ci++]*qtbl[i];
- }
- 
- for(; co<64; co++)
- {
-  if(strm[ci]>63)
-  {
-   tmp=co+strm[ci]-63;
-   for(; co<tmp; co++)data[RTjpeg_ZZ[co]]=0;
-   co--;
-  } else
-  {
-   i=RTjpeg_ZZ[co];
-   data[i]=strm[ci]*qtbl[i];
-  }
-  ci++;
- }
- return (int)ci;
-}
-#endif
-
-#if defined(MMX)
-void RTjpeg_quant_init(void)
-{
- int i;
- __s16 *qtbl;
- 
- qtbl=(__s16 *)RTjpeg_lqt;
- for(i=0; i<64; i++)qtbl[i]=(__s16)RTjpeg_lqt[i];
-
- qtbl=(__s16 *)RTjpeg_cqt;
- for(i=0; i<64; i++)qtbl[i]=(__s16)RTjpeg_cqt[i];
-}
-
-static mmx_t RTjpeg_ones=(mmx_t)(long long)0x0001000100010001LL;
-static mmx_t RTjpeg_half=(mmx_t)(long long)0x7fff7fff7fff7fffLL;
-
-void RTjpeg_quant(__s16 *block, __s32 *qtbl)
-{
- int i;
- mmx_t *bl, *ql;
- 
- ql=(mmx_t *)qtbl;
- bl=(mmx_t *)block;
- 
- movq_m2r(RTjpeg_ones, mm6);
- movq_m2r(RTjpeg_half, mm7);
-
- for(i=16; i; i--) 
- {
-  movq_m2r(*(ql++), mm0); /* quant vals (4) */
-  movq_m2r(*bl, mm2); /* block vals (4) */
-  movq_r2r(mm0, mm1);
-  movq_r2r(mm2, mm3);
-  
-  punpcklwd_r2r(mm6, mm0); /*           1 qb 1 qa */
-  punpckhwd_r2r(mm6, mm1); /* 1 qd 1 qc */
-  
-  punpcklwd_r2r(mm7, mm2); /*                   32767 bb 32767 ba */
-  punpckhwd_r2r(mm7, mm3); /* 32767 bd 32767 bc */
-  
-  pmaddwd_r2r(mm2, mm0); /*                         32767+bb*qb 32767+ba*qa */
-  pmaddwd_r2r(mm3, mm1); /* 32767+bd*qd 32767+bc*qc */
-  
-  psrad_i2r(16, mm0);
-  psrad_i2r(16, mm1);
-  
-  packssdw_r2r(mm1, mm0);
-  
-  movq_r2m(mm0, *(bl++));
-  
- }
-}
-#else
-void RTjpeg_quant_init(void)
-{
-}
-
-void RTjpeg_quant(__s16 *block, __s32 *qtbl)
-{
- int i;
- 
- for(i=0; i<64; i++)
-   block[i]=(__s16)((block[i]*qtbl[i]+32767)>>16);
-}
-#endif
-
-/*
- * Perform the forward DCT on one block of samples.
- */
-#ifdef MMX
-static mmx_t RTjpeg_C4   =(mmx_t)(long long)0x2D412D412D412D41LL;
-static mmx_t RTjpeg_C6   =(mmx_t)(long long)0x187E187E187E187ELL;
-static mmx_t RTjpeg_C2mC6=(mmx_t)(long long)0x22A322A322A322A3LL;
-static mmx_t RTjpeg_C2pC6=(mmx_t)(long long)0x539F539F539F539FLL;
-static mmx_t RTjpeg_zero =(mmx_t)(long long)0x0000000000000000LL;
-
-#else
-
-#define FIX_0_382683433  ((__s32)   98)		/* FIX(0.382683433) */
-#define FIX_0_541196100  ((__s32)  139)		/* FIX(0.541196100) */
-#define FIX_0_707106781  ((__s32)  181)		/* FIX(0.707106781) */
-#define FIX_1_306562965  ((__s32)  334)		/* FIX(1.306562965) */
-
-#define DESCALE10(x) (__s16)( ((x)+128) >> 8)
-#define DESCALE20(x)  (__s16)(((x)+32768) >> 16)
-#define D_MULTIPLY(var,const)  ((__s32) ((var) * (const)))
-#endif
-
-void RTjpeg_dct_init(void)
-{
- int i;
- 
- for(i=0; i<64; i++)
- {
-  RTjpeg_lqt[i]=(((__u64)RTjpeg_lqt[i]<<32)/RTjpeg_aan_tab[i]);
-  RTjpeg_cqt[i]=(((__u64)RTjpeg_cqt[i]<<32)/RTjpeg_aan_tab[i]);
- }
-}
-
-void RTjpeg_dctY(__u8 *idata, __s16 *odata, int rskip)
-{
-#ifndef MMX
-  __s32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-  __s32 tmp10, tmp11, tmp12, tmp13;
-  __s32 z1, z2, z3, z4, z5, z11, z13;
-  __u8 *idataptr;
-  __s16 *odataptr;
-  __s32 *wsptr;
-  int ctr;
-
-  idataptr = idata;
-  wsptr = RTjpeg_ws;
-  for (ctr = 7; ctr >= 0; ctr--) {
-    tmp0 = idataptr[0] + idataptr[7];
-    tmp7 = idataptr[0] - idataptr[7];
-    tmp1 = idataptr[1] + idataptr[6];
-    tmp6 = idataptr[1] - idataptr[6];
-    tmp2 = idataptr[2] + idataptr[5];
-    tmp5 = idataptr[2] - idataptr[5];
-    tmp3 = idataptr[3] + idataptr[4];
-    tmp4 = idataptr[3] - idataptr[4];
-    
-    tmp10 = (tmp0 + tmp3);	/* phase 2 */
-    tmp13 = tmp0 - tmp3;
-    tmp11 = (tmp1 + tmp2);
-    tmp12 = tmp1 - tmp2;
-    
-    wsptr[0] = (tmp10 + tmp11)<<8; /* phase 3 */
-    wsptr[4] = (tmp10 - tmp11)<<8;
-    
-    z1 = D_MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
-    wsptr[2] = (tmp13<<8) + z1;	/* phase 5 */
-    wsptr[6] = (tmp13<<8) - z1;
-    
-    tmp10 = tmp4 + tmp5;	/* phase 2 */
-    tmp11 = tmp5 + tmp6;
-    tmp12 = tmp6 + tmp7;
-
-    z5 = D_MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
-    z2 = D_MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
-    z4 = D_MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
-    z3 = D_MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
-
-    z11 = (tmp7<<8) + z3;		/* phase 5 */
-    z13 = (tmp7<<8) - z3;
-
-    wsptr[5] = z13 + z2;	/* phase 6 */
-    wsptr[3] = z13 - z2;
-    wsptr[1] = z11 + z4;
-    wsptr[7] = z11 - z4;
-
-    idataptr += rskip<<3;		/* advance pointer to next row */
-    wsptr += 8;
-  }
-
-  wsptr = RTjpeg_ws;
-  odataptr=odata;
-  for (ctr = 7; ctr >= 0; ctr--) {
-    tmp0 = wsptr[0] + wsptr[56];
-    tmp7 = wsptr[0] - wsptr[56];
-    tmp1 = wsptr[8] + wsptr[48];
-    tmp6 = wsptr[8] - wsptr[48];
-    tmp2 = wsptr[16] + wsptr[40];
-    tmp5 = wsptr[16] - wsptr[40];
-    tmp3 = wsptr[24] + wsptr[32];
-    tmp4 = wsptr[24] - wsptr[32];
-    
-    tmp10 = tmp0 + tmp3;	/* phase 2 */
-    tmp13 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp12 = tmp1 - tmp2;
-    
-    odataptr[0] = DESCALE10(tmp10 + tmp11); /* phase 3 */
-    odataptr[32] = DESCALE10(tmp10 - tmp11);
-    
-    z1 = D_MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
-    odataptr[16] = DESCALE20((tmp13<<8) + z1); /* phase 5 */
-    odataptr[48] = DESCALE20((tmp13<<8) - z1);
-
-    tmp10 = tmp4 + tmp5;	/* phase 2 */
-    tmp11 = tmp5 + tmp6;
-    tmp12 = tmp6 + tmp7;
-
-    z5 = D_MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
-    z2 = D_MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
-    z4 = D_MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
-    z3 = D_MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
-
-    z11 = (tmp7<<8) + z3;		/* phase 5 */
-    z13 = (tmp7<<8) - z3;
-
-    odataptr[40] = DESCALE20(z13 + z2); /* phase 6 */
-    odataptr[24] = DESCALE20(z13 - z2);
-    odataptr[8] = DESCALE20(z11 + z4);
-    odataptr[56] = DESCALE20(z11 - z4);
-
-    odataptr++;			/* advance pointer to next column */
-    wsptr++;
-  }
-#else
-  volatile mmx_t tmp6, tmp7;
-  register mmx_t *dataptr = (mmx_t *)odata;
-  mmx_t *idata2 = (mmx_t *)idata;
-
-   // first copy the input 8 bit to the destination 16 bits
-
-   movq_m2r(RTjpeg_zero, mm2);
-
-
-	movq_m2r(*idata2, mm0);		 
-	movq_r2r(mm0, mm1);		 			
-
-	punpcklbw_r2r(mm2, mm0);
-	movq_r2m(mm0, *(dataptr));
-
-	punpckhbw_r2r(mm2, mm1);
-	movq_r2m(mm1, *(dataptr+1));
-	
-	idata2 += rskip;
-
-	movq_m2r(*idata2, mm0);		 
-	movq_r2r(mm0, mm1);		 			
-
-	punpcklbw_r2r(mm2, mm0);
-	movq_r2m(mm0, *(dataptr+2));
-
-	punpckhbw_r2r(mm2, mm1);
-	movq_r2m(mm1, *(dataptr+3));
-	
-	idata2 += rskip;
-
-	movq_m2r(*idata2, mm0);		 
-	movq_r2r(mm0, mm1);		 			
-
-	punpcklbw_r2r(mm2, mm0);
-	movq_r2m(mm0, *(dataptr+4));
-
-	punpckhbw_r2r(mm2, mm1);
-	movq_r2m(mm1, *(dataptr+5));
-	
-	idata2 += rskip;
-
-	movq_m2r(*idata2, mm0);		 
-	movq_r2r(mm0, mm1);		 			
-
-	punpcklbw_r2r(mm2, mm0);
-	movq_r2m(mm0, *(dataptr+6));
-
-	punpckhbw_r2r(mm2, mm1);
-	movq_r2m(mm1, *(dataptr+7));
-	
-	idata2 += rskip;
-
-	movq_m2r(*idata2, mm0);		 
-	movq_r2r(mm0, mm1);		 			
-
-	punpcklbw_r2r(mm2, mm0);
-	movq_r2m(mm0, *(dataptr+8));
-
-	punpckhbw_r2r(mm2, mm1);
-	movq_r2m(mm1, *(dataptr+9));
-	
-	idata2 += rskip;
-
-	movq_m2r(*idata2, mm0);		 
-	movq_r2r(mm0, mm1);		 			
-
-	punpcklbw_r2r(mm2, mm0);
-	movq_r2m(mm0, *(dataptr+10));
-
-	punpckhbw_r2r(mm2, mm1);
-	movq_r2m(mm1, *(dataptr+11));
-	
-	idata2 += rskip;
-
-	movq_m2r(*idata2, mm0);		 
-	movq_r2r(mm0, mm1);		 			
-
-	punpcklbw_r2r(mm2, mm0);
-	movq_r2m(mm0, *(dataptr+12));
-
-	punpckhbw_r2r(mm2, mm1);
-	movq_r2m(mm1, *(dataptr+13));
-	
-	idata2 += rskip;
-
-	movq_m2r(*idata2, mm0);		 
-	movq_r2r(mm0, mm1);		 			
-
-	punpcklbw_r2r(mm2, mm0);
-	movq_r2m(mm0, *(dataptr+14));
-
-	punpckhbw_r2r(mm2, mm1);
-	movq_r2m(mm1, *(dataptr+15));
-
-/*  Start Transpose to do calculations on rows */
-
-	movq_m2r(*(dataptr+9), mm7);		 	// m03:m02|m01:m00 - first line (line 4)and copy into m5
-
-	movq_m2r(*(dataptr+13), mm6);	    	// m23:m22|m21:m20 - third line (line 6)and copy into m2
-	movq_r2r(mm7, mm5);		 			
-
-	punpcklwd_m2r(*(dataptr+11), mm7); 	// m11:m01|m10:m00 - interleave first and second lines
-	movq_r2r(mm6, mm2);						 
-
-	punpcklwd_m2r(*(dataptr+15), mm6);  // m31:m21|m30:m20 - interleave third and fourth lines
-	movq_r2r(mm7, mm1);
-
-	movq_m2r(*(dataptr+11), mm3);	      // m13:m13|m11:m10 - second line	 
-	punpckldq_r2r(mm6, mm7);				// m30:m20|m10:m00 - interleave to produce result 1
-
-	movq_m2r(*(dataptr+15), mm0);	      // m13:m13|m11:m10 - fourth line
-	punpckhdq_r2r(mm6, mm1);				// m31:m21|m11:m01 - interleave to produce result 2
-
-	movq_r2m(mm7,*(dataptr+9));			// write result 1
-	punpckhwd_r2r(mm3, mm5);				// m13:m03|m12:m02 - interleave first and second lines
-	
-	movq_r2m(mm1,*(dataptr+11));			// write result 2
-	punpckhwd_r2r(mm0, mm2);				// m33:m23|m32:m22 - interleave third and fourth lines
-
-	movq_r2r(mm5, mm1);
-	punpckldq_r2r(mm2, mm5);				// m32:m22|m12:m02 - interleave to produce result 3
-
-	movq_m2r(*(dataptr+1), mm0);			// m03:m02|m01:m00 - first line, 4x4
-	punpckhdq_r2r(mm2, mm1);				// m33:m23|m13:m03 - interleave to produce result 4
-
-	movq_r2m(mm5,*(dataptr+13));			// write result 3
-
-	// last 4x4 done
-
-	movq_r2m(mm1, *(dataptr+15));			// write result 4, last 4x4
-
-	movq_m2r(*(dataptr+5), mm2);			// m23:m22|m21:m20 - third line
-	movq_r2r(mm0, mm6);
-
-	punpcklwd_m2r(*(dataptr+3), mm0);  	// m11:m01|m10:m00 - interleave first and second lines
-	movq_r2r(mm2, mm7);
-
-	punpcklwd_m2r(*(dataptr+7), mm2);  	// m31:m21|m30:m20 - interleave third and fourth lines
-	movq_r2r(mm0, mm4);
-
-	//
-	movq_m2r(*(dataptr+8), mm1);			// n03:n02|n01:n00 - first line 
-	punpckldq_r2r(mm2, mm0);				// m30:m20|m10:m00 - interleave to produce first result
-
-	movq_m2r(*(dataptr+12), mm3);			// n23:n22|n21:n20 - third line
-	punpckhdq_r2r(mm2, mm4);				// m31:m21|m11:m01 - interleave to produce second result
-
-	punpckhwd_m2r(*(dataptr+3), mm6);  	// m13:m03|m12:m02 - interleave first and second lines
-	movq_r2r(mm1, mm2);               	// copy first line
-
-	punpckhwd_m2r(*(dataptr+7), mm7);  	// m33:m23|m32:m22 - interleave third and fourth lines
-	movq_r2r(mm6, mm5);						// copy first intermediate result
-
-	movq_r2m(mm0, *(dataptr+8));			// write result 1
-	punpckhdq_r2r(mm7, mm5);				// m33:m23|m13:m03 - produce third result
-
-	punpcklwd_m2r(*(dataptr+10), mm1);  // n11:n01|n10:n00 - interleave first and second lines
-	movq_r2r(mm3, mm0);						// copy third line
-
-	punpckhwd_m2r(*(dataptr+10), mm2);  // n13:n03|n12:n02 - interleave first and second lines
-
-	movq_r2m(mm4, *(dataptr+10));			// write result 2 out
-	punpckldq_r2r(mm7, mm6);				// m32:m22|m12:m02 - produce fourth result
-
-	punpcklwd_m2r(*(dataptr+14), mm3);  // n31:n21|n30:n20 - interleave third and fourth lines
-	movq_r2r(mm1, mm4);
-
-	movq_r2m(mm6, *(dataptr+12));			// write result 3 out
-	punpckldq_r2r(mm3, mm1);				// n30:n20|n10:n00 - produce first result
-
-	punpckhwd_m2r(*(dataptr+14), mm0);  // n33:n23|n32:n22 - interleave third and fourth lines
-	movq_r2r(mm2, mm6);
-
-	movq_r2m(mm5, *(dataptr+14));			// write result 4 out
-	punpckhdq_r2r(mm3, mm4);				// n31:n21|n11:n01- produce second result
-
-	movq_r2m(mm1, *(dataptr+1));			// write result 5 out - (first result for other 4 x 4 block)
-	punpckldq_r2r(mm0, mm2);				// n32:n22|n12:n02- produce third result
-
-	movq_r2m(mm4, *(dataptr+3));			// write result 6 out
-	punpckhdq_r2r(mm0, mm6);				// n33:n23|n13:n03 - produce fourth result
-
-	movq_r2m(mm2, *(dataptr+5));			// write result 7 out
-
-	movq_m2r(*dataptr, mm0);				// m03:m02|m01:m00 - first line, first 4x4
-
-	movq_r2m(mm6, *(dataptr+7));			// write result 8 out
-
-
-// Do first 4x4 quadrant, which is used in the beginning of the DCT:
-
-	movq_m2r(*(dataptr+4), mm7);			// m23:m22|m21:m20 - third line
-	movq_r2r(mm0, mm2);
-
-	punpcklwd_m2r(*(dataptr+2), mm0);  	// m11:m01|m10:m00 - interleave first and second lines
-	movq_r2r(mm7, mm4);
-
-	punpcklwd_m2r(*(dataptr+6), mm7);  	// m31:m21|m30:m20 - interleave third and fourth lines
-	movq_r2r(mm0, mm1);
-
-	movq_m2r(*(dataptr+2), mm6);			// m13:m12|m11:m10 - second line
-	punpckldq_r2r(mm7, mm0);				// m30:m20|m10:m00 - interleave to produce result 1
-
-	movq_m2r(*(dataptr+6), mm5);			// m33:m32|m31:m30 - fourth line
-	punpckhdq_r2r(mm7, mm1);				// m31:m21|m11:m01 - interleave to produce result 2
-
-	movq_r2r(mm0, mm7);						// write result 1
-	punpckhwd_r2r(mm6, mm2);				// m13:m03|m12:m02 - interleave first and second lines
-
-	psubw_m2r(*(dataptr+14), mm7);		// tmp07=x0-x7	/* Stage 1 */
-	movq_r2r(mm1, mm6);						// write result 2
-
-	paddw_m2r(*(dataptr+14), mm0);		// tmp00=x0+x7	/* Stage 1 */
-	punpckhwd_r2r(mm5, mm4);   			// m33:m23|m32:m22 - interleave third and fourth lines
-
-	paddw_m2r(*(dataptr+12), mm1);		// tmp01=x1+x6	/* Stage 1 */
-	movq_r2r(mm2, mm3);						// copy first intermediate result
-
-	psubw_m2r(*(dataptr+12), mm6);		// tmp06=x1-x6	/* Stage 1 */
-	punpckldq_r2r(mm4, mm2);				// m32:m22|m12:m02 - interleave to produce result 3
-
-   movq_r2m(mm7, tmp7);
-	movq_r2r(mm2, mm5);						// write result 3
-
-   movq_r2m(mm6, tmp6);
-	punpckhdq_r2r(mm4, mm3);				// m33:m23|m13:m03 - interleave to produce result 4
-
-	paddw_m2r(*(dataptr+10), mm2);     	// tmp02=x2+5 /* Stage 1 */
-	movq_r2r(mm3, mm4);						// write result 4
-
-/************************************************************************************************
-					End of Transpose
-************************************************************************************************/
-
-
-   paddw_m2r(*(dataptr+8), mm3);    	// tmp03=x3+x4 /* stage 1*/
-   movq_r2r(mm0, mm7);
-
-   psubw_m2r(*(dataptr+8), mm4);    	// tmp04=x3-x4 /* stage 1*/
-   movq_r2r(mm1, mm6);
-
-	paddw_r2r(mm3, mm0);  					// tmp10 = tmp00 + tmp03 /* even 2 */
-	psubw_r2r(mm3, mm7);  					// tmp13 = tmp00 - tmp03 /* even 2 */
-
-	psubw_r2r(mm2, mm6);  					// tmp12 = tmp01 - tmp02 /* even 2 */
-	paddw_r2r(mm2, mm1);  					// tmp11 = tmp01 + tmp02 /* even 2 */
-
-   psubw_m2r(*(dataptr+10), mm5);    	// tmp05=x2-x5 /* stage 1*/
-	paddw_r2r(mm7, mm6);						// tmp12 + tmp13
-
-	/* stage 3 */
-
-   movq_m2r(tmp6, mm2);
-   movq_r2r(mm0, mm3);
-
-	psllw_i2r(2, mm6);			// m8 * 2^2
-	paddw_r2r(mm1, mm0);		
-
-	pmulhw_m2r(RTjpeg_C4, mm6);			// z1
-	psubw_r2r(mm1, mm3);		
-
-   movq_r2m(mm0, *dataptr);
-   movq_r2r(mm7, mm0);
-   
-    /* Odd part */
-   movq_r2m(mm3, *(dataptr+8));
-	paddw_r2r(mm5, mm4);						// tmp10
-
-   movq_m2r(tmp7, mm3);
-	paddw_r2r(mm6, mm0);						// tmp32
-
-	paddw_r2r(mm2, mm5);						// tmp11
-	psubw_r2r(mm6, mm7);						// tmp33
-
-   movq_r2m(mm0, *(dataptr+4));
-	paddw_r2r(mm3, mm2);						// tmp12
-
-	/* stage 4 */
-
-   movq_r2m(mm7, *(dataptr+12));
-	movq_r2r(mm4, mm1);						// copy of tmp10
-
-	psubw_r2r(mm2, mm1);						// tmp10 - tmp12
-	psllw_i2r(2, mm4);			// m8 * 2^2
-
-	movq_m2r(RTjpeg_C2mC6, mm0);		
-	psllw_i2r(2, mm1);
-
-	pmulhw_m2r(RTjpeg_C6, mm1);			// z5
-	psllw_i2r(2, mm2);
-
-	pmulhw_r2r(mm0, mm4);					// z5
-
-	/* stage 5 */
-
-	pmulhw_m2r(RTjpeg_C2pC6, mm2);
-	psllw_i2r(2, mm5);
-
-	pmulhw_m2r(RTjpeg_C4, mm5);			// z3
-	movq_r2r(mm3, mm0);						// copy tmp7
-
-   movq_m2r(*(dataptr+1), mm7);
-	paddw_r2r(mm1, mm4);						// z2
-
-	paddw_r2r(mm1, mm2);						// z4
-
-	paddw_r2r(mm5, mm0);						// z11
-	psubw_r2r(mm5, mm3);						// z13
-
-	/* stage 6 */
-
-	movq_r2r(mm3, mm5);						// copy z13
-	psubw_r2r(mm4, mm3);						// y3=z13 - z2
-
-	paddw_r2r(mm4, mm5);						// y5=z13 + z2
-	movq_r2r(mm0, mm6);						// copy z11
-
-   movq_r2m(mm3, *(dataptr+6)); 			//save y3
-	psubw_r2r(mm2, mm0);						// y7=z11 - z4
-
-   movq_r2m(mm5, *(dataptr+10)); 		//save y5
-	paddw_r2r(mm2, mm6);						// y1=z11 + z4
-
-   movq_r2m(mm0, *(dataptr+14)); 		//save y7
-
-	/************************************************
-	 *  End of 1st 4 rows
-	 ************************************************/
-
-   movq_m2r(*(dataptr+3), mm1); 			// load x1   /* stage 1 */
-	movq_r2r(mm7, mm0);						// copy x0
-
-   movq_r2m(mm6, *(dataptr+2)); 			//save y1
-
-   movq_m2r(*(dataptr+5), mm2); 			// load x2   /* stage 1 */
-	movq_r2r(mm1, mm6);						// copy x1
-
-   paddw_m2r(*(dataptr+15), mm0);  		// tmp00 = x0 + x7
-
-   movq_m2r(*(dataptr+7), mm3); 			// load x3   /* stage 1 */
-	movq_r2r(mm2, mm5);						// copy x2
-
-   psubw_m2r(*(dataptr+15), mm7);  		// tmp07 = x0 - x7
-	movq_r2r(mm3, mm4);						// copy x3
-
-   paddw_m2r(*(dataptr+13), mm1);  		// tmp01 = x1 + x6
-
-	movq_r2m(mm7, tmp7);						// save tmp07
-	movq_r2r(mm0, mm7);						// copy tmp00
-
-   psubw_m2r(*(dataptr+13), mm6);  		// tmp06 = x1 - x6
-
-   /* stage 2, Even Part */
-
-   paddw_m2r(*(dataptr+9), mm3);  		// tmp03 = x3 + x4
-
-	movq_r2m(mm6, tmp6);						// save tmp07
-	movq_r2r(mm1, mm6);						// copy tmp01
-
-   paddw_m2r(*(dataptr+11), mm2);  		// tmp02 = x2 + x5
-	paddw_r2r(mm3, mm0);              	// tmp10 = tmp00 + tmp03
-
-	psubw_r2r(mm3, mm7);              	// tmp13 = tmp00 - tmp03
-
-   psubw_m2r(*(dataptr+9), mm4);  		// tmp04 = x3 - x4
-	psubw_r2r(mm2, mm6);              	// tmp12 = tmp01 - tmp02
-
-	paddw_r2r(mm2, mm1);              	// tmp11 = tmp01 + tmp02
-
-   psubw_m2r(*(dataptr+11), mm5);  		// tmp05 = x2 - x5
-	paddw_r2r(mm7, mm6);              	//  tmp12 + tmp13
-
-   /* stage 3, Even and stage 4 & 5 even */
-
-	movq_m2r(tmp6, mm2);            		// load tmp6
-	movq_r2r(mm0, mm3);						// copy tmp10
-
-	psllw_i2r(2, mm6);			// shift z1
-	paddw_r2r(mm1, mm0);    				// y0=tmp10 + tmp11
-
-	pmulhw_m2r(RTjpeg_C4, mm6);    		// z1
-	psubw_r2r(mm1, mm3);    				// y4=tmp10 - tmp11
-
-   movq_r2m(mm0, *(dataptr+1)); 			//save y0
-	movq_r2r(mm7, mm0);						// copy tmp13
-  
-	/* odd part */
-
-   movq_r2m(mm3, *(dataptr+9)); 			//save y4
-	paddw_r2r(mm5, mm4);              	// tmp10 = tmp4 + tmp5
-
-	movq_m2r(tmp7, mm3);            		// load tmp7
-	paddw_r2r(mm6, mm0);              	// tmp32 = tmp13 + z1
-
-	paddw_r2r(mm2, mm5);              	// tmp11 = tmp5 + tmp6
-	psubw_r2r(mm6, mm7);              	// tmp33 = tmp13 - z1
-
-   movq_r2m(mm0, *(dataptr+5)); 			//save y2
-	paddw_r2r(mm3, mm2);              	// tmp12 = tmp6 + tmp7
-
-	/* stage 4 */
-
-   movq_r2m(mm7, *(dataptr+13)); 		//save y6
-	movq_r2r(mm4, mm1);						// copy tmp10
-
-	psubw_r2r(mm2, mm1);    				// tmp10 - tmp12
-	psllw_i2r(2, mm4);			// shift tmp10
-
-	movq_m2r(RTjpeg_C2mC6, mm0);			// load C2mC6
-	psllw_i2r(2, mm1);			// shift (tmp10-tmp12)
-
-	pmulhw_m2r(RTjpeg_C6, mm1);    		// z5
-	psllw_i2r(2, mm5);			// prepare for multiply 
-
-	pmulhw_r2r(mm0, mm4);					// multiply by converted real
-
-	/* stage 5 */
-
-	pmulhw_m2r(RTjpeg_C4, mm5);			// z3
-	psllw_i2r(2, mm2);			// prepare for multiply 
-
-	pmulhw_m2r(RTjpeg_C2pC6, mm2);		// multiply
-	movq_r2r(mm3, mm0);						// copy tmp7
-
-	movq_m2r(*(dataptr+9), mm7);		 	// m03:m02|m01:m00 - first line (line 4)and copy into mm7
-	paddw_r2r(mm1, mm4);						// z2
-
-	paddw_r2r(mm5, mm0);						// z11
-	psubw_r2r(mm5, mm3);						// z13
-
-	/* stage 6 */
-
-	movq_r2r(mm3, mm5);						// copy z13
-	paddw_r2r(mm1, mm2);						// z4
-
-	movq_r2r(mm0, mm6);						// copy z11
-	psubw_r2r(mm4, mm5);						// y3
-
-	paddw_r2r(mm2, mm6);						// y1
-	paddw_r2r(mm4, mm3);						// y5
-
-   movq_r2m(mm5, *(dataptr+7)); 			//save y3
-
-   movq_r2m(mm6, *(dataptr+3)); 			//save y1
-	psubw_r2r(mm2, mm0);						// y7
-	
-/************************************************************************************************
-					Start of Transpose
-************************************************************************************************/
-
- 	movq_m2r(*(dataptr+13), mm6);		 	// m23:m22|m21:m20 - third line (line 6)and copy into m2
-	movq_r2r(mm7, mm5);						// copy first line
-
-	punpcklwd_r2r(mm3, mm7); 				// m11:m01|m10:m00 - interleave first and second lines
-	movq_r2r(mm6, mm2);						// copy third line
-
-	punpcklwd_r2r(mm0, mm6);  				// m31:m21|m30:m20 - interleave third and fourth lines
-	movq_r2r(mm7, mm1);						// copy first intermediate result
-
-	punpckldq_r2r(mm6, mm7);				// m30:m20|m10:m00 - interleave to produce result 1
-
-	punpckhdq_r2r(mm6, mm1);				// m31:m21|m11:m01 - interleave to produce result 2
-
-	movq_r2m(mm7, *(dataptr+9));			// write result 1
-	punpckhwd_r2r(mm3, mm5);				// m13:m03|m12:m02 - interleave first and second lines
-
-	movq_r2m(mm1, *(dataptr+11));			// write result 2
-	punpckhwd_r2r(mm0, mm2);				// m33:m23|m32:m22 - interleave third and fourth lines
-
-	movq_r2r(mm5, mm1);						// copy first intermediate result
-	punpckldq_r2r(mm2, mm5);				// m32:m22|m12:m02 - interleave to produce result 3
-
-	movq_m2r(*(dataptr+1), mm0);			// m03:m02|m01:m00 - first line, 4x4
-	punpckhdq_r2r(mm2, mm1);				// m33:m23|m13:m03 - interleave to produce result 4
-
-	movq_r2m(mm5, *(dataptr+13));			// write result 3
-
-	/****** last 4x4 done */
-
-	movq_r2m(mm1, *(dataptr+15));			// write result 4, last 4x4
-
-	movq_m2r(*(dataptr+5), mm2);			// m23:m22|m21:m20 - third line
-	movq_r2r(mm0, mm6);						// copy first line
-
-	punpcklwd_m2r(*(dataptr+3), mm0);  	// m11:m01|m10:m00 - interleave first and second lines
-	movq_r2r(mm2, mm7);						// copy third line
-
-	punpcklwd_m2r(*(dataptr+7), mm2);  	// m31:m21|m30:m20 - interleave third and fourth lines
-	movq_r2r(mm0, mm4);						// copy first intermediate result
-
-	
-
-	movq_m2r(*(dataptr+8), mm1);			// n03:n02|n01:n00 - first line 
-	punpckldq_r2r(mm2, mm0);				// m30:m20|m10:m00 - interleave to produce first result
-
-	movq_m2r(*(dataptr+12), mm3);			// n23:n22|n21:n20 - third line
-	punpckhdq_r2r(mm2, mm4);				// m31:m21|m11:m01 - interleave to produce second result
-
-	punpckhwd_m2r(*(dataptr+3), mm6);  	// m13:m03|m12:m02 - interleave first and second lines
-	movq_r2r(mm1, mm2);						// copy first line
-
-	punpckhwd_m2r(*(dataptr+7), mm7);  	// m33:m23|m32:m22 - interleave third and fourth lines
-	movq_r2r(mm6, mm5);						// copy first intermediate result
-
-	movq_r2m(mm0, *(dataptr+8));			// write result 1
-	punpckhdq_r2r(mm7, mm5);				// m33:m23|m13:m03 - produce third result
-
-	punpcklwd_m2r(*(dataptr+10), mm1);  // n11:n01|n10:n00 - interleave first and second lines
-	movq_r2r(mm3, mm0);						// copy third line
-
-	punpckhwd_m2r(*(dataptr+10), mm2);  // n13:n03|n12:n02 - interleave first and second lines
-
-	movq_r2m(mm4, *(dataptr+10));			// write result 2 out
-	punpckldq_r2r(mm7, mm6);				// m32:m22|m12:m02 - produce fourth result
-
-	punpcklwd_m2r(*(dataptr+14), mm3);  // n33:n23|n32:n22 - interleave third and fourth lines
-	movq_r2r(mm1, mm4);						// copy second intermediate result
-
-	movq_r2m(mm6, *(dataptr+12));			// write result 3 out
-	punpckldq_r2r(mm3, mm1);				// 
-
-	punpckhwd_m2r(*(dataptr+14), mm0);  // n33:n23|n32:n22 - interleave third and fourth lines
-	movq_r2r(mm2, mm6);						// copy second intermediate result
-
-	movq_r2m(mm5, *(dataptr+14));			// write result 4 out
-	punpckhdq_r2r(mm3, mm4);				// n31:n21|n11:n01- produce second result
-
-	movq_r2m(mm1, *(dataptr+1));			// write result 5 out - (first result for other 4 x 4 block)
-	punpckldq_r2r(mm0, mm2);				// n32:n22|n12:n02- produce third result
-
-	movq_r2m(mm4, *(dataptr+3));			// write result 6 out
-	punpckhdq_r2r(mm0, mm6);				// n33:n23|n13:n03 - produce fourth result
-
-	movq_r2m(mm2, *(dataptr+5));			// write result 7 out
-
-	movq_m2r(*dataptr, mm0);				// m03:m02|m01:m00 - first line, first 4x4
-
-	movq_r2m(mm6, *(dataptr+7));			// write result 8 out
-
-// Do first 4x4 quadrant, which is used in the beginning of the DCT:
-
-	movq_m2r(*(dataptr+4), mm7);			// m23:m22|m21:m20 - third line
-	movq_r2r(mm0, mm2);						// copy first line
-
-	punpcklwd_m2r(*(dataptr+2), mm0);  	// m11:m01|m10:m00 - interleave first and second lines
-	movq_r2r(mm7, mm4);						// copy third line
-	
-	punpcklwd_m2r(*(dataptr+6), mm7);  	// m31:m21|m30:m20 - interleave third and fourth lines
-	movq_r2r(mm0, mm1);						// copy first intermediate result
-
-	movq_m2r(*(dataptr+2), mm6);			// m13:m12|m11:m10 - second line
-	punpckldq_r2r(mm7, mm0);				// m30:m20|m10:m00 - interleave to produce result 1
-
-	movq_m2r(*(dataptr+6), mm5);			// m33:m32|m31:m30 - fourth line
-	punpckhdq_r2r(mm7, mm1);				// m31:m21|m11:m01 - interleave to produce result 2
-
-	movq_r2r(mm0, mm7);						// write result 1
-	punpckhwd_r2r(mm6, mm2);				// m13:m03|m12:m02 - interleave first and second lines
-
-	psubw_m2r(*(dataptr+14), mm7);		// tmp07=x0-x7	/* Stage 1 */
-	movq_r2r(mm1, mm6);						// write result 2
-
-	paddw_m2r(*(dataptr+14), mm0);		// tmp00=x0+x7	/* Stage 1 */
-	punpckhwd_r2r(mm5, mm4);   			// m33:m23|m32:m22 - interleave third and fourth lines
-
-	paddw_m2r(*(dataptr+12), mm1);		// tmp01=x1+x6	/* Stage 1 */
-	movq_r2r(mm2, mm3);						// copy first intermediate result
-
-	psubw_m2r(*(dataptr+12), mm6);		// tmp06=x1-x6	/* Stage 1 */
-	punpckldq_r2r(mm4, mm2);				// m32:m22|m12:m02 - interleave to produce result 3
-
-	movq_r2m(mm7, tmp7);						// save tmp07
-	movq_r2r(mm2, mm5);						// write result 3
-
-	movq_r2m(mm6, tmp6);						// save tmp06
-
-	punpckhdq_r2r(mm4, mm3);				// m33:m23|m13:m03 - interleave to produce result 4
-
-	paddw_m2r(*(dataptr+10), mm2);   	// tmp02=x2+x5 /* stage 1 */
-	movq_r2r(mm3, mm4);						// write result 4
-
-/************************************************************************************************
-					End of Transpose 2
-************************************************************************************************/
-
-   paddw_m2r(*(dataptr+8), mm3);    	// tmp03=x3+x4 /* stage 1*/
-   movq_r2r(mm0, mm7);
-
-   psubw_m2r(*(dataptr+8), mm4);    	// tmp04=x3-x4 /* stage 1*/
-   movq_r2r(mm1, mm6);
-
-	paddw_r2r(mm3, mm0);  					// tmp10 = tmp00 + tmp03 /* even 2 */
-	psubw_r2r(mm3, mm7);  					// tmp13 = tmp00 - tmp03 /* even 2 */
-
-	psubw_r2r(mm2, mm6);  					// tmp12 = tmp01 - tmp02 /* even 2 */
-	paddw_r2r(mm2, mm1);  					// tmp11 = tmp01 + tmp02 /* even 2 */
-
-   psubw_m2r(*(dataptr+10), mm5);    	// tmp05=x2-x5 /* stage 1*/
-	paddw_r2r(mm7, mm6);						// tmp12 + tmp13
-
-	/* stage 3 */
-
-   movq_m2r(tmp6, mm2);
-   movq_r2r(mm0, mm3);
-
-	psllw_i2r(2, mm6);			// m8 * 2^2
-	paddw_r2r(mm1, mm0);		
-
-	pmulhw_m2r(RTjpeg_C4, mm6);			// z1
-	psubw_r2r(mm1, mm3);		
-
-   movq_r2m(mm0, *dataptr);
-   movq_r2r(mm7, mm0);
-   
-    /* Odd part */
-   movq_r2m(mm3, *(dataptr+8));
-	paddw_r2r(mm5, mm4);						// tmp10
-
-   movq_m2r(tmp7, mm3);
-	paddw_r2r(mm6, mm0);						// tmp32
-
-	paddw_r2r(mm2, mm5);						// tmp11
-	psubw_r2r(mm6, mm7);						// tmp33
-
-   movq_r2m(mm0, *(dataptr+4));
-	paddw_r2r(mm3, mm2);						// tmp12
-
-	/* stage 4 */
-   movq_r2m(mm7, *(dataptr+12));
-	movq_r2r(mm4, mm1);						// copy of tmp10
-
-	psubw_r2r(mm2, mm1);						// tmp10 - tmp12
-	psllw_i2r(2, mm4);			// m8 * 2^2
-
-	movq_m2r(RTjpeg_C2mC6, mm0);
-	psllw_i2r(2, mm1);
-
-	pmulhw_m2r(RTjpeg_C6, mm1);			// z5
-	psllw_i2r(2, mm2);
-
-	pmulhw_r2r(mm0, mm4);					// z5
-
-	/* stage 5 */
-
-	pmulhw_m2r(RTjpeg_C2pC6, mm2);
-	psllw_i2r(2, mm5);
-
-	pmulhw_m2r(RTjpeg_C4, mm5);			// z3
-	movq_r2r(mm3, mm0);						// copy tmp7
-
-   movq_m2r(*(dataptr+1), mm7);
-	paddw_r2r(mm1, mm4);						// z2
-
-	paddw_r2r(mm1, mm2);						// z4
-
-	paddw_r2r(mm5, mm0);						// z11
-	psubw_r2r(mm5, mm3);						// z13
-
-	/* stage 6 */
-
-	movq_r2r(mm3, mm5);						// copy z13
-	psubw_r2r(mm4, mm3);						// y3=z13 - z2
-
-	paddw_r2r(mm4, mm5);						// y5=z13 + z2
-	movq_r2r(mm0, mm6);						// copy z11
-
-   movq_r2m(mm3, *(dataptr+6)); 			//save y3
-	psubw_r2r(mm2, mm0);						// y7=z11 - z4
-
-   movq_r2m(mm5, *(dataptr+10)); 		//save y5
-	paddw_r2r(mm2, mm6);						// y1=z11 + z4
-
-   movq_r2m(mm0, *(dataptr+14)); 		//save y7
-
-	/************************************************
-	 *  End of 1st 4 rows
-	 ************************************************/
-
-   movq_m2r(*(dataptr+3), mm1); 			// load x1   /* stage 1 */
-	movq_r2r(mm7, mm0);						// copy x0
-
-   movq_r2m(mm6, *(dataptr+2)); 			//save y1
-
-   movq_m2r(*(dataptr+5), mm2); 			// load x2   /* stage 1 */
-	movq_r2r(mm1, mm6);						// copy x1
-
-   paddw_m2r(*(dataptr+15), mm0);  		// tmp00 = x0 + x7
-
-   movq_m2r(*(dataptr+7), mm3); 			// load x3   /* stage 1 */
-	movq_r2r(mm2, mm5);						// copy x2
-
-   psubw_m2r(*(dataptr+15), mm7);  		// tmp07 = x0 - x7
-	movq_r2r(mm3, mm4);						// copy x3
-
-   paddw_m2r(*(dataptr+13), mm1);  		// tmp01 = x1 + x6
-
-	movq_r2m(mm7, tmp7);						// save tmp07
-	movq_r2r(mm0, mm7);						// copy tmp00
-
-   psubw_m2r(*(dataptr+13), mm6);  		// tmp06 = x1 - x6
-
-   /* stage 2, Even Part */
-
-   paddw_m2r(*(dataptr+9), mm3);  		// tmp03 = x3 + x4
-
-	movq_r2m(mm6, tmp6);						// save tmp07
-	movq_r2r(mm1, mm6);						// copy tmp01
-
-   paddw_m2r(*(dataptr+11), mm2);  		// tmp02 = x2 + x5
-	paddw_r2r(mm3, mm0);              	// tmp10 = tmp00 + tmp03
-
-	psubw_r2r(mm3, mm7);              	// tmp13 = tmp00 - tmp03
-
-   psubw_m2r(*(dataptr+9), mm4);  		// tmp04 = x3 - x4
-	psubw_r2r(mm2, mm6);              	// tmp12 = tmp01 - tmp02
-
-	paddw_r2r(mm2, mm1);              	// tmp11 = tmp01 + tmp02
-
-   psubw_m2r(*(dataptr+11), mm5);  		// tmp05 = x2 - x5
-	paddw_r2r(mm7, mm6);              	//  tmp12 + tmp13
-
-   /* stage 3, Even and stage 4 & 5 even */
-
-	movq_m2r(tmp6, mm2);            		// load tmp6
-	movq_r2r(mm0, mm3);						// copy tmp10
-
-	psllw_i2r(2, mm6);			// shift z1
-	paddw_r2r(mm1, mm0);    				// y0=tmp10 + tmp11
-
-	pmulhw_m2r(RTjpeg_C4, mm6);    		// z1
-	psubw_r2r(mm1, mm3);    				// y4=tmp10 - tmp11
-
-   movq_r2m(mm0, *(dataptr+1)); 			//save y0
-	movq_r2r(mm7, mm0);						// copy tmp13
-  
-	/* odd part */
-
-   movq_r2m(mm3, *(dataptr+9)); 			//save y4
-	paddw_r2r(mm5, mm4);              	// tmp10 = tmp4 + tmp5
-
-	movq_m2r(tmp7, mm3);            		// load tmp7
-	paddw_r2r(mm6, mm0);              	// tmp32 = tmp13 + z1
-
-	paddw_r2r(mm2, mm5);              	// tmp11 = tmp5 + tmp6
-	psubw_r2r(mm6, mm7);              	// tmp33 = tmp13 - z1
-
-   movq_r2m(mm0, *(dataptr+5)); 			//save y2
-	paddw_r2r(mm3, mm2);              	// tmp12 = tmp6 + tmp7
-
-	/* stage 4 */
-
-   movq_r2m(mm7, *(dataptr+13)); 		//save y6
-	movq_r2r(mm4, mm1);						// copy tmp10
-
-	psubw_r2r(mm2, mm1);    				// tmp10 - tmp12
-	psllw_i2r(2, mm4);			// shift tmp10
-
-	movq_m2r(RTjpeg_C2mC6, mm0);			// load C2mC6
-	psllw_i2r(2, mm1);			// shift (tmp10-tmp12)
-
-	pmulhw_m2r(RTjpeg_C6, mm1);    		// z5
-	psllw_i2r(2, mm5);			// prepare for multiply 
-
-	pmulhw_r2r(mm0, mm4);					// multiply by converted real
-
-	/* stage 5 */
-
-	pmulhw_m2r(RTjpeg_C4, mm5);			// z3
-	psllw_i2r(2, mm2);			// prepare for multiply 
-
-	pmulhw_m2r(RTjpeg_C2pC6, mm2);		// multiply
-	movq_r2r(mm3, mm0);						// copy tmp7
-
-	movq_m2r(*(dataptr+9), mm7);		 	// m03:m02|m01:m00 - first line (line 4)and copy into mm7
-	paddw_r2r(mm1, mm4);						// z2
-
-	paddw_r2r(mm5, mm0);						// z11
-	psubw_r2r(mm5, mm3);						// z13
-
-	/* stage 6 */
-
-	movq_r2r(mm3, mm5);						// copy z13
-	paddw_r2r(mm1, mm2);						// z4
-
-	movq_r2r(mm0, mm6);						// copy z11
-	psubw_r2r(mm4, mm5);						// y3
-
-	paddw_r2r(mm2, mm6);						// y1
-	paddw_r2r(mm4, mm3);						// y5
-
-   movq_r2m(mm5, *(dataptr+7)); 			//save y3
-	psubw_r2r(mm2, mm0);						// yè=z11 - z4
-
-   movq_r2m(mm3, *(dataptr+11)); 		//save y5
-
-   movq_r2m(mm6, *(dataptr+3)); 			//save y1
-
-   movq_r2m(mm0, *(dataptr+15)); 		//save y7
-	
-
-#endif
-}
-
-#define FIX_1_082392200  ((__s32)  277)		/* FIX(1.082392200) */
-#define FIX_1_414213562  ((__s32)  362)		/* FIX(1.414213562) */
-#define FIX_1_847759065  ((__s32)  473)		/* FIX(1.847759065) */
-#define FIX_2_613125930  ((__s32)  669)		/* FIX(2.613125930) */
-
-#define DESCALE(x) (__s16)( ((x)+4) >> 3)
-
-/* clip yuv to 16..235 (should be 16..240 for cr/cb but ... */
-
-#define RL(x) ((x)>235) ? 235 : (((x)<16) ? 16 : (x))
-#define MULTIPLY(var,const)  (((__s32) ((var) * (const)) + 128)>>8)
-
-void RTjpeg_idct_init(void)
-{
- int i;
- 
- for(i=0; i<64; i++)
- {
-  RTjpeg_liqt[i]=((__u64)RTjpeg_liqt[i]*RTjpeg_aan_tab[i])>>32;
-  RTjpeg_ciqt[i]=((__u64)RTjpeg_ciqt[i]*RTjpeg_aan_tab[i])>>32;
- }
-}
-
-void RTjpeg_idct(__u8 *odata, __s16 *data, int rskip)
-{
-#ifdef MMX
-
-static mmx_t fix_141			= (mmx_t)(long long)0x5a825a825a825a82LL;
-static mmx_t fix_184n261	= (mmx_t)(long long)0xcf04cf04cf04cf04LL;
-static mmx_t fix_184			= (mmx_t)(long long)0x7641764176417641LL;
-static mmx_t fix_n184		= (mmx_t)(long long)0x896f896f896f896fLL;
-static mmx_t fix_108n184	= (mmx_t)(long long)0xcf04cf04cf04cf04LL;
-
-  mmx_t workspace[64];
-  mmx_t *wsptr = workspace;
-  register mmx_t *dataptr = (mmx_t *)odata;
-  mmx_t *idata = (mmx_t *)data;
-
-  rskip = rskip>>3;
-/*
- * Perform inverse DCT on one block of coefficients.
- */
-
-    /* Odd part */
-
-	movq_m2r(*(idata+10), mm1);	// load idata[DCTSIZE*5]
-
-	movq_m2r(*(idata+6), mm0);		// load idata[DCTSIZE*3]
-
-	movq_m2r(*(idata+2), mm3);		// load idata[DCTSIZE*1]
-
-	movq_r2r(mm1, mm2);				// copy tmp6	/* phase 6 */
-
-	movq_m2r(*(idata+14), mm4);	// load idata[DCTSIZE*7]
-
-	paddw_r2r(mm0, mm1);				// z13 = tmp6 + tmp5;
-
-	psubw_r2r(mm0, mm2);				// z10 = tmp6 - tmp5   
-
-	psllw_i2r(2, mm2);				// shift z10
-	movq_r2r(mm2, mm0); 				// copy z10
-
-	pmulhw_m2r(fix_184n261, mm2);	// MULTIPLY( z12, FIX_1_847759065); /* 2*c2 */
-	movq_r2r(mm3, mm5);				// copy tmp4
-
-	pmulhw_m2r(fix_n184, mm0);		// MULTIPLY(z10, -FIX_1_847759065); /* 2*c2 */
-	paddw_r2r(mm4, mm3);				// z11 = tmp4 + tmp7;
-
-	movq_r2r(mm3, mm6);				// copy z11			/* phase 5 */
-	psubw_r2r(mm4, mm5);				// z12 = tmp4 - tmp7;
-
-	psubw_r2r(mm1, mm6);				// z11-z13
-	psllw_i2r(2, mm5);				//	shift z12
-
-	movq_m2r(*(idata+12), mm4);	// load idata[DCTSIZE*6], even part
- 	movq_r2r(mm5, mm7);				//	copy z12
-
-	pmulhw_m2r(fix_108n184, mm5); //	MULT(z12, (FIX_1_08-FIX_1_84)) //- z5; /* 2*(c2-c6) */ even part
-	paddw_r2r(mm1, mm3);				// tmp7 = z11 + z13;	
-
-	//ok
-
-    /* Even part */
-	pmulhw_m2r(fix_184, mm7);		// MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) //+ z5; /* -2*(c2+c6) */
-	psllw_i2r(2, mm6);
-
-	movq_m2r(*(idata+4), mm1);		// load idata[DCTSIZE*2]
-
-	paddw_r2r(mm5, mm0);				//	tmp10
-
-	paddw_r2r(mm7, mm2);				// tmp12
-
-	pmulhw_m2r(fix_141, mm6);		// tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
-	psubw_r2r(mm3, mm2);				// tmp6 = tmp12 - tmp7
-
-	movq_r2r(mm1, mm5);				// copy tmp1
-	paddw_r2r(mm4, mm1);				// tmp13= tmp1 + tmp3;	/* phases 5-3 */
-
-	psubw_r2r(mm4, mm5);				// tmp1-tmp3
-	psubw_r2r(mm2, mm6);				// tmp5 = tmp11 - tmp6;
-
-	movq_r2m(mm1, *(wsptr));		// save tmp13 in workspace
-	psllw_i2r(2, mm5);	// shift tmp1-tmp3
-    
-	movq_m2r(*(idata), mm7); 		// load idata[DCTSIZE*0]
-
-	pmulhw_m2r(fix_141, mm5);		// MULTIPLY(tmp1 - tmp3, FIX_1_414213562)
-	paddw_r2r(mm6, mm0);				// tmp4 = tmp10 + tmp5;
-
-	movq_m2r(*(idata+8), mm4); 	// load idata[DCTSIZE*4]
-	
-	psubw_r2r(mm1, mm5);				// tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */
-
-	movq_r2m(mm0, *(wsptr+4));		// save tmp4 in workspace
-	movq_r2r(mm7, mm1);			 	// copy tmp0	/* phase 3 */
-
-	movq_r2m(mm5, *(wsptr+2));		// save tmp12 in workspace
-	psubw_r2r(mm4, mm1);				// tmp11 = tmp0 - tmp2; 
-
-	paddw_r2r(mm4, mm7);				// tmp10 = tmp0 + tmp2;
-   movq_r2r(mm1, mm5);				// copy tmp11
-	
-	paddw_m2r(*(wsptr+2), mm1);	// tmp1 = tmp11 + tmp12;
-	movq_r2r(mm7, mm4);				// copy tmp10		/* phase 2 */
-
-	paddw_m2r(*(wsptr), mm7);		// tmp0 = tmp10 + tmp13;	
-
-	psubw_m2r(*(wsptr), mm4);		// tmp3 = tmp10 - tmp13;
-	movq_r2r(mm7, mm0);				//	copy tmp0
-
-	psubw_m2r(*(wsptr+2), mm5);	// tmp2 = tmp11 - tmp12;
-	paddw_r2r(mm3, mm7);				//	wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7);
-	
-	psubw_r2r(mm3, mm0);				// wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7);
-
-	movq_r2m(mm7, *(wsptr));		//	wsptr[DCTSIZE*0]
-	movq_r2r(mm1, mm3);				//	copy tmp1
-
-	movq_r2m(mm0, *(wsptr+14));		// wsptr[DCTSIZE*7]
-	paddw_r2r(mm2, mm1);				// wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6);
-
-	psubw_r2r(mm2, mm3);				// wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6);
-
-	movq_r2m(mm1, *(wsptr+2));		// wsptr[DCTSIZE*1]
-	movq_r2r(mm4, mm1);				//	copy tmp3
-
-	movq_r2m(mm3, *(wsptr+12));		// wsptr[DCTSIZE*6]
-
-	paddw_m2r(*(wsptr+4), mm4);	// wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4);
-
-	psubw_m2r(*(wsptr+4), mm1); 	// wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4);
-
-	movq_r2m(mm4, *(wsptr+8));		
-	movq_r2r(mm5, mm7);				// copy tmp2
-
-	paddw_r2r(mm6, mm5);				// wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5)
-
-	movq_r2m(mm1, *(wsptr+6));	
-	psubw_r2r(mm6, mm7);				//	wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5);
-
-	movq_r2m(mm5, *(wsptr+4));	
-
-	movq_r2m(mm7, *(wsptr+10));		
-
-	//ok
-
-
-/*****************************************************************/
-
-	idata++;
-	wsptr++;
-
-/*****************************************************************/
-
-	movq_m2r(*(idata+10), mm1);	// load idata[DCTSIZE*5]
-
-	movq_m2r(*(idata+6), mm0);		// load idata[DCTSIZE*3]
-
-	movq_m2r(*(idata+2),	mm3);		// load idata[DCTSIZE*1]
-	movq_r2r(mm1, mm2);				//	copy tmp6	/* phase 6 */
-
-	movq_m2r(*(idata+14),	mm4);		// load idata[DCTSIZE*7]
-	paddw_r2r(mm0, mm1);				//	z13 = tmp6 + tmp5;
-
-	psubw_r2r(mm0, mm2);				//	z10 = tmp6 - tmp5   
-
-	psllw_i2r(2, mm2);				//	shift z10
-	movq_r2r(mm2, mm0);				//	copy z10
-
-	pmulhw_m2r(fix_184n261, mm2);	// MULTIPLY( z12, FIX_1_847759065); /* 2*c2 */
-	movq_r2r(mm3, mm5);				//	copy tmp4
-
-	pmulhw_m2r(fix_n184, mm0);		// MULTIPLY(z10, -FIX_1_847759065); /* 2*c2 */
-	paddw_r2r(mm4, mm3);				// z11 = tmp4 + tmp7;
-
-	movq_r2r(mm3, mm6);				// copy z11			/* phase 5 */
-	psubw_r2r(mm4, mm5);				//	z12 = tmp4 - tmp7;
-
-	psubw_r2r(mm1, mm6);				// z11-z13
-	psllw_i2r(2, mm5);				//	shift z12
-
-	movq_m2r(*(idata+12), mm4);	// load idata[DCTSIZE*6], even part
- 	movq_r2r(mm5, mm7);				// copy z12
-
-	pmulhw_m2r(fix_108n184, mm5);	// MULT(z12, (FIX_1_08-FIX_1_84)) //- z5; /* 2*(c2-c6) */ even part
-	paddw_r2r(mm1, mm3);				// tmp7 = z11 + z13;	
-
-	//ok
-
-    /* Even part */
-	pmulhw_m2r(fix_184, mm7);		// MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) //+ z5; /* -2*(c2+c6) */
-	psllw_i2r(2, mm6);
-
-	movq_m2r(*(idata+4), mm1);		// load idata[DCTSIZE*2]
-
-	paddw_r2r(mm5, mm0);				//	tmp10
-
-	paddw_r2r(mm7, mm2);				// tmp12
-
-	pmulhw_m2r(fix_141, mm6);		// tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
-	psubw_r2r(mm3, mm2);				// tmp6 = tmp12 - tmp7
-
-	movq_r2r(mm1, mm5);				// copy tmp1
-	paddw_r2r(mm4, mm1);				// tmp13= tmp1 + tmp3;	/* phases 5-3 */
-
-	psubw_r2r(mm4, mm5);				// tmp1-tmp3
-	psubw_r2r(mm2, mm6);				// tmp5 = tmp11 - tmp6;
-
-	movq_r2m(mm1, *(wsptr));		// save tmp13 in workspace
-	psllw_i2r(2, mm5); 				// shift tmp1-tmp3
-    
-	movq_m2r(*(idata), mm7);		// load idata[DCTSIZE*0]
-	paddw_r2r(mm6, mm0);				// tmp4 = tmp10 + tmp5;
-
-	pmulhw_m2r(fix_141, mm5);		// MULTIPLY(tmp1 - tmp3, FIX_1_414213562)
-
-	movq_m2r(*(idata+8), mm4);    // load idata[DCTSIZE*4]
-	
-	psubw_r2r(mm1, mm5);				// tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */
-
-	movq_r2m(mm0, *(wsptr+4));		// save tmp4 in workspace
-	movq_r2r(mm7, mm1);				// copy tmp0	/* phase 3 */
-
-	movq_r2m(mm5, *(wsptr+2));		// save tmp12 in workspace
-	psubw_r2r(mm4, mm1);				// tmp11 = tmp0 - tmp2; 
-
-	paddw_r2r(mm4, mm7);				// tmp10 = tmp0 + tmp2;
-   movq_r2r(mm1, mm5);				// copy tmp11
-	
-	paddw_m2r(*(wsptr+2), mm1);	// tmp1 = tmp11 + tmp12;
-	movq_r2r(mm7, mm4);				// copy tmp10		/* phase 2 */
-
-	paddw_m2r(*(wsptr), mm7);		// tmp0 = tmp10 + tmp13;	
-
-	psubw_m2r(*(wsptr), mm4);		// tmp3 = tmp10 - tmp13;
-	movq_r2r(mm7, mm0);				// copy tmp0
-
-	psubw_m2r(*(wsptr+2), mm5);	// tmp2 = tmp11 - tmp12;
-	paddw_r2r(mm3, mm7);				// wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7);
-	
-	psubw_r2r(mm3, mm0);				// wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7);
-
-	movq_r2m(mm7, *(wsptr));		// wsptr[DCTSIZE*0]
-	movq_r2r(mm1, mm3);				// copy tmp1
-
-	movq_r2m(mm0, *(wsptr+14));		// wsptr[DCTSIZE*7]
-	paddw_r2r(mm2, mm1);				// wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6);
-
-	psubw_r2r(mm2, mm3);				// wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6);
-
-	movq_r2m(mm1, *(wsptr+2));		// wsptr[DCTSIZE*1]
-	movq_r2r(mm4, mm1);				// copy tmp3
-
-	movq_r2m(mm3, *(wsptr+12));		// wsptr[DCTSIZE*6]
-
-	paddw_m2r(*(wsptr+4), mm4);	// wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4);
-
-	psubw_m2r(*(wsptr+4), mm1);	// wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4);
-
-	movq_r2m(mm4, *(wsptr+8));		
-	movq_r2r(mm5, mm7);				// copy tmp2
-
-	paddw_r2r(mm6, mm5);				// wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5)
-
-	movq_r2m(mm1, *(wsptr+6));		
-	psubw_r2r(mm6, mm7);				// wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5);
-
-	movq_r2m(mm5, *(wsptr+4));	
-
-	movq_r2m(mm7, *(wsptr+10));
-
-/*****************************************************************/
-
-  /* Pass 2: process rows from work array, store into output array. */
-  /* Note that we must descale the results by a factor of 8 == 2**3, */
-  /* and also undo the PASS1_BITS scaling. */
-
-/*****************************************************************/
-    /* Even part */
-
-	wsptr--;
-
-//    tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]);
-//    tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]);
-//    tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]);
-//    tmp14 = ((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6]);
-	movq_m2r(*(wsptr), mm0);		// wsptr[0,0],[0,1],[0,2],[0,3]
-
-	movq_m2r(*(wsptr+1),	mm1);		// wsptr[0,4],[0,5],[0,6],[0,7]
-	movq_r2r(mm0, mm2);
-	
-	movq_m2r(*(wsptr+2), mm3);		// wsptr[1,0],[1,1],[1,2],[1,3]
-	paddw_r2r(mm1, mm0);				// wsptr[0,tmp10],[xxx],[0,tmp13],[xxx]
-
-	movq_m2r(*(wsptr+3), mm4);		// wsptr[1,4],[1,5],[1,6],[1,7]
-	psubw_r2r(mm1, mm2);				// wsptr[0,tmp11],[xxx],[0,tmp14],[xxx]
-
-	movq_r2r(mm0, mm6);
-	movq_r2r(mm3, mm5);
-	
-	paddw_r2r(mm4, mm3);				// wsptr[1,tmp10],[xxx],[1,tmp13],[xxx]
-	movq_r2r(mm2, mm1);
-
-	psubw_r2r(mm4, mm5);				// wsptr[1,tmp11],[xxx],[1,tmp14],[xxx]
-	punpcklwd_r2r(mm3, mm0);		// wsptr[0,tmp10],[1,tmp10],[xxx],[xxx]
-
-	movq_m2r(*(wsptr+7), mm7);		// wsptr[3,4],[3,5],[3,6],[3,7]
-	punpckhwd_r2r(mm3, mm6);		// wsptr[0,tmp13],[1,tmp13],[xxx],[xxx]
-
-	movq_m2r(*(wsptr+4), mm3);		// wsptr[2,0],[2,1],[2,2],[2,3]
-	punpckldq_r2r(mm6, mm0);		// wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13]
-
-	punpcklwd_r2r(mm5, mm1);		// wsptr[0,tmp11],[1,tmp11],[xxx],[xxx]
-	movq_r2r(mm3, mm4);
-
-	movq_m2r(*(wsptr+6), mm6);		// wsptr[3,0],[3,1],[3,2],[3,3]
-	punpckhwd_r2r(mm5, mm2);		// wsptr[0,tmp14],[1,tmp14],[xxx],[xxx]
-
-	movq_m2r(*(wsptr+5), mm5);		// wsptr[2,4],[2,5],[2,6],[2,7]
-	punpckldq_r2r(mm2, mm1);		// wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14]
-
-	
-	paddw_r2r(mm5, mm3);				// wsptr[2,tmp10],[xxx],[2,tmp13],[xxx]
-	movq_r2r(mm6, mm2);
-
-	psubw_r2r(mm5, mm4);				// wsptr[2,tmp11],[xxx],[2,tmp14],[xxx]
-	paddw_r2r(mm7, mm6);				// wsptr[3,tmp10],[xxx],[3,tmp13],[xxx]
-
-	movq_r2r(mm3, mm5);
-	punpcklwd_r2r(mm6, mm3);		// wsptr[2,tmp10],[3,tmp10],[xxx],[xxx]
-	
-	psubw_r2r(mm7, mm2);				// wsptr[3,tmp11],[xxx],[3,tmp14],[xxx]
-	punpckhwd_r2r(mm6, mm5);		// wsptr[2,tmp13],[3,tmp13],[xxx],[xxx]
-
-	movq_r2r(mm4, mm7);
-	punpckldq_r2r(mm5, mm3);		// wsptr[2,tmp10],[3,tmp10],[2,tmp13],[3,tmp13]
-						 
-	punpcklwd_r2r(mm2, mm4);		// wsptr[2,tmp11],[3,tmp11],[xxx],[xxx]
-
-	punpckhwd_r2r(mm2, mm7);		// wsptr[2,tmp14],[3,tmp14],[xxx],[xxx]
-
-	punpckldq_r2r(mm7, mm4);		// wsptr[2,tmp11],[3,tmp11],[2,tmp14],[3,tmp14]
-	movq_r2r(mm1, mm6);
-
-	//ok
-
-//	mm0 = 	;wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13]
-//	mm1 =	;wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14]
-
-
-	movq_r2r(mm0, mm2);
-	punpckhdq_r2r(mm4, mm6);		// wsptr[0,tmp14],[1,tmp14],[2,tmp14],[3,tmp14]
-
-	punpckldq_r2r(mm4, mm1);		// wsptr[0,tmp11],[1,tmp11],[2,tmp11],[3,tmp11]
-	psllw_i2r(2, mm6);
-
-	pmulhw_m2r(fix_141, mm6);
-	punpckldq_r2r(mm3, mm0);		// wsptr[0,tmp10],[1,tmp10],[2,tmp10],[3,tmp10]
-
-	punpckhdq_r2r(mm3, mm2);		// wsptr[0,tmp13],[1,tmp13],[2,tmp13],[3,tmp13]
-	movq_r2r(mm0, mm7);
-
-//    tmp0 = tmp10 + tmp13;
-//    tmp3 = tmp10 - tmp13;
-	paddw_r2r(mm2, mm0);				// [0,tmp0],[1,tmp0],[2,tmp0],[3,tmp0]
-	psubw_r2r(mm2, mm7);				// [0,tmp3],[1,tmp3],[2,tmp3],[3,tmp3]
-
-//    tmp12 = MULTIPLY(tmp14, FIX_1_414213562) - tmp13;
-	psubw_r2r(mm2, mm6);				// wsptr[0,tmp12],[1,tmp12],[2,tmp12],[3,tmp12]
-//    tmp1 = tmp11 + tmp12;
-//    tmp2 = tmp11 - tmp12;
-	movq_r2r(mm1, mm5);
-
-	//OK
-
-    /* Odd part */
-
-//    z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3];
-//    z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3];
-//    z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7];
-//    z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7];
-	movq_m2r(*(wsptr), mm3);		// wsptr[0,0],[0,1],[0,2],[0,3]
-	paddw_r2r(mm6, mm1);				// [0,tmp1],[1,tmp1],[2,tmp1],[3,tmp1]
-
-	movq_m2r(*(wsptr+1), mm4);		// wsptr[0,4],[0,5],[0,6],[0,7]
-	psubw_r2r(mm6, mm5);				// [0,tmp2],[1,tmp2],[2,tmp2],[3,tmp2]
-
-	movq_r2r(mm3, mm6);
-	punpckldq_r2r(mm4, mm3);		// wsptr[0,0],[0,1],[0,4],[0,5]
-
-	punpckhdq_r2r(mm6, mm4);		// wsptr[0,6],[0,7],[0,2],[0,3]
-	movq_r2r(mm3, mm2);
-
-//Save tmp0 and tmp1 in wsptr
-	movq_r2m(mm0, *(wsptr));		// save tmp0
-	paddw_r2r(mm4, mm2);				// wsptr[xxx],[0,z11],[xxx],[0,z13]
-
-	
-//Continue with z10 --- z13
-	movq_m2r(*(wsptr+2), mm6);		// wsptr[1,0],[1,1],[1,2],[1,3]
-	psubw_r2r(mm4, mm3);				// wsptr[xxx],[0,z12],[xxx],[0,z10]
-
-	movq_m2r(*(wsptr+3), mm0);		// wsptr[1,4],[1,5],[1,6],[1,7]
-	movq_r2r(mm6, mm4);
-
-	movq_r2m(mm1, *(wsptr+1));		// save tmp1
-	punpckldq_r2r(mm0, mm6);		// wsptr[1,0],[1,1],[1,4],[1,5]
-
-	punpckhdq_r2r(mm4, mm0);		// wsptr[1,6],[1,7],[1,2],[1,3]
-	movq_r2r(mm6, mm1);
-	
-//Save tmp2 and tmp3 in wsptr
-	paddw_r2r(mm0, mm6);				// wsptr[xxx],[1,z11],[xxx],[1,z13]
-	movq_r2r(mm2, mm4);
-	
-//Continue with z10 --- z13
-	movq_r2m(mm5, *(wsptr+2));		// save tmp2
-	punpcklwd_r2r(mm6, mm2);		// wsptr[xxx],[xxx],[0,z11],[1,z11]
-
-	psubw_r2r(mm0, mm1);				// wsptr[xxx],[1,z12],[xxx],[1,z10]
-	punpckhwd_r2r(mm6, mm4);		// wsptr[xxx],[xxx],[0,z13],[1,z13]
-
-	movq_r2r(mm3, mm0);
-	punpcklwd_r2r(mm1, mm3);		// wsptr[xxx],[xxx],[0,z12],[1,z12]
-
-	movq_r2m(mm7, *(wsptr+3));		// save tmp3
-	punpckhwd_r2r(mm1, mm0);		// wsptr[xxx],[xxx],[0,z10],[1,z10]
-
-	movq_m2r(*(wsptr+4), mm6);		// wsptr[2,0],[2,1],[2,2],[2,3]
-	punpckhdq_r2r(mm2, mm0);		// wsptr[0,z10],[1,z10],[0,z11],[1,z11]
-
-	movq_m2r(*(wsptr+5), mm7);	// wsptr[2,4],[2,5],[2,6],[2,7]
-	punpckhdq_r2r(mm4, mm3);		// wsptr[0,z12],[1,z12],[0,z13],[1,z13]
-
-	movq_m2r(*(wsptr+6), mm1);	// wsptr[3,0],[3,1],[3,2],[3,3]
-	movq_r2r(mm6, mm4);
-
-	punpckldq_r2r(mm7, mm6);		// wsptr[2,0],[2,1],[2,4],[2,5]
-	movq_r2r(mm1, mm5);
-
-	punpckhdq_r2r(mm4, mm7);		// wsptr[2,6],[2,7],[2,2],[2,3]
-	movq_r2r(mm6, mm2);
-	
-	movq_m2r(*(wsptr+7), mm4);	// wsptr[3,4],[3,5],[3,6],[3,7]
-	paddw_r2r(mm7, mm6);				// wsptr[xxx],[2,z11],[xxx],[2,z13]
-
-	psubw_r2r(mm7, mm2);				// wsptr[xxx],[2,z12],[xxx],[2,z10]
-	punpckldq_r2r(mm4, mm1);		// wsptr[3,0],[3,1],[3,4],[3,5]
-
-	punpckhdq_r2r(mm5, mm4);		// wsptr[3,6],[3,7],[3,2],[3,3]
-	movq_r2r(mm1, mm7);
-
-	paddw_r2r(mm4, mm1);				// wsptr[xxx],[3,z11],[xxx],[3,z13]
-	psubw_r2r(mm4, mm7);				// wsptr[xxx],[3,z12],[xxx],[3,z10]
-
-	movq_r2r(mm6, mm5);
-	punpcklwd_r2r(mm1, mm6);		// wsptr[xxx],[xxx],[2,z11],[3,z11]
-
-	punpckhwd_r2r(mm1, mm5);		// wsptr[xxx],[xxx],[2,z13],[3,z13]
-	movq_r2r(mm2, mm4);
-
-	punpcklwd_r2r(mm7, mm2);		// wsptr[xxx],[xxx],[2,z12],[3,z12]
-
-	punpckhwd_r2r(mm7, mm4);		// wsptr[xxx],[xxx],[2,z10],[3,z10]
-
-	punpckhdq_r2r(mm6, mm4);		/// wsptr[2,z10],[3,z10],[2,z11],[3,z11]
-
-	punpckhdq_r2r(mm5, mm2);		// wsptr[2,z12],[3,z12],[2,z13],[3,z13]
-	movq_r2r(mm0, mm5);
-
-	punpckldq_r2r(mm4, mm0);		// wsptr[0,z10],[1,z10],[2,z10],[3,z10]
-
-	punpckhdq_r2r(mm4, mm5);		// wsptr[0,z11],[1,z11],[2,z11],[3,z11]
-	movq_r2r(mm3, mm4);
-
-	punpckhdq_r2r(mm2, mm4);		// wsptr[0,z13],[1,z13],[2,z13],[3,z13]
-	movq_r2r(mm5, mm1);
-
-	punpckldq_r2r(mm2, mm3);		// wsptr[0,z12],[1,z12],[2,z12],[3,z12]
-//    tmp7 = z11 + z13;		/* phase 5 */
-//    tmp8 = z11 - z13;		/* phase 5 */
-	psubw_r2r(mm4, mm1);				// tmp8
-
-	paddw_r2r(mm4, mm5);				// tmp7
-//    tmp21 = MULTIPLY(tmp8, FIX_1_414213562); /* 2*c4 */
-	psllw_i2r(2, mm1);
-
-	psllw_i2r(2, mm0);
-
-	pmulhw_m2r(fix_141, mm1);		// tmp21
-//    tmp20 = MULTIPLY(z12, (FIX_1_082392200- FIX_1_847759065))  /* 2*(c2-c6) */
-//			+ MULTIPLY(z10, - FIX_1_847759065); /* 2*c2 */
-	psllw_i2r(2, mm3);
-	movq_r2r(mm0, mm7);
-
-	pmulhw_m2r(fix_n184, mm7);
-	movq_r2r(mm3, mm6);
-
-	movq_m2r(*(wsptr), mm2);		// tmp0,final1
-
-	pmulhw_m2r(fix_108n184, mm6);
-//	 tmp22 = MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) /* -2*(c2+c6) */
-//			+ MULTIPLY(z12, FIX_1_847759065); /* 2*c2 */
-	movq_r2r(mm2, mm4);				// final1
-  
-	pmulhw_m2r(fix_184n261, mm0);
-	paddw_r2r(mm5, mm2);				// tmp0+tmp7,final1
-
-	pmulhw_m2r(fix_184, mm3);
-	psubw_r2r(mm5, mm4);				// tmp0-tmp7,final1
-
-//    tmp6 = tmp22 - tmp7;	/* phase 2 */
-	psraw_i2r(3, mm2);				// outptr[0,0],[1,0],[2,0],[3,0],final1
-
-	paddw_r2r(mm6, mm7);				// tmp20
-	psraw_i2r(3, mm4);				// outptr[0,7],[1,7],[2,7],[3,7],final1
-
-	paddw_r2r(mm0, mm3);				// tmp22
-
-//    tmp5 = tmp21 - tmp6;
-	psubw_r2r(mm5, mm3);				// tmp6
-
-//    tmp4 = tmp20 + tmp5;
-	movq_m2r(*(wsptr+1), mm0);		// tmp1,final2
-	psubw_r2r(mm3, mm1);				// tmp5
-
-	movq_r2r(mm0, mm6);				// final2
-	paddw_r2r(mm3, mm0);				// tmp1+tmp6,final2
-
-    /* Final output stage: scale down by a factor of 8 and range-limit */
-
-
-//    outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3)
-//			    & RANGE_MASK];
-//    outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3)
-//			    & RANGE_MASK];	final1
-
-
-//    outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3)
-//			    & RANGE_MASK];
-//    outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3)
-//			    & RANGE_MASK];	final2
-	psubw_r2r(mm3, mm6);				// tmp1-tmp6,final2
-	psraw_i2r(3, mm0);				// outptr[0,1],[1,1],[2,1],[3,1]
-
-	psraw_i2r(3, mm6);				// outptr[0,6],[1,6],[2,6],[3,6]
-	
-	packuswb_r2r(mm4, mm0);			// out[0,1],[1,1],[2,1],[3,1],[0,7],[1,7],[2,7],[3,7]
-	
-	movq_m2r(*(wsptr+2), mm5);		// tmp2,final3
-	packuswb_r2r(mm6, mm2);			// out[0,0],[1,0],[2,0],[3,0],[0,6],[1,6],[2,6],[3,6]
-
-//    outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3)
-//			    & RANGE_MASK];
-//    outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3)
-//			    & RANGE_MASK];	final3
-	paddw_r2r(mm1, mm7);				// tmp4
-	movq_r2r(mm5, mm3);
-
-	paddw_r2r(mm1, mm5);				// tmp2+tmp5
-	psubw_r2r(mm1, mm3);				// tmp2-tmp5
-
-	psraw_i2r(3, mm5);				// outptr[0,2],[1,2],[2,2],[3,2]
-
-	movq_m2r(*(wsptr+3), mm4);		// tmp3,final4
-	psraw_i2r(3, mm3);				// outptr[0,5],[1,5],[2,5],[3,5]
-
-
-
-//    outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3)
-//			    & RANGE_MASK];
-//    outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3)
-//			    & RANGE_MASK];	final4
-	movq_r2r(mm4, mm6);
-	paddw_r2r(mm7, mm4);				// tmp3+tmp4
-
-	psubw_r2r(mm7, mm6);				// tmp3-tmp4
-	psraw_i2r(3, mm4);				// outptr[0,4],[1,4],[2,4],[3,4]
-
-	// mov			ecx, [dataptr]
-
-	psraw_i2r(3, mm6);				// outptr[0,3],[1,3],[2,3],[3,3]
-
-	packuswb_r2r(mm4, mm5);			// out[0,2],[1,2],[2,2],[3,2],[0,4],[1,4],[2,4],[3,4]
-
-	packuswb_r2r(mm3, mm6);			// out[0,3],[1,3],[2,3],[3,3],[0,5],[1,5],[2,5],[3,5]
-	movq_r2r(mm2, mm4);
-
-	movq_r2r(mm5, mm7);
-	punpcklbw_r2r(mm0, mm2);		// out[0,0],[0,1],[1,0],[1,1],[2,0],[2,1],[3,0],[3,1]
-
-	punpckhbw_r2r(mm0, mm4);		// out[0,6],[0,7],[1,6],[1,7],[2,6],[2,7],[3,6],[3,7]
-	movq_r2r(mm2, mm1);
-
-	punpcklbw_r2r(mm6, mm5);		// out[0,2],[0,3],[1,2],[1,3],[2,2],[2,3],[3,2],[3,3]
-
-	// add		 	dataptr, 4
-
-	punpckhbw_r2r(mm6, mm7);		// out[0,4],[0,5],[1,4],[1,5],[2,4],[2,5],[3,4],[3,5]
-
-	punpcklwd_r2r(mm5, mm2);		// out[0,0],[0,1],[0,2],[0,3],[1,0],[1,1],[1,2],[1,3]
-	
-	// add			ecx, output_col
-
-	movq_r2r(mm7, mm6);
-	punpckhwd_r2r(mm5, mm1);		// out[2,0],[2,1],[2,2],[2,3],[3,0],[3,1],[3,2],[3,3]
-
-	movq_r2r(mm2, mm0);
-	punpcklwd_r2r(mm4, mm6);		// out[0,4],[0,5],[0,6],[0,7],[1,4],[1,5],[1,6],[1,7]
-
-	// mov			idata, [dataptr]
-	
-	punpckldq_r2r(mm6, mm2);		// out[0,0],[0,1],[0,2],[0,3],[0,4],[0,5],[0,6],[0,7]
-
-	// add		 	dataptr, 4
-	 
-	movq_r2r(mm1, mm3);
-
-	// add			idata, output_col 
-	
-	punpckhwd_r2r(mm4, mm7);		// out[2,4],[2,5],[2,6],[2,7],[3,4],[3,5],[3,6],[3,7]
-	
-	movq_r2m(mm2, *(dataptr));
-	
-	punpckhdq_r2r(mm6, mm0);		// out[1,0],[1,1],[1,2],[1,3],[1,4],[1,5],[1,6],[1,7]
-
-	dataptr += rskip;
-	movq_r2m(mm0, *(dataptr));
-
-	punpckldq_r2r(mm7, mm1);		// out[2,0],[2,1],[2,2],[2,3],[2,4],[2,5],[2,6],[2,7]
-	punpckhdq_r2r(mm7, mm3);		// out[3,0],[3,1],[3,2],[3,3],[3,4],[3,5],[3,6],[3,7]
-	
-	dataptr += rskip;
-	movq_r2m(mm1, *(dataptr));
-
-	dataptr += rskip;
-	movq_r2m(mm3, *(dataptr));
-
-/*******************************************************************/
-
-	wsptr += 8;
-
-/*******************************************************************/
-
-//    tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]);
-//    tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]);
-//    tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]);
-//    tmp14 = ((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6]);
-	movq_m2r(*(wsptr), mm0);		// wsptr[0,0],[0,1],[0,2],[0,3]
-
-	movq_m2r(*(wsptr+1), mm1);		// wsptr[0,4],[0,5],[0,6],[0,7]
-	movq_r2r(mm0, mm2);
-	
-	movq_m2r(*(wsptr+2), mm3);		// wsptr[1,0],[1,1],[1,2],[1,3]
-	paddw_r2r(mm1, mm0);				// wsptr[0,tmp10],[xxx],[0,tmp13],[xxx]
-
-	movq_m2r(*(wsptr+3), mm4);		// wsptr[1,4],[1,5],[1,6],[1,7]
-	psubw_r2r(mm1, mm2);				// wsptr[0,tmp11],[xxx],[0,tmp14],[xxx]
-
-	movq_r2r(mm0, mm6);
-	movq_r2r(mm3, mm5);
-	
-	paddw_r2r(mm4, mm3);				// wsptr[1,tmp10],[xxx],[1,tmp13],[xxx]
-	movq_r2r(mm2, mm1);
-
-	psubw_r2r(mm4, mm5);				// wsptr[1,tmp11],[xxx],[1,tmp14],[xxx]
-	punpcklwd_r2r(mm3, mm0);		// wsptr[0,tmp10],[1,tmp10],[xxx],[xxx]
-
-	movq_m2r(*(wsptr+7), mm7);	// wsptr[3,4],[3,5],[3,6],[3,7]
-	punpckhwd_r2r(mm3, mm6);		// wsptr[0,tmp13],[1,tmp13],[xxx],[xxx]
-
-	movq_m2r(*(wsptr+4),	mm3);		// wsptr[2,0],[2,1],[2,2],[2,3]
-	punpckldq_r2r(mm6, mm0);		// wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13]
-
-	punpcklwd_r2r(mm5, mm1);		// wsptr[0,tmp11],[1,tmp11],[xxx],[xxx]
-	movq_r2r(mm3, mm4);
-
-	movq_m2r(*(wsptr+6), mm6);	// wsptr[3,0],[3,1],[3,2],[3,3]
-	punpckhwd_r2r(mm5, mm2);		// wsptr[0,tmp14],[1,tmp14],[xxx],[xxx]
-
-	movq_m2r(*(wsptr+5), mm5);	// wsptr[2,4],[2,5],[2,6],[2,7]
-	punpckldq_r2r(mm2, mm1);		// wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14]
-
-	paddw_r2r(mm5, mm3);				// wsptr[2,tmp10],[xxx],[2,tmp13],[xxx]
-	movq_r2r(mm6, mm2);
-
-	psubw_r2r(mm5, mm4);				// wsptr[2,tmp11],[xxx],[2,tmp14],[xxx]
-	paddw_r2r(mm7, mm6);				// wsptr[3,tmp10],[xxx],[3,tmp13],[xxx]
-
-	movq_r2r(mm3, mm5);
-	punpcklwd_r2r(mm6, mm3);		// wsptr[2,tmp10],[3,tmp10],[xxx],[xxx]
-	
-	psubw_r2r(mm7, mm2);				// wsptr[3,tmp11],[xxx],[3,tmp14],[xxx]
-	punpckhwd_r2r(mm6, mm5);		// wsptr[2,tmp13],[3,tmp13],[xxx],[xxx]
-
-	movq_r2r(mm4, mm7);
-	punpckldq_r2r(mm5, mm3);		// wsptr[2,tmp10],[3,tmp10],[2,tmp13],[3,tmp13]
-
-	punpcklwd_r2r(mm2, mm4);		// wsptr[2,tmp11],[3,tmp11],[xxx],[xxx]
-
-	punpckhwd_r2r(mm2, mm7);		// wsptr[2,tmp14],[3,tmp14],[xxx],[xxx]
-
-	punpckldq_r2r(mm7, mm4);		// wsptr[2,tmp11],[3,tmp11],[2,tmp14],[3,tmp14]
-	movq_r2r(mm1, mm6);
-
-	//OK
-
-//	mm0 = 	;wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13]
-//	mm1 =	;wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14]
-
-	movq_r2r(mm0, mm2);
-	punpckhdq_r2r(mm4, mm6);		// wsptr[0,tmp14],[1,tmp14],[2,tmp14],[3,tmp14]
-
-	punpckldq_r2r(mm4, mm1);		// wsptr[0,tmp11],[1,tmp11],[2,tmp11],[3,tmp11]
-	psllw_i2r(2, mm6);
-
-	pmulhw_m2r(fix_141, mm6);
-	punpckldq_r2r(mm3, mm0);		// wsptr[0,tmp10],[1,tmp10],[2,tmp10],[3,tmp10]
-
-	punpckhdq_r2r(mm3, mm2);		// wsptr[0,tmp13],[1,tmp13],[2,tmp13],[3,tmp13]
-	movq_r2r(mm0, mm7);
-
-//    tmp0 = tmp10 + tmp13;
-//    tmp3 = tmp10 - tmp13;
-	paddw_r2r(mm2, mm0);				// [0,tmp0],[1,tmp0],[2,tmp0],[3,tmp0]
-	psubw_r2r(mm2, mm7);				// [0,tmp3],[1,tmp3],[2,tmp3],[3,tmp3]
-
-//    tmp12 = MULTIPLY(tmp14, FIX_1_414213562) - tmp13;
-	psubw_r2r(mm2, mm6);				// wsptr[0,tmp12],[1,tmp12],[2,tmp12],[3,tmp12]
-//    tmp1 = tmp11 + tmp12;
-//    tmp2 = tmp11 - tmp12;
-	movq_r2r(mm1, mm5);
-
-	 //OK
-
-
-    /* Odd part */
-
-//    z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3];
-//    z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3];
-//    z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7];
-//    z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7];
-	movq_m2r(*(wsptr), mm3);		// wsptr[0,0],[0,1],[0,2],[0,3]
-	paddw_r2r(mm6, mm1);				// [0,tmp1],[1,tmp1],[2,tmp1],[3,tmp1]
-
-	movq_m2r(*(wsptr+1),	mm4);		// wsptr[0,4],[0,5],[0,6],[0,7]
-	psubw_r2r(mm6, mm5);				// [0,tmp2],[1,tmp2],[2,tmp2],[3,tmp2]
-
-	movq_r2r(mm3, mm6);
-	punpckldq_r2r(mm4, mm3);		// wsptr[0,0],[0,1],[0,4],[0,5]
-
-	punpckhdq_r2r(mm6, mm4);		// wsptr[0,6],[0,7],[0,2],[0,3]
-	movq_r2r(mm3, mm2);
-
-//Save tmp0 and tmp1 in wsptr
-	movq_r2m(mm0, *(wsptr));		// save tmp0
-	paddw_r2r(mm4, mm2);				// wsptr[xxx],[0,z11],[xxx],[0,z13]
-
-	
-//Continue with z10 --- z13
-	movq_m2r(*(wsptr+2), mm6);		// wsptr[1,0],[1,1],[1,2],[1,3]
-	psubw_r2r(mm4, mm3);				// wsptr[xxx],[0,z12],[xxx],[0,z10]
-
-	movq_m2r(*(wsptr+3), mm0);		// wsptr[1,4],[1,5],[1,6],[1,7]
-	movq_r2r(mm6, mm4);
-
-	movq_r2m(mm1, *(wsptr+1));		// save tmp1
-	punpckldq_r2r(mm0, mm6);		// wsptr[1,0],[1,1],[1,4],[1,5]
-
-	punpckhdq_r2r(mm4, mm0);		// wsptr[1,6],[1,7],[1,2],[1,3]
-	movq_r2r(mm6, mm1);
-	
-//Save tmp2 and tmp3 in wsptr
-	paddw_r2r(mm0, mm6);				// wsptr[xxx],[1,z11],[xxx],[1,z13]
-	movq_r2r(mm2, mm4);
-	
-//Continue with z10 --- z13
-	movq_r2m(mm5, *(wsptr+2));		// save tmp2
-	punpcklwd_r2r(mm6, mm2);		// wsptr[xxx],[xxx],[0,z11],[1,z11]
-
-	psubw_r2r(mm0, mm1);				// wsptr[xxx],[1,z12],[xxx],[1,z10]
-	punpckhwd_r2r(mm6, mm4);		// wsptr[xxx],[xxx],[0,z13],[1,z13]
-
-	movq_r2r(mm3, mm0);
-	punpcklwd_r2r(mm1, mm3);		// wsptr[xxx],[xxx],[0,z12],[1,z12]
-
-	movq_r2m(mm7, *(wsptr+3));		// save tmp3
-	punpckhwd_r2r(mm1, mm0);		// wsptr[xxx],[xxx],[0,z10],[1,z10]
-
-	movq_m2r(*(wsptr+4), mm6);		// wsptr[2,0],[2,1],[2,2],[2,3]
-	punpckhdq_r2r(mm2, mm0);		// wsptr[0,z10],[1,z10],[0,z11],[1,z11]
-
-	movq_m2r(*(wsptr+5), mm7);	// wsptr[2,4],[2,5],[2,6],[2,7]
-	punpckhdq_r2r(mm4, mm3);		// wsptr[0,z12],[1,z12],[0,z13],[1,z13]
-
-	movq_m2r(*(wsptr+6), mm1);	// wsptr[3,0],[3,1],[3,2],[3,3]
-	movq_r2r(mm6, mm4);
-
-	punpckldq_r2r(mm7, mm6);		// wsptr[2,0],[2,1],[2,4],[2,5]
-	movq_r2r(mm1, mm5);
-
-	punpckhdq_r2r(mm4, mm7);		// wsptr[2,6],[2,7],[2,2],[2,3]
-	movq_r2r(mm6, mm2);
-	
-	movq_m2r(*(wsptr+7), mm4);	// wsptr[3,4],[3,5],[3,6],[3,7]
-	paddw_r2r(mm7, mm6);				// wsptr[xxx],[2,z11],[xxx],[2,z13]
-
-	psubw_r2r(mm7, mm2);				// wsptr[xxx],[2,z12],[xxx],[2,z10]
-	punpckldq_r2r(mm4, mm1);		// wsptr[3,0],[3,1],[3,4],[3,5]
-
-	punpckhdq_r2r(mm5, mm4);		// wsptr[3,6],[3,7],[3,2],[3,3]
-	movq_r2r(mm1, mm7);
-
-	paddw_r2r(mm4, mm1);				// wsptr[xxx],[3,z11],[xxx],[3,z13]
-	psubw_r2r(mm4, mm7);				// wsptr[xxx],[3,z12],[xxx],[3,z10]
-
-	movq_r2r(mm6, mm5);
-	punpcklwd_r2r(mm1, mm6);		// wsptr[xxx],[xxx],[2,z11],[3,z11]
-
-	punpckhwd_r2r(mm1, mm5);		// wsptr[xxx],[xxx],[2,z13],[3,z13]
-	movq_r2r(mm2, mm4);
-
-	punpcklwd_r2r(mm7, mm2);		// wsptr[xxx],[xxx],[2,z12],[3,z12]
-
-	punpckhwd_r2r(mm7, mm4);		// wsptr[xxx],[xxx],[2,z10],[3,z10]
-
-	punpckhdq_r2r(mm6, mm4);		// wsptr[2,z10],[3,z10],[2,z11],[3,z11]
-
-	punpckhdq_r2r(mm5, mm2);		// wsptr[2,z12],[3,z12],[2,z13],[3,z13]
-	movq_r2r(mm0, mm5);
-
-	punpckldq_r2r(mm4, mm0);		// wsptr[0,z10],[1,z10],[2,z10],[3,z10]
-
-	punpckhdq_r2r(mm4, mm5);		// wsptr[0,z11],[1,z11],[2,z11],[3,z11]
-	movq_r2r(mm3, mm4);
-
-	punpckhdq_r2r(mm2, mm4);		// wsptr[0,z13],[1,z13],[2,z13],[3,z13]
-	movq_r2r(mm5, mm1);
-
-	punpckldq_r2r(mm2, mm3);		// wsptr[0,z12],[1,z12],[2,z12],[3,z12]
-//    tmp7 = z11 + z13;		/* phase 5 */
-//    tmp8 = z11 - z13;		/* phase 5 */
-	psubw_r2r(mm4, mm1);				// tmp8
-
-	paddw_r2r(mm4, mm5);				// tmp7
-//    tmp21 = MULTIPLY(tmp8, FIX_1_414213562); /* 2*c4 */
-	psllw_i2r(2, mm1);
-
-	psllw_i2r(2, mm0);
-
-	pmulhw_m2r(fix_141, mm1);		// tmp21
-//    tmp20 = MULTIPLY(z12, (FIX_1_082392200- FIX_1_847759065))  /* 2*(c2-c6) */
-//			+ MULTIPLY(z10, - FIX_1_847759065); /* 2*c2 */
-	psllw_i2r(2, mm3);
-	movq_r2r(mm0, mm7);
-
-	pmulhw_m2r(fix_n184, mm7);
-	movq_r2r(mm3, mm6);
-
-	movq_m2r(*(wsptr), mm2);		// tmp0,final1
-
-	pmulhw_m2r(fix_108n184, mm6);
-//	 tmp22 = MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) /* -2*(c2+c6) */
-//			+ MULTIPLY(z12, FIX_1_847759065); /* 2*c2 */
-	movq_r2r(mm2, mm4);				// final1
-  
-	pmulhw_m2r(fix_184n261, mm0);
-	paddw_r2r(mm5, mm2);				// tmp0+tmp7,final1
-
-	pmulhw_m2r(fix_184, mm3);
-	psubw_r2r(mm5, mm4);				// tmp0-tmp7,final1
-
-//    tmp6 = tmp22 - tmp7;	/* phase 2 */
-	psraw_i2r(3, mm2);				// outptr[0,0],[1,0],[2,0],[3,0],final1
-
-	paddw_r2r(mm6, mm7);				// tmp20
-	psraw_i2r(3, mm4);				// outptr[0,7],[1,7],[2,7],[3,7],final1
-
-	paddw_r2r(mm0, mm3);				// tmp22
-
-//    tmp5 = tmp21 - tmp6;
-	psubw_r2r(mm5, mm3);				// tmp6
-
-//    tmp4 = tmp20 + tmp5;
-	movq_m2r(*(wsptr+1), mm0);		// tmp1,final2
-	psubw_r2r(mm3, mm1);				// tmp5
-
-	movq_r2r(mm0, mm6);				// final2
-	paddw_r2r(mm3, mm0);				// tmp1+tmp6,final2
-
-    /* Final output stage: scale down by a factor of 8 and range-limit */
-
-//    outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3)
-//			    & RANGE_MASK];
-//    outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3)
-//			    & RANGE_MASK];	final1
-
-
-//    outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3)
-//			    & RANGE_MASK];
-//    outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3)
-//			    & RANGE_MASK];	final2
-	psubw_r2r(mm3, mm6);				// tmp1-tmp6,final2
-	psraw_i2r(3, mm0);				// outptr[0,1],[1,1],[2,1],[3,1]
-
-	psraw_i2r(3, mm6);				// outptr[0,6],[1,6],[2,6],[3,6]
-	
-	packuswb_r2r(mm4, mm0);			// out[0,1],[1,1],[2,1],[3,1],[0,7],[1,7],[2,7],[3,7]
-	
-	movq_m2r(*(wsptr+2), mm5);		// tmp2,final3
-	packuswb_r2r(mm6, mm2);			// out[0,0],[1,0],[2,0],[3,0],[0,6],[1,6],[2,6],[3,6]
-
-//    outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3)
-//			    & RANGE_MASK];
-//    outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3)
-//			    & RANGE_MASK];	final3
-	paddw_r2r(mm1, mm7);				// tmp4
-	movq_r2r(mm5, mm3);
-
-	paddw_r2r(mm1, mm5);				// tmp2+tmp5
-	psubw_r2r(mm1, mm3);				// tmp2-tmp5
-
-	psraw_i2r(3, mm5);				// outptr[0,2],[1,2],[2,2],[3,2]
-
-	movq_m2r(*(wsptr+3), mm4);		// tmp3,final4
-	psraw_i2r(3, mm3);				// outptr[0,5],[1,5],[2,5],[3,5]
-
-
-
-//    outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3)
-//			    & RANGE_MASK];
-//    outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3)
-//			    & RANGE_MASK];	final4
-	movq_r2r(mm4, mm6);
-	paddw_r2r(mm7, mm4);				// tmp3+tmp4
-
-	psubw_r2r(mm7, mm6);				// tmp3-tmp4
-	psraw_i2r(3, mm4);				// outptr[0,4],[1,4],[2,4],[3,4]
-
-	psraw_i2r(3, mm6);				// outptr[0,3],[1,3],[2,3],[3,3]
-
-	/*
-   movq_r2m(mm4, *dummy);
-	fprintf(stderr, "3-4 %016llx\n", dummy);
-   movq_r2m(mm4, *dummy);
-	fprintf(stderr, "3+4 %016llx\n", dummy);
-	*/
-	
-
-	packuswb_r2r(mm4, mm5);			// out[0,2],[1,2],[2,2],[3,2],[0,4],[1,4],[2,4],[3,4]
-
-	packuswb_r2r(mm3, mm6);			// out[0,3],[1,3],[2,3],[3,3],[0,5],[1,5],[2,5],[3,5]
-	movq_r2r(mm2, mm4);
-
-	movq_r2r(mm5, mm7);
-	punpcklbw_r2r(mm0, mm2);		// out[0,0],[0,1],[1,0],[1,1],[2,0],[2,1],[3,0],[3,1]
-
-	punpckhbw_r2r(mm0, mm4);		// out[0,6],[0,7],[1,6],[1,7],[2,6],[2,7],[3,6],[3,7]
-	movq_r2r(mm2, mm1);
-
-	punpcklbw_r2r(mm6, mm5);		// out[0,2],[0,3],[1,2],[1,3],[2,2],[2,3],[3,2],[3,3]
-	
-	punpckhbw_r2r(mm6, mm7);		// out[0,4],[0,5],[1,4],[1,5],[2,4],[2,5],[3,4],[3,5]
-
-	punpcklwd_r2r(mm5, mm2);		// out[0,0],[0,1],[0,2],[0,3],[1,0],[1,1],[1,2],[1,3]
-	
-	movq_r2r(mm7, mm6);
-	punpckhwd_r2r(mm5, mm1);		// out[2,0],[2,1],[2,2],[2,3],[3,0],[3,1],[3,2],[3,3]
-
-	movq_r2r(mm2, mm0);
-	punpcklwd_r2r(mm4, mm6);		// out[0,4],[0,5],[0,6],[0,7],[1,4],[1,5],[1,6],[1,7]
-
-	punpckldq_r2r(mm6, mm2);		// out[0,0],[0,1],[0,2],[0,3],[0,4],[0,5],[0,6],[0,7]
-
-	movq_r2r(mm1, mm3);
-
-	punpckhwd_r2r(mm4, mm7);		// out[2,4],[2,5],[2,6],[2,7],[3,4],[3,5],[3,6],[3,7]
-	
-	dataptr += rskip;
-	movq_r2m(mm2, *(dataptr));
-
-	punpckhdq_r2r(mm6, mm0);		// out[1,0],[1,1],[1,2],[1,3],[1,4],[1,5],[1,6],[1,7]
-
-	dataptr += rskip;
-	movq_r2m(mm0, *(dataptr));
-
-	punpckldq_r2r(mm7, mm1);		// out[2,0],[2,1],[2,2],[2,3],[2,4],[2,5],[2,6],[2,7]
-	
-	punpckhdq_r2r(mm7, mm3);		// out[3,0],[3,1],[3,2],[3,3],[3,4],[3,5],[3,6],[3,7]
-
-	dataptr += rskip;
-	movq_r2m(mm1, *(dataptr));
-
-	dataptr += rskip;
-	movq_r2m(mm3, *(dataptr));
-
-#else
-  __s32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-  __s32 tmp10, tmp11, tmp12, tmp13;
-  __s32 z5, z10, z11, z12, z13;
-  __s16 *inptr;
-  __s32 *wsptr;
-  __u8 *outptr;
-  int ctr;
-  __s32 dcval;
-  __s32 workspace[64];
-
-  inptr = data;
-  wsptr = workspace;
-  for (ctr = 8; ctr > 0; ctr--) {
-    
-    if ((inptr[8] | inptr[16] | inptr[24] |
-	 inptr[32] | inptr[40] | inptr[48] | inptr[56]) == 0) {
-      dcval = inptr[0];
-      wsptr[0] = dcval;
-      wsptr[8] = dcval;
-      wsptr[16] = dcval;
-      wsptr[24] = dcval;
-      wsptr[32] = dcval;
-      wsptr[40] = dcval;
-      wsptr[48] = dcval;
-      wsptr[56] = dcval;
-      
-      inptr++;	
-      wsptr++;
-      continue;
-    } 
-    
-    tmp0 = inptr[0];
-    tmp1 = inptr[16];
-    tmp2 = inptr[32];
-    tmp3 = inptr[48];
-
-    tmp10 = tmp0 + tmp2;
-    tmp11 = tmp0 - tmp2;
-
-    tmp13 = tmp1 + tmp3;
-    tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13;
-
-    tmp0 = tmp10 + tmp13;
-    tmp3 = tmp10 - tmp13;
-    tmp1 = tmp11 + tmp12;
-    tmp2 = tmp11 - tmp12;
-    
-    tmp4 = inptr[8];
-    tmp5 = inptr[24];
-    tmp6 = inptr[40];
-    tmp7 = inptr[56];
-
-    z13 = tmp6 + tmp5;
-    z10 = tmp6 - tmp5;
-    z11 = tmp4 + tmp7;
-    z12 = tmp4 - tmp7;
-
-    tmp7 = z11 + z13;
-    tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
-
-    z5 = MULTIPLY(z10 + z12, FIX_1_847759065);
-    tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5;
-    tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5;
-
-    tmp6 = tmp12 - tmp7;
-    tmp5 = tmp11 - tmp6;
-    tmp4 = tmp10 + tmp5;
-
-    wsptr[0] = (__s32) (tmp0 + tmp7);
-    wsptr[56] = (__s32) (tmp0 - tmp7);
-    wsptr[8] = (__s32) (tmp1 + tmp6);
-    wsptr[48] = (__s32) (tmp1 - tmp6);
-    wsptr[16] = (__s32) (tmp2 + tmp5);
-    wsptr[40] = (__s32) (tmp2 - tmp5);
-    wsptr[32] = (__s32) (tmp3 + tmp4);
-    wsptr[24] = (__s32) (tmp3 - tmp4);
-
-    inptr++;
-    wsptr++;
-  }
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++) {
-    outptr = &(odata[ctr*rskip]);
-
-    tmp10 = wsptr[0] + wsptr[4];
-    tmp11 = wsptr[0] - wsptr[4];
-
-    tmp13 = wsptr[2] + wsptr[6];
-    tmp12 = MULTIPLY(wsptr[2] - wsptr[6], FIX_1_414213562) - tmp13;
-
-    tmp0 = tmp10 + tmp13;
-    tmp3 = tmp10 - tmp13;
-    tmp1 = tmp11 + tmp12;
-    tmp2 = tmp11 - tmp12;
-
-    z13 = wsptr[5] + wsptr[3];
-    z10 = wsptr[5] - wsptr[3];
-    z11 = wsptr[1] + wsptr[7];
-    z12 = wsptr[1] - wsptr[7];
-
-    tmp7 = z11 + z13;
-    tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
-
-    z5 = MULTIPLY(z10 + z12, FIX_1_847759065);
-    tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5;
-    tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5;
-
-    tmp6 = tmp12 - tmp7;
-    tmp5 = tmp11 - tmp6;
-    tmp4 = tmp10 + tmp5;
-
-    outptr[0] = RL(DESCALE(tmp0 + tmp7));
-    outptr[7] = RL(DESCALE(tmp0 - tmp7));
-    outptr[1] = RL(DESCALE(tmp1 + tmp6));
-    outptr[6] = RL(DESCALE(tmp1 - tmp6));
-    outptr[2] = RL(DESCALE(tmp2 + tmp5));
-    outptr[5] = RL(DESCALE(tmp2 - tmp5));
-    outptr[4] = RL(DESCALE(tmp3 + tmp4));
-    outptr[3] = RL(DESCALE(tmp3 - tmp4));
-
-    wsptr += 8;
-  }
-#endif
-}
-/*
-
-Main Routines
-
-This file contains most of the initialisation and control functions
-
-(C) Justin Schoeman 1998
-
-*/
-
-/*
-
-Private function
-
-Initialise all the cache-aliged data blocks
-
-*/
-
-void RTjpeg_init_data(void)
-{
- unsigned long dptr;
- 
- dptr=(unsigned long)&(RTjpeg_alldata[0]);
- dptr+=32;
- dptr=dptr>>5;
- dptr=dptr<<5; /* cache align data */
- 
- RTjpeg_block=(__s16 *)dptr;
- dptr+=sizeof(__s16)*64;
- RTjpeg_lqt=(__s32 *)dptr;
- dptr+=sizeof(__s32)*64;
- RTjpeg_cqt=(__s32 *)dptr;
- dptr+=sizeof(__s32)*64;
- RTjpeg_liqt=(__u32 *)dptr;
- dptr+=sizeof(__u32)*64;
- RTjpeg_ciqt=(__u32 *)dptr;
-}
-
-/*
-
-External Function
-
-Re-set quality factor
-
-Input: buf -> pointer to 128 ints for quant values store to pass back to
-              init_decompress.
-       Q -> quality factor (192=best, 32=worst)
-*/
-
-void RTjpeg_init_Q(__u8 Q)
-{
- int i;
- __u64 qual;
- 
- qual=(__u64)Q<<(32-7); /* 32 bit FP, 255=2, 0=0 */
-
- for(i=0; i<64; i++)
- {
-  RTjpeg_lqt[i]=(__s32)((qual/((__u64)RTjpeg_lum_quant_tbl[i]<<16))>>3);
-  if(RTjpeg_lqt[i]==0)RTjpeg_lqt[i]=1;
-  RTjpeg_cqt[i]=(__s32)((qual/((__u64)RTjpeg_chrom_quant_tbl[i]<<16))>>3);
-  if(RTjpeg_cqt[i]==0)RTjpeg_cqt[i]=1;
-  RTjpeg_liqt[i]=(1<<16)/(RTjpeg_lqt[i]<<3);
-  RTjpeg_ciqt[i]=(1<<16)/(RTjpeg_cqt[i]<<3);
-  RTjpeg_lqt[i]=((1<<16)/RTjpeg_liqt[i])>>3;
-  RTjpeg_cqt[i]=((1<<16)/RTjpeg_ciqt[i])>>3;
- }
- 
- RTjpeg_lb8=0;
- while(RTjpeg_liqt[RTjpeg_ZZ[++RTjpeg_lb8]]<=8);
- RTjpeg_lb8--;
- RTjpeg_cb8=0;
- while(RTjpeg_ciqt[RTjpeg_ZZ[++RTjpeg_cb8]]<=8);
- RTjpeg_cb8--;
-
- RTjpeg_dct_init();
- RTjpeg_idct_init();
- RTjpeg_quant_init();
-}
-
-/*
-
-External Function
-
-Initialise compression.
-
-Input: buf -> pointer to 128 ints for quant values store to pass back to 
-                init_decompress.
-       width -> width of image
-       height -> height of image
-       Q -> quality factor (192=best, 32=worst)
-       
-*/
-
-void RTjpeg_init_compress(__u32 *buf, int width, int height, __u8 Q)
-{
- int i;
- __u64 qual;
- 
- RTjpeg_init_data();
- 
- RTjpeg_width=width;
- RTjpeg_height=height;
- RTjpeg_Ywidth = RTjpeg_width>>3;
- RTjpeg_Ysize=width * height;
- RTjpeg_Cwidth = RTjpeg_width>>4;
- RTjpeg_Csize= (width>>1) * height;
-
- qual=(__u64)Q<<(32-7); /* 32 bit FP, 255=2, 0=0 */
-
- for(i=0; i<64; i++)
- {
-  RTjpeg_lqt[i]=(__s32)((qual/((__u64)RTjpeg_lum_quant_tbl[i]<<16))>>3);
-  if(RTjpeg_lqt[i]==0)RTjpeg_lqt[i]=1;
-  RTjpeg_cqt[i]=(__s32)((qual/((__u64)RTjpeg_chrom_quant_tbl[i]<<16))>>3);
-  if(RTjpeg_cqt[i]==0)RTjpeg_cqt[i]=1;
-  RTjpeg_liqt[i]=(1<<16)/(RTjpeg_lqt[i]<<3);
-  RTjpeg_ciqt[i]=(1<<16)/(RTjpeg_cqt[i]<<3);
-  RTjpeg_lqt[i]=((1<<16)/RTjpeg_liqt[i])>>3;
-  RTjpeg_cqt[i]=((1<<16)/RTjpeg_ciqt[i])>>3;
- }
- 
- RTjpeg_lb8=0;
- while(RTjpeg_liqt[RTjpeg_ZZ[++RTjpeg_lb8]]<=8);
- RTjpeg_lb8--;
- RTjpeg_cb8=0;
- while(RTjpeg_ciqt[RTjpeg_ZZ[++RTjpeg_cb8]]<=8);
- RTjpeg_cb8--;
- 
- RTjpeg_dct_init();
- RTjpeg_quant_init();
-
- for(i=0; i<64; i++)
-  buf[i]=RTjpeg_liqt[i];
- for(i=0; i<64; i++)
-  buf[64+i]=RTjpeg_ciqt[i];
-}
-
-void RTjpeg_init_decompress(__u32 *buf, int width, int height)
-{
- int i;
-
- RTjpeg_init_data();
- 
- RTjpeg_width=width;
- RTjpeg_height=height;
- RTjpeg_Ywidth = RTjpeg_width>>3;
- RTjpeg_Ysize=width * height;
- RTjpeg_Cwidth = RTjpeg_width>>4;
- RTjpeg_Csize= (width>>1) * height;
-
- for(i=0; i<64; i++)
- {
-  RTjpeg_liqt[i]=buf[i];
-  RTjpeg_ciqt[i]=buf[i+64];
- }
-
- RTjpeg_lb8=0;
- while(RTjpeg_liqt[RTjpeg_ZZ[++RTjpeg_lb8]]<=8);
- RTjpeg_lb8--;
- RTjpeg_cb8=0;
- while(RTjpeg_ciqt[RTjpeg_ZZ[++RTjpeg_cb8]]<=8);
- RTjpeg_cb8--;
-
- RTjpeg_idct_init();
-
-// RTjpeg_color_init();
-}
-
-int RTjpeg_compressYUV420(__s8 *sp, unsigned char *bp)
-{
- __s8 * sb;
- register __s8 * bp1 = bp + (RTjpeg_width<<3);
- register __s8 * bp2 = bp + RTjpeg_Ysize;
- register __s8 * bp3 = bp2 + (RTjpeg_Csize>>1);
- register int i, j, k;
-
-#ifdef MMX
- emms();
-#endif
- sb=sp;
-/* Y */
- for(i=RTjpeg_height>>1; i; i-=8)
- {
-  for(j=0, k=0; j<RTjpeg_width; j+=16, k+=8)
-  {
-   RTjpeg_dctY(bp+j, RTjpeg_block, RTjpeg_Ywidth);
-   RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
-   sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
-
-   RTjpeg_dctY(bp+j+8, RTjpeg_block, RTjpeg_Ywidth);
-   RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
-   sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
-
-   RTjpeg_dctY(bp1+j, RTjpeg_block, RTjpeg_Ywidth);
-   RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
-   sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
-
-   RTjpeg_dctY(bp1+j+8, RTjpeg_block, RTjpeg_Ywidth);
-   RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
-   sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
-
-   RTjpeg_dctY(bp2+k, RTjpeg_block, RTjpeg_Cwidth);
-   RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
-   sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
-
-   RTjpeg_dctY(bp3+k, RTjpeg_block, RTjpeg_Cwidth);
-   RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
-   sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
-
-  }
-  bp+=RTjpeg_width<<4;
-  bp1+=RTjpeg_width<<4;
-  bp2+=RTjpeg_width<<2;
-  bp3+=RTjpeg_width<<2;
-			 
- }
-#ifdef MMX
- emms();
-#endif
- return (sp-sb);
-}
-
-int RTjpeg_compressYUV422(__s8 *sp, unsigned char *bp)
-{
- __s8 * sb;
- register __s8 * bp2 = bp + RTjpeg_Ysize;
- register __s8 * bp3 = bp2 + RTjpeg_Csize;
- register int i, j, k;
-
-#ifdef MMX
- emms();
-#endif
- sb=sp;
-/* Y */
- for(i=RTjpeg_height; i; i-=8)
- {
-  for(j=0, k=0; j<RTjpeg_width; j+=16, k+=8)
-  {
-   RTjpeg_dctY(bp+j, RTjpeg_block, RTjpeg_Ywidth);
-   RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
-   sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
-
-   RTjpeg_dctY(bp+j+8, RTjpeg_block, RTjpeg_Ywidth);
-   RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
-   sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
-
-   RTjpeg_dctY(bp2+k, RTjpeg_block, RTjpeg_Cwidth);
-   RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
-   sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
-
-   RTjpeg_dctY(bp3+k, RTjpeg_block, RTjpeg_Cwidth);
-   RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
-   sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
-
-  }
-  bp+=RTjpeg_width<<3;
-  bp2+=RTjpeg_width<<2;
-  bp3+=RTjpeg_width<<2;
-			 
- }
-#ifdef MMX
- emms();
-#endif
- return (sp-sb);
-}
-
-int RTjpeg_compress8(__s8 *sp, unsigned char *bp)
-{
- __s8 * sb;
- int i, j;
-
-#ifdef MMX
- emms();
-#endif
- 
- sb=sp;
-/* Y */
- for(i=0; i<RTjpeg_height; i+=8)
- {
-  for(j=0; j<RTjpeg_width; j+=8)
-  {
-   RTjpeg_dctY(bp+j, RTjpeg_block, RTjpeg_width);
-   RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
-   sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
-  }
-  bp+=RTjpeg_width;
- }
-
-#ifdef MMX
- emms();
-#endif
- return (sp-sb);
-}
-
-void RTjpeg_decompressYUV422(__s8 *sp, __u8 *bp)
-{
- register __s8 * bp2 = bp + RTjpeg_Ysize;
- register __s8 * bp3 = bp2 + (RTjpeg_Csize);
- int i, j,k;
-
-#ifdef MMX
- emms();
-#endif
-
-/* Y */
- for(i=RTjpeg_height; i; i-=8)
- {
-  for(k=0, j=0; j<RTjpeg_width; j+=16, k+=8) {
-   if(*sp==-1)sp++;
-   else
-   { 
-    sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
-    RTjpeg_idct(bp+j, RTjpeg_block, RTjpeg_width);
-   }
-   if(*sp==-1)sp++;
-   else
-   { 
-    sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
-    RTjpeg_idct(bp+j+8, RTjpeg_block, RTjpeg_width);
-   }
-   if(*sp==-1)sp++;
-   else
-   { 
-    sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt);
-    RTjpeg_idct(bp2+k, RTjpeg_block, RTjpeg_width>>1);
-   } 
-   if(*sp==-1)sp++;
-   else
-   { 
-    sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt);
-    RTjpeg_idct(bp3+k, RTjpeg_block, RTjpeg_width>>1);
-   } 
-  }
-  bp+=RTjpeg_width<<3;
-  bp2+=RTjpeg_width<<2;
-  bp3+=RTjpeg_width<<2;
- }
-#ifdef MMX
- emms();
-#endif
-}
-
-void RTjpeg_decompressYUV420(__s8 *sp, __u8 *bp)
-{
- register __s8 * bp1 = bp + (RTjpeg_width<<3);
- register __s8 * bp2 = bp + RTjpeg_Ysize;
- register __s8 * bp3 = bp2 + (RTjpeg_Csize>>1);
- int i, j,k;
-
-#ifdef MMX
- emms();
-#endif
-
-/* Y */
- for(i=RTjpeg_height>>1; i; i-=8)
- {
-  for(k=0, j=0; j<RTjpeg_width; j+=16, k+=8) {
-   if(*sp==-1)sp++;
-   else
-   { 
-    sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
-    RTjpeg_idct(bp+j, RTjpeg_block, RTjpeg_width);
-   }
-   if(*sp==-1)sp++;
-   else
-   { 
-    sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
-    RTjpeg_idct(bp+j+8, RTjpeg_block, RTjpeg_width);
-   }
-   if(*sp==-1)sp++;
-   else
-   { 
-    sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
-    RTjpeg_idct(bp1+j, RTjpeg_block, RTjpeg_width);
-   }
-   if(*sp==-1)sp++;
-   else
-   { 
-    sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
-    RTjpeg_idct(bp1+j+8, RTjpeg_block, RTjpeg_width);
-   }
-   if(*sp==-1)sp++;
-   else
-   { 
-    sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt);
-    RTjpeg_idct(bp2+k, RTjpeg_block, RTjpeg_width>>1);
-   } 
-   if(*sp==-1)sp++;
-   else
-   { 
-    sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt);
-    RTjpeg_idct(bp3+k, RTjpeg_block, RTjpeg_width>>1);
-   } 
-  }
-  bp+=RTjpeg_width<<4;
-  bp1+=RTjpeg_width<<4;
-  bp2+=RTjpeg_width<<2;
-  bp3+=RTjpeg_width<<2;
- }
-#ifdef MMX
- emms();
-#endif
-}
-
-void RTjpeg_decompress8(__s8 *sp, __u8 *bp)
-{
- int i, j;
-
-#ifdef MMX
- emms();
-#endif
-
-/* Y */
- for(i=0; i<RTjpeg_height; i+=8)
- {
-  for(j=0; j<RTjpeg_width; j+=8)
-   if(*sp==-1)sp++;
-   else
-   { 
-    sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
-    RTjpeg_idct(bp+j, RTjpeg_block, RTjpeg_width);
-   }
-  bp+=RTjpeg_width<<3;
- }
-}
-
-/*
-External Function
-
-Initialise additional data structures for motion compensation
-
-*/
-
-void RTjpeg_init_mcompress(void)
-{
- unsigned long tmp;
-
- if(!RTjpeg_old)
- {
-  RTjpeg_old=malloc((4*RTjpeg_width*RTjpeg_height)+32);
-  tmp=(unsigned long)RTjpeg_old;
-  tmp+=32;
-  tmp=tmp>>5;
-  RTjpeg_old=(__s16 *)(tmp<<5);
- }
- if (!RTjpeg_old)
- {
-  fprintf(stderr, "RTjpeg: Could not allocate memory\n");
-  exit(-1);
- }
- bzero(RTjpeg_old, ((4*RTjpeg_width*RTjpeg_height)));
-}
-
-#ifdef MMX
-
-int RTjpeg_bcomp(__s16 *old, mmx_t *mask)
-{
- int i;
- mmx_t *mold=(mmx_t *)old;
- mmx_t *mblock=(mmx_t *)RTjpeg_block;
- volatile mmx_t result;
- static mmx_t neg=(mmx_t)(unsigned long long)0xffffffffffffffffULL;
- 
- movq_m2r(*mask, mm7);
- movq_m2r(neg, mm6);
- pxor_r2r(mm5, mm5);
- 
- for(i=0; i<8; i++)
- {
-  movq_m2r(*(mblock++), mm0);
-  			movq_m2r(*(mblock++), mm2);
-  movq_m2r(*(mold++), mm1);
-  			movq_m2r(*(mold++), mm3);
-  psubsw_r2r(mm1, mm0);
-  			psubsw_r2r(mm3, mm2);
-  movq_r2r(mm0, mm1);
-  			movq_r2r(mm2, mm3);
-  pcmpgtw_r2r(mm7, mm0);
-  			pcmpgtw_r2r(mm7, mm2);
-  pxor_r2r(mm6, mm1);
-  			pxor_r2r(mm6, mm3);
-  pcmpgtw_r2r(mm7, mm1);
-  			pcmpgtw_r2r(mm7, mm3);
-  por_r2r(mm0, mm5);
-  			por_r2r(mm2, mm5);
-  por_r2r(mm1, mm5);
-  			por_r2r(mm3, mm5);
- }
- movq_r2m(mm5, result);
- 
- if(result.q)
- {
-//  if(!RTjpeg_mtest)
-//   for(i=0; i<16; i++)((__u64 *)old)[i]=((__u64 *)RTjpeg_block)[i];
-  return 0;
- }
-// printf(".");
- return 1;
-}
-
-#else
-int RTjpeg_bcomp(__s16 *old, __u16 *mask)
-{
- int i;
-
- for(i=0; i<64; i++)
-  if(abs(old[i]-RTjpeg_block[i])>*mask)
-  {
-   if(!RTjpeg_mtest)
-    for(i=0; i<16; i++)((__u64 *)old)[i]=((__u64 *)RTjpeg_block)[i];
-   return 0;
-  }
- return 1;
-}
-#endif
-
-void RTjpeg_set_test(int i)
-{
- RTjpeg_mtest=i;
-}
-
-int RTjpeg_mcompressYUV420(__s8 *sp, unsigned char *bp, __u16 lmask, __u16 cmask)
-{
- __s8 * sb;
-//rh __s16 *block;
- register __s8 * bp1 = bp + (RTjpeg_width<<3);
- register __s8 * bp2 = bp + RTjpeg_Ysize;
- register __s8 * bp3 = bp2 + (RTjpeg_Csize>>1);
- register int i, j, k;
-
-#ifdef MMX
- emms();
- RTjpeg_lmask=(mmx_t)(((__u64)lmask<<48)|((__u64)lmask<<32)|((__u64)lmask<<16)|lmask);
- RTjpeg_cmask=(mmx_t)(((__u64)cmask<<48)|((__u64)cmask<<32)|((__u64)cmask<<16)|cmask);
-#else
- RTjpeg_lmask=lmask;
- RTjpeg_cmask=cmask;
-#endif
- 
- sb=sp;
- block=RTjpeg_old;
-/* Y */
- for(i=RTjpeg_height>>1; i; i-=8)
- {
-  for(j=0, k=0; j<RTjpeg_width; j+=16, k+=8)
-  {
-   RTjpeg_dctY(bp+j, RTjpeg_block, RTjpeg_Ywidth);
-   RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
-   if(RTjpeg_bcomp(block, &RTjpeg_lmask))
-   {
-    *((__u8 *)sp++)=255;
-   } 
-	else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
-   block+=64;
-
-   RTjpeg_dctY(bp+j+8, RTjpeg_block, RTjpeg_Ywidth);
-   RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
-   if(RTjpeg_bcomp(block, &RTjpeg_lmask))
-   {
-    *((__u8 *)sp++)=255;
-   } 
-	else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
-   block+=64;
-
-   RTjpeg_dctY(bp1+j, RTjpeg_block, RTjpeg_Ywidth);
-   RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
-   if(RTjpeg_bcomp(block, &RTjpeg_lmask))
-   {
-    *((__u8 *)sp++)=255;
-   } 
-	else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
-   block+=64;
-
-   RTjpeg_dctY(bp1+j+8, RTjpeg_block, RTjpeg_Ywidth);
-   RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
-   if(RTjpeg_bcomp(block, &RTjpeg_lmask))
-   {
-    *((__u8 *)sp++)=255;
-   } 
-	else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
-   block+=64;
-
-   RTjpeg_dctY(bp2+k, RTjpeg_block, RTjpeg_Cwidth);
-   RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
-   if(RTjpeg_bcomp(block, &RTjpeg_cmask))
-   {
-    *((__u8 *)sp++)=255;
-   } 
-	else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
-   block+=64;
-
-   RTjpeg_dctY(bp3+k, RTjpeg_block, RTjpeg_Cwidth);
-   RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
-   if(RTjpeg_bcomp(block, &RTjpeg_cmask))
-   {
-    *((__u8 *)sp++)=255;
-   } 
-	else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
-   block+=64;
-  }
-  bp+=RTjpeg_width<<4;
-  bp1+=RTjpeg_width<<4;
-  bp2+=RTjpeg_width<<2;
-  bp3+=RTjpeg_width<<2;
-			 
- }
-#ifdef MMX
- emms();
-#endif
- return (sp-sb);
-}
-
-
-int RTjpeg_mcompressYUV422(__s8 *sp, unsigned char *bp, __u16 lmask, __u16 cmask)
-{
- __s8 * sb;
- __s16 *block;
- register __s8 * bp2;
- register __s8 * bp3;
- register int i, j, k;
-
-#ifdef MMX
- emms();
- RTjpeg_lmask=(mmx_t)(((__u64)lmask<<48)|((__u64)lmask<<32)|((__u64)lmask<<16)|lmask);
- RTjpeg_cmask=(mmx_t)(((__u64)cmask<<48)|((__u64)cmask<<32)|((__u64)cmask<<16)|cmask);
-#else
- RTjpeg_lmask=lmask;
- RTjpeg_cmask=cmask;
-#endif
- 
- bp = bp - RTjpeg_width*0;
- bp2 = bp + RTjpeg_Ysize-RTjpeg_width*0;
- bp3 = bp2 + RTjpeg_Csize;
-
- sb=sp;
- block=RTjpeg_old;
-/* Y */
- for(i=RTjpeg_height; i; i-=8)
- {
-  for(j=0, k=0; j<RTjpeg_width; j+=16, k+=8)
-  {
-   RTjpeg_dctY(bp+j, RTjpeg_block, RTjpeg_Ywidth);
-   RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
-   if(RTjpeg_bcomp(block, &RTjpeg_lmask))
-   {
-    *((__u8 *)sp++)=255;
-   } 
-	else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
-   block+=64;
-
-   RTjpeg_dctY(bp+j+8, RTjpeg_block, RTjpeg_Ywidth);
-   RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
-   if(RTjpeg_bcomp(block, &RTjpeg_lmask))
-   {
-    *((__u8 *)sp++)=255;
-   } 
-	else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
-   block+=64;
-
-   RTjpeg_dctY(bp2+k, RTjpeg_block, RTjpeg_Cwidth);
-   RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
-   if(RTjpeg_bcomp(block, &RTjpeg_cmask))
-   {
-    *((__u8 *)sp++)=255;
-   } 
-	else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
-   block+=64;
-
-   RTjpeg_dctY(bp3+k, RTjpeg_block, RTjpeg_Cwidth);
-   RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
-   if(RTjpeg_bcomp(block, &RTjpeg_cmask))
-   {
-    *((__u8 *)sp++)=255;
-   } 
-	else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
-   block+=64;
-
-  }
-  bp+=RTjpeg_width<<3;
-  bp2+=RTjpeg_width<<2;
-  bp3+=RTjpeg_width<<2;
- }
- printf ("%d\n", block - RTjpeg_old);
-#ifdef MMX
- emms();
-#endif
- return (sp-sb);
-}
-
-int RTjpeg_mcompress8(__s8 *sp, unsigned char *bp, __u16 lmask)
-{
- __s8 * sb;
- __s16 *block;
- int i, j;
-
-#ifdef MMX
- emms();
- RTjpeg_lmask=(mmx_t)(((__u64)lmask<<48)|((__u64)lmask<<32)|((__u64)lmask<<16)|lmask);
-#else
- RTjpeg_lmask=lmask;
-#endif
-
- 
- sb=sp;
- block=RTjpeg_old;
-/* Y */
- for(i=0; i<RTjpeg_height; i+=8)
- {
-  for(j=0; j<RTjpeg_width; j+=8)
-  {
-   RTjpeg_dctY(bp+j, RTjpeg_block, RTjpeg_width);
-   RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
-   if(RTjpeg_bcomp(block, &RTjpeg_lmask))
-   {
-    *((__u8 *)sp++)=255;
-//    printf("* %d ", sp[-1]);
-   } else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
-   block+=64;
-  }
-  bp+=RTjpeg_width<<3;
- }
-#ifdef MMX
- emms();
-#endif
- return (sp-sb);
-}
-
-void RTjpeg_color_init(void)
-{
-}  
-
-#define KcrR 76284
-#define KcrG 53281
-#define KcbG 25625
-#define KcbB 132252
-#define Ky 76284
-
-void RTjpeg_yuv422rgb(__u8 *buf, __u8 *rgb, int stride)
-{
- int tmp;
- int i, j;
- __s32 y, crR, crG, cbG, cbB;
- __u8 *bufcr, *bufcb, *bufy, *bufoute;
- int yskip;
- 
- yskip=RTjpeg_width;
- 
- bufcb=&buf[RTjpeg_width*RTjpeg_height];
- bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/2];
- bufy=&buf[0];
- bufoute=rgb;
- 
- for(i=0; i<(RTjpeg_height); i++)
- {
-  for(j=0; j<RTjpeg_width; j+=2)
-  {
-   crR=(*bufcr-128)*KcrR;
-   crG=(*(bufcr++)-128)*KcrG;
-   cbG=(*bufcb-128)*KcbG;
-   cbB=(*(bufcb++)-128)*KcbB;
-  
-   y=(bufy[j]-16)*Ky;
-   
-   tmp=(y+crR)>>16;
-   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y-crG-cbG)>>16;
-   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y+cbB)>>16;
-   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-
-   y=(bufy[j+1]-16)*Ky;
-
-   tmp=(y+crR)>>16;
-   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y-crG-cbG)>>16;
-   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y+cbB)>>16;
-   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-
-  }
-  bufy+=yskip;
- }
-}
-
-
-void RTjpeg_yuv420rgb(__u8 *buf, __u8 *rgb, int stride)
-{
- int tmp;
- int i, j;
- __s32 y, crR, crG, cbG, cbB;
- __u8 *bufcr, *bufcb, *bufy, *bufoute, *bufouto;
- int oskip, yskip;
- 
- if(stride==0)
- 	oskip=RTjpeg_width*3;
- else
- 	oskip=2*stride-RTjpeg_width*3;
- 
- yskip=RTjpeg_width;
- 
- bufcb=&buf[RTjpeg_width*RTjpeg_height];
- bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/4];
- bufy=&buf[0];
- bufoute=rgb;
- bufouto=rgb+RTjpeg_width*3;
- 
- for(i=0; i<(RTjpeg_height>>1); i++)
- {
-  for(j=0; j<RTjpeg_width; j+=2)
-  {
-   crR=(*bufcr-128)*KcrR;
-   crG=(*(bufcr++)-128)*KcrG;
-   cbG=(*bufcb-128)*KcbG;
-   cbB=(*(bufcb++)-128)*KcbB;
-  
-   y=(bufy[j]-16)*Ky;
-   
-   tmp=(y+crR)>>16;
-   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y-crG-cbG)>>16;
-   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y+cbB)>>16;
-   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-
-   y=(bufy[j+1]-16)*Ky;
-
-   tmp=(y+crR)>>16;
-   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y-crG-cbG)>>16;
-   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y+cbB)>>16;
-   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-
-   y=(bufy[j+yskip]-16)*Ky;
-
-   tmp=(y+crR)>>16;
-   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y-crG-cbG)>>16;
-   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y+cbB)>>16;
-   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
-
-   y=(bufy[j+1+yskip]-16)*Ky;
-
-   tmp=(y+crR)>>16;
-   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y-crG-cbG)>>16;
-   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y+cbB)>>16;
-   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   
-  }
-  bufoute+=oskip;
-  bufouto+=oskip;
-  bufy+=yskip<<1;
- }
-}
-
-
-void RTjpeg_yuvrgb32(__u8 *buf, __u8 *rgb, int stride)
-{
- int tmp;
- int i, j;
- __s32 y, crR, crG, cbG, cbB;
- __u8 *bufcr, *bufcb, *bufy, *bufoute, *bufouto;
- int oskip, yskip;
- 
- if(stride==0)
- 	oskip=RTjpeg_width*4;
- else
- 	oskip = 2*stride-RTjpeg_width*4;
- yskip=RTjpeg_width;
- 
- bufcb=&buf[RTjpeg_width*RTjpeg_height];
- bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/2];
- bufy=&buf[0];
- bufoute=rgb;
- bufouto=rgb+RTjpeg_width*4;
- 
- for(i=0; i<(RTjpeg_height>>1); i++)
- {
-  for(j=0; j<RTjpeg_width; j+=2)
-  {
-   crR=(*bufcr-128)*KcrR;
-   crG=(*(bufcr++)-128)*KcrG;
-   cbG=(*bufcb-128)*KcbG;
-   cbB=(*(bufcb++)-128)*KcbB;
-  
-   y=(bufy[j]-16)*Ky;
-   
-   tmp=(y+cbB)>>16;
-   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y-crG-cbG)>>16;
-   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y+crR)>>16;
-   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   bufoute++;
-
-   y=(bufy[j+1]-16)*Ky;
-
-   tmp=(y+cbB)>>16;
-   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y-crG-cbG)>>16;
-   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y+crR)>>16;
-   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   bufoute++;
-
-   y=(bufy[j+yskip]-16)*Ky;
-
-   tmp=(y+cbB)>>16;
-   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y-crG-cbG)>>16;
-   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y+crR)>>16;
-   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   bufouto++;
-
-   y=(bufy[j+1+yskip]-16)*Ky;
-
-   tmp=(y+cbB)>>16;
-   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y-crG-cbG)>>16;
-   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y+crR)>>16;
-   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   bufouto++;
-   
-  }
-  bufoute+=oskip;
-  bufouto+=oskip;
-  bufy+=yskip<<1;
- }
-}
-
-void RTjpeg_yuvrgb24(__u8 *buf, __u8 *rgb, int stride)
-{
- int tmp;
- int i, j;
- __s32 y, crR, crG, cbG, cbB;
- __u8 *bufcr, *bufcb, *bufy, *bufoute, *bufouto;
- int oskip, yskip;
- 
- if(stride==0)
- 	oskip=RTjpeg_width*3;
- else
- 	oskip=2*stride - RTjpeg_width*3;
- 	
- yskip=RTjpeg_width;
- 
- bufcb=&buf[RTjpeg_width*RTjpeg_height];
- bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/4];
- bufy=&buf[0];
- bufoute=rgb;
- bufouto=rgb+RTjpeg_width*3;
- 
- for(i=0; i<(RTjpeg_height>>1); i++)
- {
-  for(j=0; j<RTjpeg_width; j+=2)
-  {
-   crR=(*bufcr-128)*KcrR;
-   crG=(*(bufcr++)-128)*KcrG;
-   cbG=(*bufcb-128)*KcbG;
-   cbB=(*(bufcb++)-128)*KcbB;
-  
-   y=(bufy[j]-16)*Ky;
-   
-   tmp=(y+cbB)>>16;
-   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y-crG-cbG)>>16;
-   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y+crR)>>16;
-   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-
-   y=(bufy[j+1]-16)*Ky;
-
-   tmp=(y+cbB)>>16;
-   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y-crG-cbG)>>16;
-   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y+crR)>>16;
-   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-
-   y=(bufy[j+yskip]-16)*Ky;
-
-   tmp=(y+cbB)>>16;
-   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y-crG-cbG)>>16;
-   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y+crR)>>16;
-   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
-
-   y=(bufy[j+1+yskip]-16)*Ky;
-
-   tmp=(y+cbB)>>16;
-   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y-crG-cbG)>>16;
-   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y+crR)>>16;
-   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
-   
-  }
-  bufoute+=oskip;
-  bufouto+=oskip;
-  bufy+=yskip<<1;
- }
-}
-
-void RTjpeg_yuvrgb16(__u8 *buf, __u8 *rgb, int stride)
-{
- int tmp;
- int i, j;
- __s32 y, crR, crG, cbG, cbB;
- __u8 *bufcr, *bufcb, *bufy, *bufoute, *bufouto;
- int oskip, yskip;
- unsigned char r, g, b;
- 
- if(stride==0)
- 	oskip=RTjpeg_width*2;
- else
- 	oskip=2*stride-RTjpeg_width*2;
- 	
- yskip=RTjpeg_width;
- 
- bufcb=&buf[RTjpeg_width*RTjpeg_height];
- bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/4];
- bufy=&buf[0];
- bufoute=rgb;
- bufouto=rgb+RTjpeg_width*2;
- 
- for(i=0; i<(RTjpeg_height>>1); i++)
- {
-  for(j=0; j<RTjpeg_width; j+=2)
-  {
-   crR=(*bufcr-128)*KcrR;
-   crG=(*(bufcr++)-128)*KcrG;
-   cbG=(*bufcb-128)*KcbG;
-   cbB=(*(bufcb++)-128)*KcbB;
-  
-   y=(bufy[j]-16)*Ky;
-   
-   tmp=(y+cbB)>>16;
-   b=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y-crG-cbG)>>16;
-   g=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y+crR)>>16;
-   r=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(int)((int)b >> 3);
-   tmp|=(int)(((int)g >> 2) << 5);
-   tmp|=(int)(((int)r >> 3) << 11);
-   *(bufoute++)=tmp&0xff;
-   *(bufoute++)=tmp>>8;
-   
-
-   y=(bufy[j+1]-16)*Ky;
-
-   tmp=(y+cbB)>>16;
-   b=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y-crG-cbG)>>16;
-   g=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y+crR)>>16;
-   r=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(int)((int)b >> 3);
-   tmp|=(int)(((int)g >> 2) << 5);
-   tmp|=(int)(((int)r >> 3) << 11);
-   *(bufoute++)=tmp&0xff;
-   *(bufoute++)=tmp>>8;
-
-   y=(bufy[j+yskip]-16)*Ky;
-
-   tmp=(y+cbB)>>16;
-   b=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y-crG-cbG)>>16;
-   g=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y+crR)>>16;
-   r=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(int)((int)b >> 3);
-   tmp|=(int)(((int)g >> 2) << 5);
-   tmp|=(int)(((int)r >> 3) << 11);
-   *(bufouto++)=tmp&0xff;
-   *(bufouto++)=tmp>>8;
-
-   y=(bufy[j+1+yskip]-16)*Ky;
-
-   tmp=(y+cbB)>>16;
-   b=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y-crG-cbG)>>16;
-   g=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(y+crR)>>16;
-   r=(tmp>255)?255:((tmp<0)?0:tmp);
-   tmp=(int)((int)b >> 3);
-   tmp|=(int)(((int)g >> 2) << 5);
-   tmp|=(int)(((int)r >> 3) << 11);
-   *(bufouto++)=tmp&0xff;
-   *(bufouto++)=tmp>>8;
-
-  }
-  bufoute+=oskip;
-  bufouto+=oskip;
-  bufy+=yskip<<1;
- }
-}
-
-/* fix stride */
-
-void RTjpeg_yuvrgb8(__u8 *buf, __u8 *rgb, int stride)
-{
- bcopy(buf, rgb, RTjpeg_width*RTjpeg_height);
-}
-
--- a/RTjpegN.h	Sat Apr 13 17:44:14 2002 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-/* 
-   RTjpeg (C) Justin Schoeman 1998 (justin@suntiger.ee.up.ac.za)
-   
-   With modifications by:
-   (c) 1998, 1999 by Joerg Walter <trouble@moes.pmnet.uni-oldenburg.de>
-   and
-   (c) 1999 by Wim Taymans <wim.taymans@tvd.be>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-    
-*/
-
-#ifndef _I386_TYPES_H
-typedef unsigned char __u8;
-typedef unsigned short __u16;
-typedef unsigned long __u32;
-typedef unsigned long long __u64;
-typedef signed char __s8;
-typedef signed short __s16;
-typedef signed long __s32;
-#endif
-
-extern void RTjpeg_init_Q(__u8 Q);
-extern void RTjpeg_init_compress(long unsigned int *buf, int width, int height, __u8 Q);
-extern void RTjpeg_init_decompress(long unsigned int *buf, int width, int height);
-extern int RTjpeg_compressYUV420(__s8 *sp, unsigned char *bp);
-extern int RTjpeg_compressYUV422(__s8 *sp, unsigned char *bp);
-extern void RTjpeg_decompressYUV420(__s8 *sp, __u8 *bp);
-extern void RTjpeg_decompressYUV422(__s8 *sp, __u8 *bp);
-extern int RTjpeg_compress8(__s8 *sp, unsigned char *bp);
-extern void RTjpeg_decompress8(__s8 *sp, __u8 *bp);
-
-extern void RTjpeg_init_mcompress(void);
-extern int RTjpeg_mcompressYUV420(__s8 *sp, unsigned char *bp, __u16 lmask, __u16 cmask);
-extern int RTjpeg_mcompressYUV422(__s8 *sp, unsigned char *bp, __u16 lmask, __u16 cmask);
-extern int RTjpeg_mcompress8(__s8 *sp, unsigned char *bp, __u16 lmask);
-extern void RTjpeg_set_test(int i);
-
-extern void RTjpeg_yuv420rgb(__u8 *buf, __u8 *rgb, int stride);
-extern void RTjpeg_yuv422rgb(__u8 *buf, __u8 *rgb, int stride);
-extern void RTjpeg_yuvrgb8(__u8 *buf, __u8 *rgb, int stride);
-extern void RTjpeg_yuvrgb16(__u8 *buf, __u8 *rgb, int stride);
-extern void RTjpeg_yuvrgb24(__u8 *buf, __u8 *rgb, int stride);
-extern void RTjpeg_yuvrgb32(__u8 *buf, __u8 *rgb, int stride);
-		  
-
--- a/alaw.h	Sat Apr 13 17:44:14 2002 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,71 +0,0 @@
-// Generated by TOOLS/alaw-gen.c
-
-short alaw2short[]={
- -5504, -5248, -6016, -5760, -4480, -4224, -4992, -4736,
- -7552, -7296, -8064, -7808, -6528, -6272, -7040, -6784,
- -2752, -2624, -3008, -2880, -2240, -2112, -2496, -2368,
- -3776, -3648, -4032, -3904, -3264, -3136, -3520, -3392,
--22016,-20992,-24064,-23040,-17920,-16896,-19968,-18944,
--30208,-29184,-32256,-31232,-26112,-25088,-28160,-27136,
--11008,-10496,-12032,-11520, -8960, -8448, -9984, -9472,
--15104,-14592,-16128,-15616,-13056,-12544,-14080,-13568,
-  -344,  -328,  -376,  -360,  -280,  -264,  -312,  -296,
-  -472,  -456,  -504,  -488,  -408,  -392,  -440,  -424,
-   -88,   -72,  -120,  -104,   -24,    -8,   -56,   -40,
-  -216,  -200,  -248,  -232,  -152,  -136,  -184,  -168,
- -1376, -1312, -1504, -1440, -1120, -1056, -1248, -1184,
- -1888, -1824, -2016, -1952, -1632, -1568, -1760, -1696,
-  -688,  -656,  -752,  -720,  -560,  -528,  -624,  -592,
-  -944,  -912, -1008,  -976,  -816,  -784,  -880,  -848,
-  5504,  5248,  6016,  5760,  4480,  4224,  4992,  4736,
-  7552,  7296,  8064,  7808,  6528,  6272,  7040,  6784,
-  2752,  2624,  3008,  2880,  2240,  2112,  2496,  2368,
-  3776,  3648,  4032,  3904,  3264,  3136,  3520,  3392,
- 22016, 20992, 24064, 23040, 17920, 16896, 19968, 18944,
- 30208, 29184, 32256, 31232, 26112, 25088, 28160, 27136,
- 11008, 10496, 12032, 11520,  8960,  8448,  9984,  9472,
- 15104, 14592, 16128, 15616, 13056, 12544, 14080, 13568,
-   344,   328,   376,   360,   280,   264,   312,   296,
-   472,   456,   504,   488,   408,   392,   440,   424,
-    88,    72,   120,   104,    24,     8,    56,    40,
-   216,   200,   248,   232,   152,   136,   184,   168,
-  1376,  1312,  1504,  1440,  1120,  1056,  1248,  1184,
-  1888,  1824,  2016,  1952,  1632,  1568,  1760,  1696,
-   688,   656,   752,   720,   560,   528,   624,   592,
-   944,   912,  1008,   976,   816,   784,   880,   848
-};
-
-short ulaw2short[]={
--32124,-31100,-30076,-29052,-28028,-27004,-25980,-24956,
--23932,-22908,-21884,-20860,-19836,-18812,-17788,-16764,
--15996,-15484,-14972,-14460,-13948,-13436,-12924,-12412,
--11900,-11388,-10876,-10364, -9852, -9340, -8828, -8316,
- -7932, -7676, -7420, -7164, -6908, -6652, -6396, -6140,
- -5884, -5628, -5372, -5116, -4860, -4604, -4348, -4092,
- -3900, -3772, -3644, -3516, -3388, -3260, -3132, -3004,
- -2876, -2748, -2620, -2492, -2364, -2236, -2108, -1980,
- -1884, -1820, -1756, -1692, -1628, -1564, -1500, -1436,
- -1372, -1308, -1244, -1180, -1116, -1052,  -988,  -924,
-  -876,  -844,  -812,  -780,  -748,  -716,  -684,  -652,
-  -620,  -588,  -556,  -524,  -492,  -460,  -428,  -396,
-  -372,  -356,  -340,  -324,  -308,  -292,  -276,  -260,
-  -244,  -228,  -212,  -196,  -180,  -164,  -148,  -132,
-  -120,  -112,  -104,   -96,   -88,   -80,   -72,   -64,
-   -56,   -48,   -40,   -32,   -24,   -16,    -8,     0,
- 32124, 31100, 30076, 29052, 28028, 27004, 25980, 24956,
- 23932, 22908, 21884, 20860, 19836, 18812, 17788, 16764,
- 15996, 15484, 14972, 14460, 13948, 13436, 12924, 12412,
- 11900, 11388, 10876, 10364,  9852,  9340,  8828,  8316,
-  7932,  7676,  7420,  7164,  6908,  6652,  6396,  6140,
-  5884,  5628,  5372,  5116,  4860,  4604,  4348,  4092,
-  3900,  3772,  3644,  3516,  3388,  3260,  3132,  3004,
-  2876,  2748,  2620,  2492,  2364,  2236,  2108,  1980,
-  1884,  1820,  1756,  1692,  1628,  1564,  1500,  1436,
-  1372,  1308,  1244,  1180,  1116,  1052,   988,   924,
-   876,   844,   812,   780,   748,   716,   684,   652,
-   620,   588,   556,   524,   492,   460,   428,   396,
-   372,   356,   340,   324,   308,   292,   276,   260,
-   244,   228,   212,   196,   180,   164,   148,   132,
-   120,   112,   104,    96,    88,    80,    72,    64,
-    56,    48,    40,    32,    24,    16,     8,     0
-};
--- a/cinepak.c	Sat Apr 13 17:44:14 2002 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,886 +0,0 @@
-/* ------------------------------------------------------------------------
- * Radius Cinepak Video Decoder
- *
- * Dr. Tim Ferguson, 2001.
- * For more details on the algorithm:
- *         http://www.csse.monash.edu.au/~timf/videocodec.html
- *
- * This is basically a vector quantiser with adaptive vector density.  The
- * frame is segmented into 4x4 pixel blocks, and each block is coded using
- * either 1 or 4 vectors.
- *
- * There are still some issues with this code yet to be resolved.  In
- * particular with decoding in the strip boundaries.
- * ------------------------------------------------------------------------ */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <unistd.h>
-#include <math.h>
-
-#include "config.h"
-#include "mp_msg.h"
-#include "bswap.h"
-
-#include "libvo/img_format.h"
-#include "mp_image.h"
-
-#define DBUG	0
-#define MAX_STRIPS 32
-
-/* ------------------------------------------------------------------------ */
-typedef struct
-{
-	unsigned char y0, y1, y2, y3;
-	char u, v;
-
-	// These variables are for YV12 output: The v1 vars are for
-	// when the vector is doublesized and used by itself to paint a
-	// 4x4 block.
-	// This quad (y0 y0 y1 y1) is used on the 2 upper rows.
-	unsigned long yv12_v1_u;
-	// This quad (y2 y2 y3 y3) is used on the 2 lower rows.
-	unsigned long yv12_v1_l;
-	// The v4 vars are for when the vector is used as 1 of 4 vectors
-	// to paint a 4x4 block.
-	// Upper pair (y0 y1):
-	unsigned short yv12_v4_u;
-	// Lower pair (y2 y3):
-	unsigned short yv12_v4_l;
-
-	// These longs are for YUY2 output: The v1 vars are for when the
-	// vector is doublesized and used by itself to paint a 4x4 block.
-	// The names stand for the upper-left, upper-right,
-	// lower-left, and lower-right YUY2 pixel pairs.
-        unsigned long yuy2_v1_ul, yuy2_v1_ur;
-        unsigned long yuy2_v1_ll, yuy2_v1_lr;
-	// The v4 vars are for when the vector is used as 1 of 4 vectors
-	// to paint a 4x4 block. The names stand for upper and lower
-	// YUY2 pixel pairs.
-        unsigned long yuy2_v4_u, yuy2_v4_l;
-
-	// These longs are for BGR32 output
-	unsigned long rgb0, rgb1, rgb2, rgb3;
-
-	// These char arrays are for BGR24 output
-	unsigned char r[4], g[4], b[4];
-} cvid_codebook;
-
-typedef struct {
-	cvid_codebook *v4_codebook[MAX_STRIPS];
-	cvid_codebook *v1_codebook[MAX_STRIPS];
-	unsigned long strip_num;
-} cinepak_info;
-
-
-/* ------------------------------------------------------------------------ */
-static unsigned char *in_buffer, uiclip[1024], *uiclp = NULL;
-
-#define SCALEBITS 16
-#define ONE_HALF  ((long) 1 << (SCALEBITS-1))
-#define FIX(x)    ((long) ((x) * (1L<<SCALEBITS) + 0.5))
-static long CU_Y_tab[256], CV_Y_tab[256], CU_Cb_tab[256], CV_Cb_tab[256],
-	CU_Cr_tab[256], CV_Cr_tab[256];
-
-#define get_byte() *(in_buffer++)
-#define skip_byte() in_buffer++
-#define get_word() ((unsigned short)(in_buffer += 2, \
-	(in_buffer[-2] << 8 | in_buffer[-1])))
-#define get_long() ((unsigned long)(in_buffer += 4, \
-	(in_buffer[-4] << 24 | in_buffer[-3] << 16 | in_buffer[-2] << 8 | in_buffer[-1])))
-
-
-/* ---------------------------------------------------------------------- */
-
-// This PACKing macro packs the luminance bytes as y1-y1-y0-y0, which is
-// stored on a little endian machine as y0-y0-y1-y1. Therefore, treat it as
-// a little endian number and rearrange the bytes on big endian machines
-// using the built-in byte order macros.
-#define PACK_YV12_V1_Y(cb,y0,y1) le2me_32((((unsigned char)cb->y1)<<24)|(cb->y1<<16)|(((unsigned char)cb->y0)<<8)|(cb->y0))
-#define PACK_YV12_V4_Y(cb,y0,y1) le2me_16((((unsigned char)cb->y1)<<8)|(cb->y0))
-
-static inline void read_codebook_yv12(cvid_codebook *c, int mode)
-{
-unsigned char y0, y1, y2, y3, u, v;
-int y_uv;
-
-	if(mode)		/* black and white */
-		{
-		c->y0 = get_byte();
-		c->y1 = get_byte();
-		c->y2 = get_byte();
-		c->y3 = get_byte();
-		c->u = c->v = 128;
-		}
-	else			/* colour */
-		{
-		y0 = get_byte();  /* luma */
-		y1 = get_byte();
-		y2 = get_byte();
-		y3 = get_byte();
-		u = 128+get_byte(); /* chroma */
-		v = 128+get_byte();
-
-		/*             YUV * inv(CinYUV)
-		 *  | Y  |   | 1 -0.0655  0.0110 | | CY |
-		 *  | Cb | = | 0  1.1656 -0.0062 | | CU |
-		 *  | Cr |   | 0  0.0467  1.4187 | | CV |
-		 */
-		y_uv = (int)((CU_Y_tab[u] + CV_Y_tab[v]) >> SCALEBITS);
-		c->y0 = uiclp[y0 + y_uv];
-		c->y1 = uiclp[y1 + y_uv];
-		c->y2 = uiclp[y2 + y_uv];
-		c->y3 = uiclp[y3 + y_uv];
-		c->u = uiclp[(int)((CU_Cb_tab[u] + CV_Cb_tab[v]) >> SCALEBITS)];
-		c->v = uiclp[(int)((CU_Cr_tab[u] + CV_Cr_tab[v]) >> SCALEBITS)];
-
-		c->yv12_v1_u = PACK_YV12_V1_Y(c, y0, y1);
-		c->yv12_v1_l = PACK_YV12_V1_Y(c, y2, y3);
-		c->yv12_v4_u = PACK_YV12_V4_Y(c, y0, y1);
-		c->yv12_v4_l = PACK_YV12_V4_Y(c, y2, y3);
-		}
-}
-
-/* ---------------------------------------------------------------------- */
-
-inline void cvid_v1_yv12(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb)
-{
-unsigned char *p;
-int stride;
-
-	if(y+3>=(unsigned int)mpi->height) return; // avoid sig11
-
-	// take care of the luminance
-	stride = mpi->stride[0];
-	p = mpi->planes[0]+y*stride+x;
-	*((unsigned int*)p)=cb->yv12_v1_u;
-	*((unsigned int*)(p+stride))=cb->yv12_v1_u;
-	*((unsigned int*)(p+stride*2))=cb->yv12_v1_l;
-	*((unsigned int*)(p+stride*3))=cb->yv12_v1_l;
-
-	// now for the chrominance
-	x/=2; y/=2;
-
-	stride = mpi->stride[1];
-	p = mpi->planes[1]+y*stride+x;
-	p[0]=p[1]=p[stride]=p[stride+1]=cb->u;
-
-	stride = mpi->stride[2];
-	p = mpi->planes[2]+y*stride+x;
-	p[0]=p[1]=p[stride]=p[stride+1]=cb->v;
-
-}
-
-/* ---------------------------------------------------------------------- */
-inline void cvid_v4_yv12(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb0,
-	cvid_codebook *cb1, cvid_codebook *cb2, cvid_codebook *cb3)
-{
-unsigned char *p;
-int stride;
-
-	if(y+3>=(unsigned int)mpi->height) return; // avoid sig11
-
-	// take care of the luminance
-	stride = mpi->stride[0];
-	p = mpi->planes[0]+y*stride+x;
-	((unsigned short*)p)[0]=cb0->yv12_v4_u;
-	((unsigned short*)p)[1]=cb1->yv12_v4_u;
-	((unsigned short*)(p+stride))[0]=cb0->yv12_v4_l;
-	((unsigned short*)(p+stride))[1]=cb1->yv12_v4_l;
-	((unsigned short*)(p+stride*2))[0]=cb2->yv12_v4_u;
-	((unsigned short*)(p+stride*2))[1]=cb3->yv12_v4_u;
-	((unsigned short*)(p+stride*3))[0]=cb2->yv12_v4_l;
-	((unsigned short*)(p+stride*3))[1]=cb3->yv12_v4_l;
-
-	// now for the chrominance
-	x/=2; y/=2;
-
-	stride = mpi->stride[1];
-	p = mpi->planes[1]+y*stride+x;
-	p[0]=cb0->u; p[1]=cb1->u;
-	p[stride]=cb2->u; p[stride+1]=cb3->u;
-
-	stride = mpi->stride[2];
-	p = mpi->planes[2]+y*stride+x;
-	p[0]=cb0->v; p[1]=cb1->v;
-	p[stride]=cb2->v; p[stride+1]=cb3->v;
-
-}
-
-/* ---------------------------------------------------------------------- */
-
-#define PACK_YUY2(cb,y0,y1,u,v) le2me_32(((((unsigned char)cb->v)<<24)|(cb->y1<<16)|(((unsigned char)cb->u)<<8)|(cb->y0)))
-
-static inline void read_codebook_yuy2(cvid_codebook *c, int mode)
-{
-unsigned char y0, y1, y2, y3, u, v;
-int y_uv;
-
-	if(mode)		/* black and white */
-		{
-		c->y0 = get_byte();
-		c->y1 = get_byte();
-		c->y2 = get_byte();
-		c->y3 = get_byte();
-		c->u = c->v = 128;
-		}
-	else			/* colour */
-		{
-		y0 = get_byte();  /* luma */
-		y1 = get_byte();
-		y2 = get_byte();
-		y3 = get_byte();
-		u = 128+get_byte(); /* chroma */
-		v = 128+get_byte();
-
-		/*             YUV * inv(CinYUV)
-		 *  | Y  |   | 1 -0.0655  0.0110 | | CY |
-		 *  | Cb | = | 0  1.1656 -0.0062 | | CU |
-		 *  | Cr |   | 0  0.0467  1.4187 | | CV |
-		 */
-		y_uv = (int)((CU_Y_tab[u] + CV_Y_tab[v]) >> SCALEBITS);
-		c->y0 = uiclp[y0 + y_uv];
-		c->y1 = uiclp[y1 + y_uv];
-		c->y2 = uiclp[y2 + y_uv];
-		c->y3 = uiclp[y3 + y_uv];
-		c->u = uiclp[(int)((CU_Cb_tab[u] + CV_Cb_tab[v]) >> SCALEBITS)];
-		c->v = uiclp[(int)((CU_Cr_tab[u] + CV_Cr_tab[v]) >> SCALEBITS)];
-
-		c->yuy2_v4_u = PACK_YUY2(c, y0, y1, u, v);
-		c->yuy2_v4_l = PACK_YUY2(c, y2, y3, u, v);
-		c->yuy2_v1_ul = PACK_YUY2(c, y0, y0, u, v);
-		c->yuy2_v1_ur = PACK_YUY2(c, y1, y1, u, v);
-		c->yuy2_v1_ll = PACK_YUY2(c, y2, y2, u, v);
-		c->yuy2_v1_lr = PACK_YUY2(c, y3, y3, u, v);
-		}
-}
-
-/* ------------------------------------------------------------------------ */
-inline void cvid_v1_yuy2(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb)
-{
-int stride = mpi->stride[0] / 2;
-unsigned long *vptr = (unsigned long *)mpi->planes[0];
-
-	if(y+3>=(unsigned int)mpi->height) return; // avoid sig11
-
-	vptr += (y * mpi->stride[0] + x) / 2;
-
-	vptr[0] = cb->yuy2_v1_ul;
-	vptr[1] = cb->yuy2_v1_ur;
-
-	vptr += stride;
-	vptr[0] = cb->yuy2_v1_ul;
-	vptr[1] = cb->yuy2_v1_ur;
-
-	vptr += stride;
-	vptr[0] = cb->yuy2_v1_ll;
-	vptr[1] = cb->yuy2_v1_lr;
-
-	vptr += stride;
-	vptr[0] = cb->yuy2_v1_ll;
-	vptr[1] = cb->yuy2_v1_lr;
-}
-
-
-/* ------------------------------------------------------------------------ */
-inline void cvid_v4_yuy2(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb0,
-	cvid_codebook *cb1, cvid_codebook *cb2, cvid_codebook *cb3)
-{
-int stride = mpi->stride[0] / 2;
-unsigned long *vptr = (unsigned long *)mpi->planes[0];
-
-	if(y+3>=(unsigned int)mpi->height) return; // avoid sig11
-
-	vptr += (y * mpi->stride[0] + x) / 2;
-
-	vptr[0] = cb0->yuy2_v4_u;
-	vptr[1] = cb1->yuy2_v4_u;
-
-	vptr += stride;
-	vptr[0] = cb0->yuy2_v4_l;
-	vptr[1] = cb1->yuy2_v4_l;
-
-	vptr += stride;
-	vptr[0] = cb2->yuy2_v4_u;
-	vptr[1] = cb3->yuy2_v4_u;
-
-	vptr += stride;
-	vptr[0] = cb2->yuy2_v4_l;
-	vptr[1] = cb3->yuy2_v4_l;
-}
-
-/* ---------------------------------------------------------------------- */
-static inline void read_codebook_32(cvid_codebook *c, int mode)
-{
-int uvr, uvg, uvb;
-
-	if(mode)		/* black and white */
-		{
-		c->y0 = get_byte();
-		c->y1 = get_byte();
-		c->y2 = get_byte();
-		c->y3 = get_byte();
-		c->u = c->v = 0;
-
-		c->rgb0 = (c->y0 << 16) | (c->y0 << 8) | c->y0;
-		c->rgb1 = (c->y1 << 16) | (c->y1 << 8) | c->y1;
-		c->rgb2 = (c->y2 << 16) | (c->y2 << 8) | c->y2;
-		c->rgb3 = (c->y3 << 16) | (c->y3 << 8) | c->y3;
-		}
-	else			/* colour */
-		{
-		c->y0 = get_byte();  /* luma */
-		c->y1 = get_byte();
-		c->y2 = get_byte();
-		c->y3 = get_byte();
-		c->u = get_byte(); /* chroma */
-		c->v = get_byte();
-
-		uvr = c->v << 1;
-		uvg = -((c->u+1) >> 1) - c->v;
-		uvb = c->u << 1;
-
-		c->rgb0 = le2me_32((uiclp[c->y0 + uvr] << 16) | (uiclp[c->y0 + uvg] << 8) | uiclp[c->y0 + uvb]);
-		c->rgb1 = le2me_32((uiclp[c->y1 + uvr] << 16) | (uiclp[c->y1 + uvg] << 8) | uiclp[c->y1 + uvb]);
-		c->rgb2 = le2me_32((uiclp[c->y2 + uvr] << 16) | (uiclp[c->y2 + uvg] << 8) | uiclp[c->y2 + uvb]);
-		c->rgb3 = le2me_32((uiclp[c->y3 + uvr] << 16) | (uiclp[c->y3 + uvg] << 8) | uiclp[c->y3 + uvb]);
-		}
-}
-
-
-/* ------------------------------------------------------------------------ */
-inline void cvid_v1_32(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb)
-{
-int stride = mpi->stride[0];
-unsigned long *vptr = (unsigned long *)mpi->planes[0];
-unsigned long rgb;
-
-	if(y+3>=(unsigned int)mpi->height) return; // avoid sig11
-
-	vptr += (y * stride + x);
-
-	vptr[0] = rgb = cb->rgb0; vptr[1] = rgb;
-	vptr[2] = rgb = cb->rgb1; vptr[3] = rgb;
-	vptr += stride;
-	vptr[0] = rgb = cb->rgb0; vptr[1] = rgb;
-	vptr[2] = rgb = cb->rgb1; vptr[3] = rgb;
-	vptr += stride;
-	vptr[0] = rgb = cb->rgb2; vptr[1] = rgb;
-	vptr[2] = rgb = cb->rgb3; vptr[3] = rgb;
-	vptr += stride;
-	vptr[0] = rgb = cb->rgb2; vptr[1] = rgb;
-	vptr[2] = rgb = cb->rgb3; vptr[3] = rgb;
-}
-
-
-/* ------------------------------------------------------------------------ */
-inline void cvid_v4_32(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb0,
-	cvid_codebook *cb1, cvid_codebook *cb2, cvid_codebook *cb3)
-{
-int stride = mpi->stride[0];
-unsigned long *vptr = (unsigned long *)mpi->planes[0];
-
-	if(y+3>=(unsigned int)mpi->height) return; // avoid sig11
-
-	vptr += (y * stride + x);
-
-	vptr[0] = cb0->rgb0;
-	vptr[1] = cb0->rgb1;
-	vptr[2] = cb1->rgb0;
-	vptr[3] = cb1->rgb1;
-	vptr += stride;
-	vptr[0] = cb0->rgb2;
-	vptr[1] = cb0->rgb3;
-	vptr[2] = cb1->rgb2;
-	vptr[3] = cb1->rgb3;
-	vptr += stride;
-	vptr[0] = cb2->rgb0;
-	vptr[1] = cb2->rgb1;
-	vptr[2] = cb3->rgb0;
-	vptr[3] = cb3->rgb1;
-	vptr += stride;
-	vptr[0] = cb2->rgb2;
-	vptr[1] = cb2->rgb3;
-	vptr[2] = cb3->rgb2;
-	vptr[3] = cb3->rgb3;
-}
-
-
-/* ---------------------------------------------------------------------- */
-static inline void read_codebook_24(cvid_codebook *c, int mode)
-{
-int uvr, uvg, uvb;
-
-	if(mode)		/* black and white */
-		{
-		c->y0 = get_byte();
-		c->y1 = get_byte();
-		c->y2 = get_byte();
-		c->y3 = get_byte();
-		c->u = c->v = 0;
-
-		c->r[0] = c->g[0] = c->b[0] = c->y0;
-		c->r[1] = c->g[1] = c->b[1] = c->y1;
-		c->r[2] = c->g[2] = c->b[2] = c->y2;
-		c->r[3] = c->g[3] = c->b[3] = c->y3;
-		}
-	else			/* colour */
-		{
-		c->y0 = get_byte();  /* luma */
-		c->y1 = get_byte();
-		c->y2 = get_byte();
-		c->y3 = get_byte();
-		c->u = get_byte(); /* chroma */
-		c->v = get_byte();
-
-		uvr = c->v << 1;
-		uvg = -((c->u+1) >> 1) - c->v;
-		uvb = c->u << 1;
-
-		c->r[0] = uiclp[c->y0 + uvr]; c->g[0] = uiclp[c->y0 + uvg]; c->b[0] = uiclp[c->y0 + uvb];
-		c->r[1] = uiclp[c->y1 + uvr]; c->g[1] = uiclp[c->y1 + uvg]; c->b[1] = uiclp[c->y1 + uvb];
-		c->r[2] = uiclp[c->y2 + uvr]; c->g[2] = uiclp[c->y2 + uvg]; c->b[2] = uiclp[c->y2 + uvb];
-		c->r[3] = uiclp[c->y3 + uvr]; c->g[3] = uiclp[c->y3 + uvg]; c->b[3] = uiclp[c->y3 + uvb];
-		}
-}
-
-
-/* ------------------------------------------------------------------------ */
-inline void cvid_v1_24(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb)
-{
-unsigned char r, g, b;
-int stride = (mpi->stride[0]-4)*3;
-unsigned char *vptr = mpi->planes[0] + (y * mpi->stride[0] + x) * 3;
-
-	if(y+3>=(unsigned int)mpi->height) return; // avoid sig11
-
-	*vptr++ = b = cb->b[0]; *vptr++ = g = cb->g[0]; *vptr++ = r = cb->r[0];
-	*vptr++ = b; *vptr++ = g; *vptr++ = r;
-	*vptr++ = b = cb->b[1]; *vptr++ = g = cb->g[1]; *vptr++ = r = cb->r[1];
-	*vptr++ = b; *vptr++ = g; *vptr++ = r;
-	vptr += stride;
-	*vptr++ = b = cb->b[0]; *vptr++ = g = cb->g[0]; *vptr++ = r = cb->r[0];
-	*vptr++ = b; *vptr++ = g; *vptr++ = r;
-	*vptr++ = b = cb->b[1]; *vptr++ = g = cb->g[1]; *vptr++ = r = cb->r[1];
-	*vptr++ = b; *vptr++ = g; *vptr++ = r;
-	vptr += stride;
-	*vptr++ = b = cb->b[2]; *vptr++ = g = cb->g[2]; *vptr++ = r = cb->r[2];
-	*vptr++ = b; *vptr++ = g; *vptr++ = r;
-	*vptr++ = b = cb->b[3]; *vptr++ = g = cb->g[3]; *vptr++ = r = cb->r[3];
-	*vptr++ = b; *vptr++ = g; *vptr++ = r;
-	vptr += stride;
-	*vptr++ = b = cb->b[2]; *vptr++ = g = cb->g[2]; *vptr++ = r = cb->r[2];
-	*vptr++ = b; *vptr++ = g; *vptr++ = r;
-	*vptr++ = b = cb->b[3]; *vptr++ = g = cb->g[3]; *vptr++ = r = cb->r[3];
-	*vptr++ = b; *vptr++ = g; *vptr++ = r;
-}
-
-
-/* ------------------------------------------------------------------------ */
-inline void cvid_v4_24(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb0,
-	cvid_codebook *cb1, cvid_codebook *cb2, cvid_codebook *cb3)
-{
-int stride = (mpi->stride[0]-4)*3;
-unsigned char *vptr = mpi->planes[0] + (y * mpi->stride[0] + x) * 3;
-
-	if(y+3>=(unsigned int)mpi->height) return; // avoid sig11
-
-	*vptr++ = cb0->b[0]; *vptr++ = cb0->g[0]; *vptr++ = cb0->r[0];
-	*vptr++ = cb0->b[1]; *vptr++ = cb0->g[1]; *vptr++ = cb0->r[1];
-	*vptr++ = cb1->b[0]; *vptr++ = cb1->g[0]; *vptr++ = cb1->r[0];
-	*vptr++ = cb1->b[1]; *vptr++ = cb1->g[1]; *vptr++ = cb1->r[1];
-	vptr += stride;
-	*vptr++ = cb0->b[2]; *vptr++ = cb0->g[2]; *vptr++ = cb0->r[2];
-	*vptr++ = cb0->b[3]; *vptr++ = cb0->g[3]; *vptr++ = cb0->r[3];
-	*vptr++ = cb1->b[2]; *vptr++ = cb1->g[2]; *vptr++ = cb1->r[2];
-	*vptr++ = cb1->b[3]; *vptr++ = cb1->g[3]; *vptr++ = cb1->r[3];
-	vptr += stride;
-	*vptr++ = cb2->b[0]; *vptr++ = cb2->g[0]; *vptr++ = cb2->r[0];
-	*vptr++ = cb2->b[1]; *vptr++ = cb2->g[1]; *vptr++ = cb2->r[1];
-	*vptr++ = cb3->b[0]; *vptr++ = cb3->g[0]; *vptr++ = cb3->r[0];
-	*vptr++ = cb3->b[1]; *vptr++ = cb3->g[1]; *vptr++ = cb3->r[1];
-	vptr += stride;
-	*vptr++ = cb2->b[2]; *vptr++ = cb2->g[2]; *vptr++ = cb2->r[2];
-	*vptr++ = cb2->b[3]; *vptr++ = cb2->g[3]; *vptr++ = cb2->r[3];
-	*vptr++ = cb3->b[2]; *vptr++ = cb3->g[2]; *vptr++ = cb3->r[2];
-	*vptr++ = cb3->b[3]; *vptr++ = cb3->g[3]; *vptr++ = cb3->r[3];
-}
-
-/* ------------------------------------------------------------------------
- * Call this function once at the start of the sequence and save the
- * returned context for calls to decode_cinepak().
- */
-void *decode_cinepak_init(void)
-{
-cinepak_info *cvinfo;
-int i, x;
-
-	if((cvinfo = calloc(sizeof(cinepak_info), 1)) == NULL) return NULL;
-	cvinfo->strip_num = 0;
-
-	if(uiclp == NULL)
-		{
-		uiclp = uiclip+512;
-		for(i = -512; i < 512; i++)
-			uiclp[i] = (i < 0 ? 0 : (i > 255 ? 255 : i));
-		}
-
-	for(i = 0, x = -128; i < 256; i++, x++)
-		{
-		CU_Y_tab[i] = (-FIX(0.0655)) * x;
-		CV_Y_tab[i] = (FIX(0.0110)) * x + ONE_HALF;
-		CU_Cb_tab[i] = (FIX(1.1656)) * x;
-		CV_Cb_tab[i] = (-FIX(0.0062)) * x + ONE_HALF + FIX(128);
-		CU_Cr_tab[i] = (FIX(0.0467)) * x;
-		CV_Cr_tab[i] = (FIX(1.4187)) * x + ONE_HALF + FIX(128);
-		}
-
-	return (void *)cvinfo;
-}
-
-
-/* ------------------------------------------------------------------------
- * This function decodes a buffer containing a Cinepak encoded frame.
- *
- * context - the context created by decode_cinepak_init().
- * buf - the input buffer to be decoded
- * size - the size of the input buffer
- * frame - the output frame buffer
- * width - the width of the output frame
- * height - the height of the output frame
- * bit_per_pixel - the number of bits per pixel allocated to the output
- *   frame; depths support:
- *     32: BGR32
- *     24: BGR24
- *     16: YUY2
- *     12: YV12
- */
-void decode_cinepak(void *context, unsigned char *buf, int size, mp_image_t *mpi)
-{
-cinepak_info *cvinfo = (cinepak_info *)context;
-cvid_codebook *v4_codebook, *v1_codebook, *codebook = NULL;
-unsigned long x, y, y_bottom, frame_flags, strips, cv_width, cv_height, cnum,
-	strip_id, chunk_id, x0, y0, x1, y1, ci, flag, mask;
-long len, top_size, chunk_size;
-unsigned int i, cur_strip, d0, d1, d2, d3;
-int modulo;
-void (*read_codebook)(cvid_codebook *c, int mode) = NULL;
-void (*cvid_v1)(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb) = NULL;
-void (*cvid_v4)(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb0,
-	cvid_codebook *cb1, cvid_codebook *cb2, cvid_codebook *cb3) = NULL;
-
-	x = y = 0;
-	y_bottom = 0;
-	in_buffer = buf;
-
-	frame_flags = get_byte();
-	len = get_byte() << 16;
-	len |= get_byte() << 8;
-	len |= get_byte();
-
-	switch(mpi->imgfmt)
-		{
-		case IMGFMT_I420:
-		case IMGFMT_IYUV:
-		case IMGFMT_YV12:  // YV12
-			read_codebook = read_codebook_yv12;
-			cvid_v1 = cvid_v1_yv12;
-			cvid_v4 = cvid_v4_yv12;
-			break;
-		case IMGFMT_YUY2:  // YUY2
-			read_codebook = read_codebook_yuy2;
-			cvid_v1 = cvid_v1_yuy2;
-			cvid_v4 = cvid_v4_yuy2;
-			break;
-		case IMGFMT_BGR24:  // BGR24
-			read_codebook = read_codebook_24;
-			cvid_v1 = cvid_v1_24;
-			cvid_v4 = cvid_v4_24;
-			break;
-		case IMGFMT_BGR32:  // BGR32
-			read_codebook = read_codebook_32;
-			cvid_v1 = cvid_v1_32;
-			cvid_v4 = cvid_v4_32;
-			break;
-		}
-
-	if(len != size)
-		{
-		if(len & 0x01) len++; /* AVIs tend to have a size mismatch */
-		if(len != size)
-			{
-			mp_msg(MSGT_DECVIDEO, MSGL_WARN, "CVID: corruption %d (QT/AVI) != %ld (CV)\n", size, len);
-			// return;
-			}
-		}
-
-	cv_width = get_word();
-	cv_height = get_word();
-	strips = get_word();
-
-	if(strips > cvinfo->strip_num)
-		{
-		if(strips >= MAX_STRIPS) 
-			{
-			mp_msg(MSGT_DECVIDEO, MSGL_WARN, "CVID: strip overflow (more than %d)\n", MAX_STRIPS);
-			return;
-			}
-
-		for(i = cvinfo->strip_num; i < strips; i++)
-			{
-			if((cvinfo->v4_codebook[i] = (cvid_codebook *)calloc(sizeof(cvid_codebook), 260)) == NULL)
-				{
-				mp_msg(MSGT_DECVIDEO, MSGL_WARN, "CVID: codebook v4 alloc err\n");
-				return;
-				}
-
-			if((cvinfo->v1_codebook[i] = (cvid_codebook *)calloc(sizeof(cvid_codebook), 260)) == NULL)
-				{
-				mp_msg(MSGT_DECVIDEO, MSGL_WARN, "CVID: codebook v1 alloc err\n");
-				return;
-				}
-			}
-		}
-	cvinfo->strip_num = strips;
-
-#if DBUG
-	mp_msg(MSGT_DECVIDEO, MSGL_WARN, "CVID: <%ld,%ld> strips %ld\n", cv_width, cv_height, strips);
-#endif
-
-	for(cur_strip = 0; cur_strip < strips; cur_strip++)
-		{
-		v4_codebook = cvinfo->v4_codebook[cur_strip];
-		v1_codebook = cvinfo->v1_codebook[cur_strip];
-
-		if((cur_strip > 0) && (!(frame_flags & 0x01)))
-			{
-			memcpy(cvinfo->v4_codebook[cur_strip], cvinfo->v4_codebook[cur_strip-1], 260 * sizeof(cvid_codebook));
-			memcpy(cvinfo->v1_codebook[cur_strip], cvinfo->v1_codebook[cur_strip-1], 260 * sizeof(cvid_codebook));
-			}
-
-		strip_id = get_word();		/* 1000 = key strip, 1100 = iter strip */
-		top_size = get_word();
-		y0 = get_word();		/* FIXME: most of these are ignored at the moment */
-		x0 = get_word();
-		y1 = get_word();
-		x1 = get_word();
-
-		y_bottom += y1;
-		top_size -= 12;
-		x = 0;
-//		if(x1 != (unsigned int)mpi->width) 
-//			mp_msg(MSGT_DECVIDEO, MSGL_WARN, "CVID: Warning x1 (%ld) != width (%d)\n", x1, mpi->width);
-
-//x1 = mpi->width;
-#if DBUG
-	mp_msg(MSGT_DECVIDEO, MSGL_WARN, "   %d) %04lx %04ld <%ld,%ld> <%ld,%ld> yt %ld  %d\n",
-		cur_strip, strip_id, top_size, x0, y0, x1, y1, y_bottom);
-#endif
-
-		while(top_size > 0)
-			{
-			chunk_id  = get_word();
-			chunk_size = get_word();
-
-#if DBUG
-	mp_msg(MSGT_DECVIDEO, MSGL_WARN, "        %04lx %04ld\n", chunk_id, chunk_size);
-#endif
-			top_size -= chunk_size;
-			chunk_size -= 4;
-
-			switch(chunk_id)
-				{
-					/* -------------------- Codebook Entries -------------------- */
-				case 0x2000:
-				case 0x2200:
-					modulo = chunk_size % 6;
-					codebook = (chunk_id == 0x2200 ? v1_codebook : v4_codebook);
-					cnum = (chunk_size - modulo) / 6;
-					for(i = 0; i < cnum; i++) read_codebook(codebook+i, 0);
-					while (modulo--)
-						in_buffer++;
-					break;
-
-				case 0x2400:
-				case 0x2600:		/* 8 bit per pixel */
-					codebook = (chunk_id == 0x2600 ? v1_codebook : v4_codebook);
-					cnum = chunk_size/4;  
-					for(i = 0; i < cnum; i++) read_codebook(codebook+i, 1);
-					break;
-
-				case 0x2100:
-				case 0x2300:
-					codebook = (chunk_id == 0x2300 ? v1_codebook : v4_codebook);
-
-					ci = 0;
-					while(chunk_size > 3)
-						{
-						flag = get_long();
-						chunk_size -= 4;
-
-						for(i = 0; i < 32; i++)
-							{
-							if(flag & 0x80000000)
-								{
-								chunk_size -= 6;
-								read_codebook(codebook+ci, 0);
-								}
-
-							ci++;
-							flag <<= 1;
-							}
-						}
-					while(chunk_size > 0) { skip_byte(); chunk_size--; }
-					break;
-
-				case 0x2500:
-				case 0x2700:		/* 8 bit per pixel */
-					codebook = (chunk_id == 0x2700 ? v1_codebook : v4_codebook);
-
-					ci = 0;
-					while(chunk_size > 0)
-						{
-						flag = get_long();
-						chunk_size -= 4;
-
-						for(i = 0; i < 32; i++)
-							{
-							if(flag & 0x80000000)
-								{
-								chunk_size -= 4;
-								read_codebook(codebook+ci, 1);
-								}
-
-							ci++;
-							flag <<= 1;
-							}
-						}
-					while(chunk_size > 0) { skip_byte(); chunk_size--; }
-					break;
-
-					/* -------------------- Frame -------------------- */
-				case 0x3000: 
-					while((chunk_size > 0) && (y < y_bottom))
-						{
-						flag = get_long();
-						chunk_size -= 4;
-						
-						for(i = 0; i < 32; i++)
-							{ 
-							if(y >= y_bottom) break;
-							if(flag & 0x80000000)	/* 4 bytes per block */
-								{
-								d0 = get_byte();
-								d1 = get_byte();
-								d2 = get_byte();
-								d3 = get_byte();
-								chunk_size -= 4;
-								cvid_v4(mpi, x, y, v4_codebook+d0, v4_codebook+d1, v4_codebook+d2, v4_codebook+d3);
-								}
-							else		/* 1 byte per block */
-								{
-								cvid_v1(mpi, x, y, v1_codebook + get_byte());
-								chunk_size--;
-								}
-
-							x += 4;
-							if(x >= (unsigned int)x1)
-								{
-								x = 0;
-								y += 4;
-								}
-							flag <<= 1;
-							}
-						}
-					while(chunk_size > 0) { skip_byte(); chunk_size--; }
-					break;
-
-				case 0x3100:
-					while((chunk_size > 0) && (y < y_bottom))
-						{
-							/* ---- flag bits: 0 = SKIP, 10 = V1, 11 = V4 ---- */
-						flag = (unsigned long)get_long();
-						chunk_size -= 4;
-						mask = 0x80000000;
-
-						while((mask) && (y < y_bottom))
-							{
-							if(flag & mask)
-								{
-								if(mask == 1)
-									{
-									if(chunk_size < 0) break;
-									flag = (unsigned long)get_long();
-									chunk_size -= 4;
-									mask = 0x80000000;
-									}
-								else mask >>= 1;
-								
-								if(flag & mask)		/* V4 */
-									{
-									d0 = get_byte();
-									d1 = get_byte();
-									d2 = get_byte();
-									d3 = get_byte();
-									chunk_size -= 4;
-									cvid_v4(mpi, x, y, v4_codebook+d0, v4_codebook+d1, v4_codebook+d2, v4_codebook+d3);
-									}
-								else		/* V1 */
-									{
-									chunk_size--;
-									cvid_v1(mpi, x, y, v1_codebook + get_byte());
-									}
-								}		/* else SKIP */
-
-							mask >>= 1;
-							x += 4;
-							if(x >= (unsigned int)x1)
-								{
-								x = 0;
-								y += 4;
-								}
-							}
-						}
-
-					while(chunk_size > 0) { skip_byte(); chunk_size--; }
-					break;
-
-				case 0x3200:		/* each byte is a V1 codebook */
-					while((chunk_size > 0) && (y < y_bottom))
-						{
-						cvid_v1(mpi, x, y, v1_codebook + get_byte());
-						chunk_size--;
-						x += 4;
-						if(x >= (unsigned int)x1)
-							{
-							x = 0;
-							y += 4;
-							}
-						}
-					while(chunk_size > 0) { skip_byte(); chunk_size--; }
-					break;
-
-				default:
-					mp_msg(MSGT_DECVIDEO, MSGL_WARN, "CVID: unknown chunk_id %08lx\n", chunk_id);
-					while(chunk_size > 0) { skip_byte(); chunk_size--; }
-					break;
-				}
-			}
-		}
-
-	if(len != size)
-		{
-		if(len & 0x01) len++; /* AVIs tend to have a size mismatch */
-		if(len != size)
-			{
-			long xlen;
-			skip_byte();
-			xlen = get_byte() << 16;
-			xlen |= get_byte() << 8;
-			xlen |= get_byte(); /* Read Len */
-			mp_msg(MSGT_DECVIDEO, MSGL_WARN, "CVID: END INFO chunk size %d cvid size1 %ld cvid size2 %ld\n", size, len, xlen);
-			}
-		}
-}
-
--- a/cyuv.c	Sat Apr 13 17:44:14 2002 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,135 +0,0 @@
-/* ------------------------------------------------------------------------
- * Creative YUV Video Decoder
- *
- * Dr. Tim Ferguson, 2001.
- * For more details on the algorithm:
- *         http://www.csse.monash.edu.au/~timf/videocodec.html
- *
- * This is a very simple predictive coder.  A video frame is coded in YUV411
- * format.  The first pixel of each scanline is coded using the upper four
- * bits of its absolute value.  Subsequent pixels for the scanline are coded
- * using the difference between the last pixel and the current pixel (DPCM).
- * The DPCM values are coded using a 16 entry table found at the start of the
- * frame.  Thus four bits per component are used and are as follows:
- *     UY VY YY UY VY YY UY VY...
- * This code assumes the frame width will be a multiple of four pixels.  This
- * should probably be fixed.
- * ------------------------------------------------------------------------ */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-#include "loader/wine/avifmt.h" // for mmioFOURCC macro
-
-/* ------------------------------------------------------------------------
- * This function decodes a buffer containing a CYUV encoded frame.
- *
- * buf - the input buffer to be decoded
- * size - the size of the input buffer
- * frame - the output frame buffer (UYVY format)
- * width - the width of the output frame
- * height - the height of the output frame
- * format - the requested output format
- */
-void decode_cyuv(unsigned char *buf, int size, unsigned char *frame, int width, int height, int format)
-{
-int i, xpos, ypos, cur_Y = 0, cur_U = 0, cur_V = 0;
-char *delta_y_tbl, *delta_c_tbl, *ptr;
-
-	delta_y_tbl = buf + 16;
-	delta_c_tbl = buf + 32;
-	ptr = buf + (16 * 3);
-
-	for(ypos = 0; ypos < height; ypos++)
-		for(xpos = 0; xpos < width; xpos += 4)
-			{
-			if(xpos == 0)		/* first pixels in scanline */
-				{
-				cur_U = *(ptr++);
-				cur_Y = (cur_U & 0x0f) << 4;
-				cur_U = cur_U & 0xf0;
-				if (format == mmioFOURCC('Y','U','Y','2'))
-					{
-					*frame++ = cur_Y;
-					*frame++ = cur_U;
-					}
-				else
-					{
-					*frame++ = cur_U;
-					*frame++ = cur_Y;
-					}
-
-				cur_V = *(ptr++);
-				cur_Y = (cur_Y + delta_y_tbl[cur_V & 0x0f]) & 0xff;
-				cur_V = cur_V & 0xf0;
-				if (format == mmioFOURCC('Y','U','Y','2'))
-					{
-					*frame++ = cur_Y;
-					*frame++ = cur_V;
-					}
-				else
-					{
-					*frame++ = cur_V;
-					*frame++ = cur_Y;
-					}
-				}
-			else			/* subsequent pixels in scanline */
-				{
-				i = *(ptr++);
-				cur_U = (cur_U + delta_c_tbl[i >> 4]) & 0xff;
-				cur_Y = (cur_Y + delta_y_tbl[i & 0x0f]) & 0xff;
-				if (format == mmioFOURCC('Y','U','Y','2'))
-					{
-					*frame++ = cur_Y;
-					*frame++ = cur_U;
-					}
-				else
-					{
-					*frame++ = cur_U;
-					*frame++ = cur_Y;
-					}
-
-				i = *(ptr++);
-				cur_V = (cur_V + delta_c_tbl[i >> 4]) & 0xff;
-				cur_Y = (cur_Y + delta_y_tbl[i & 0x0f]) & 0xff;
-				if (format == mmioFOURCC('Y','U','Y','2'))
-					{
-					*frame++ = cur_Y;
-					*frame++ = cur_V;
-					}
-				else
-					{
-					*frame++ = cur_V;
-					*frame++ = cur_Y;
-					}
-				}
-
-			i = *(ptr++);
-			cur_Y = (cur_Y + delta_y_tbl[i & 0x0f]) & 0xff;
-			if (format == mmioFOURCC('Y','U','Y','2'))
-				{
-				*frame++ = cur_Y;
-				*frame++ = cur_U;
-				}
-			else
-				{
-				*frame++ = cur_U;
-				*frame++ = cur_Y;
-				}
-
-			cur_Y = (cur_Y + delta_y_tbl[i >> 4]) & 0xff;
-			if (format == mmioFOURCC('Y','U','Y','2'))
-				{
-				*frame++ = cur_Y;
-				*frame++ = cur_V;
-				}
-			else
-				{
-				*frame++ = cur_V;
-				*frame++ = cur_Y;
-				}
-			}
-}
-
--- a/fli.c	Sat Apr 13 17:44:14 2002 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,379 +0,0 @@
-/*
-    FLI Decoder for MPlayer
-    
-    (C) 2001 Mike Melanson
-    
-    32bpp support (c) alex
-
-    Additional code and bug fixes by Roberto Togni
-
-    For information on the FLI format, as well as various traps to
-    avoid while programming one, visit:
-      http://www.pcisys.net/~melanson/codecs/
-*/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "config.h"
-#include "bswap.h"
-#include "mp_msg.h"
-
-#define LE_16(x) (le2me_16(*(unsigned short *)(x)))
-#define LE_32(x) (le2me_32(*(unsigned int *)(x)))
-
-#define FLI_256_COLOR 4
-#define FLI_DELTA     7
-#define FLI_COLOR     11
-#define FLI_LC        12
-#define FLI_BLACK     13
-#define FLI_BRUN      15
-#define FLI_COPY      16
-#define FLI_MINI      18
-
-// 256 RGB entries; 25% of these bytes will be unused, but it's faster
-// to index 4-byte entries
-#define PALETTE_SIZE 1024
-static unsigned char palette[PALETTE_SIZE];
-
-void *init_fli_decoder(int width, int height)
-{
-  memset(palette, 0, PALETTE_SIZE);
-
-  return malloc(width * height * sizeof (unsigned char));
-}
-
-void decode_fli_frame(
-  unsigned char *encoded,
-  int encoded_size,
-  unsigned char *decoded,
-  int width,
-  int height,
-  int bytes_per_pixel,
-  void *context)
-{
-  int stream_ptr = 0;
-  int stream_ptr_after_color_chunk;
-  int pixel_ptr;
-  int palette_ptr1;
-  int palette_ptr2;
-  unsigned char palette_idx1;
-  unsigned char palette_idx2;
-
-  unsigned int frame_size;
-  int num_chunks;
-
-  unsigned int chunk_size;
-  int chunk_type;
-
-  int i, j;
-
-  int color_packets;
-  int color_changes;
-  int color_scale;
-
-  int lines;
-  int compressed_lines;
-  int starting_line;
-  signed short line_packets;
-  int y_ptr;
-  int line_inc = width * bytes_per_pixel;
-  signed char byte_run;
-  int pixel_skip;
-  int update_whole_frame = 0; // Palette change flag
-  unsigned char *fli_ghost_image = (unsigned char *)context;
-  int ghost_pixel_ptr;
-  int ghost_y_ptr;
-
-  frame_size = LE_32(&encoded[stream_ptr]);
-  stream_ptr += 6;  // skip the magic number
-  num_chunks = LE_16(&encoded[stream_ptr]);
-  stream_ptr += 10;  // skip padding
-
-  // iterate through the chunks
-  frame_size -= 16;
-  while ((frame_size > 0) && (num_chunks > 0))
-  {
-    chunk_size = LE_32(&encoded[stream_ptr]);
-    stream_ptr += 4;
-    chunk_type = LE_16(&encoded[stream_ptr]);
-    stream_ptr += 2;
-
-    switch (chunk_type)
-    {
-    case FLI_256_COLOR:
-    case FLI_COLOR:
-      stream_ptr_after_color_chunk = stream_ptr + chunk_size - 6;
-      if (chunk_type == FLI_COLOR)
-        color_scale = 4;
-      else
-        color_scale = 1;
-      // set up the palette
-      color_packets = LE_16(&encoded[stream_ptr]);
-      stream_ptr += 2;
-      palette_ptr1 = 0;
-      for (i = 0; i < color_packets; i++)
-      {
-        // first byte is how many colors to skip
-        palette_ptr1 += (encoded[stream_ptr++] * 4);
-        // wrap around, for good measure
-        if (palette_ptr1 >= PALETTE_SIZE)
-          palette_ptr1 = 0;
-        // next byte indicates how many entries to change
-        color_changes = encoded[stream_ptr++];
-        // if there are 0 color changes, there are actually 256
-        if (color_changes == 0)
-          color_changes = 256;
-        for (j = 0; j < color_changes; j++)
-        {
-          palette[palette_ptr1++] = encoded[stream_ptr + 2] * color_scale;
-          palette[palette_ptr1++] = encoded[stream_ptr + 1] * color_scale;
-          palette[palette_ptr1++] = encoded[stream_ptr + 0] * color_scale;
-          palette_ptr1++;
-          stream_ptr += 3;
-        }
-      }
-
-      // color chunks sometimes have weird 16-bit alignment issues;
-      // therefore, take the hardline approach and set the stream_ptr
-      // to the value calculate w.r.t. the size specified by the color
-      // chunk header
-      stream_ptr = stream_ptr_after_color_chunk;
-
-      /* Palette has changed, must update frame */
-      update_whole_frame = 1;
-      break;
-
-    case FLI_DELTA:
-      y_ptr = ghost_y_ptr = 0;
-      compressed_lines = LE_16(&encoded[stream_ptr]);
-      stream_ptr += 2;
-      while (compressed_lines > 0)
-      {
-        line_packets = LE_16(&encoded[stream_ptr]);
-        stream_ptr += 2;
-        if (line_packets < 0)
-        {
-          line_packets = -line_packets;
-          y_ptr += (line_packets * line_inc);
-          ghost_y_ptr += (line_packets * width);
-        }
-        else
-        {
-          pixel_ptr = y_ptr;
-          ghost_pixel_ptr = ghost_y_ptr;
-          for (i = 0; i < line_packets; i++)
-          {
-            // account for the skip bytes
-            pixel_skip = encoded[stream_ptr++];
-            pixel_ptr += pixel_skip * bytes_per_pixel;
-            ghost_pixel_ptr += pixel_skip;
-            byte_run = encoded[stream_ptr++];
-            if (byte_run < 0)
-            {
-              byte_run = -byte_run;
-              palette_ptr1 = (palette_idx1 = encoded[stream_ptr++]) * 4;
-              palette_ptr2 = (palette_idx2 = encoded[stream_ptr++]) * 4;
-              for (j = 0; j < byte_run; j++)
-              {
-                fli_ghost_image[ghost_pixel_ptr++] = palette_idx1;
-                decoded[pixel_ptr++] = palette[palette_ptr1 + 0];
-                decoded[pixel_ptr++] = palette[palette_ptr1 + 1];
-                decoded[pixel_ptr++] = palette[palette_ptr1 + 2];
-		if (bytes_per_pixel == 4) /* 32bpp */
-		    pixel_ptr++;
-
-                fli_ghost_image[ghost_pixel_ptr++] = palette_idx2;
-                decoded[pixel_ptr++] = palette[palette_ptr2 + 0];
-                decoded[pixel_ptr++] = palette[palette_ptr2 + 1];
-                decoded[pixel_ptr++] = palette[palette_ptr2 + 2];
-		if (bytes_per_pixel == 4) /* 32bpp */
-		    pixel_ptr++;
-              }
-            }
-            else
-            {
-              for (j = 0; j < byte_run * 2; j++)
-              {
-                palette_ptr1 = (palette_idx1 = encoded[stream_ptr++]) * 4;
-                fli_ghost_image[ghost_pixel_ptr++] = palette_idx1;
-                decoded[pixel_ptr++] = palette[palette_ptr1 + 0];
-                decoded[pixel_ptr++] = palette[palette_ptr1 + 1];
-                decoded[pixel_ptr++] = palette[palette_ptr1 + 2];
-		if (bytes_per_pixel == 4) /* 32bpp */
-		    pixel_ptr++;
-              }
-            }
-          }
-          y_ptr += line_inc;
-          ghost_y_ptr += width;
-          compressed_lines--;
-        }
-      }
-      break;
-
-    case FLI_LC:
-      // line compressed
-      starting_line = LE_16(&encoded[stream_ptr]);
-      stream_ptr += 2;
-      y_ptr = starting_line * line_inc;
-      ghost_y_ptr = starting_line * width;
-
-      compressed_lines = LE_16(&encoded[stream_ptr]);
-      stream_ptr += 2;
-      while (compressed_lines > 0)
-      {
-        pixel_ptr = y_ptr;
-        ghost_pixel_ptr = ghost_y_ptr;
-        line_packets = encoded[stream_ptr++];
-        if (line_packets > 0)
-        {
-          for (i = 0; i < line_packets; i++)
-          {
-            // account for the skip bytes
-            pixel_skip = encoded[stream_ptr++];
-            pixel_ptr += pixel_skip * bytes_per_pixel;
-            ghost_pixel_ptr += pixel_skip;
-            byte_run = encoded[stream_ptr++];
-            if (byte_run > 0)
-            {
-              for (j = 0; j < byte_run; j++)
-              {
-                palette_ptr1 = (palette_idx1 = encoded[stream_ptr++]) * 4;
-                fli_ghost_image[ghost_pixel_ptr++] = palette_idx1;
-                decoded[pixel_ptr++] = palette[palette_ptr1 + 0];
-                decoded[pixel_ptr++] = palette[palette_ptr1 + 1];
-                decoded[pixel_ptr++] = palette[palette_ptr1 + 2];
-		if (bytes_per_pixel == 4) /* 32bpp */
-		    pixel_ptr++;
-              }
-            }
-            else
-            {
-              byte_run = -byte_run;
-              palette_ptr1 = (palette_idx1 = encoded[stream_ptr++]) * 4;
-              for (j = 0; j < byte_run; j++)
-              {
-                fli_ghost_image[ghost_pixel_ptr++] = palette_idx1;
-                decoded[pixel_ptr++] = palette[palette_ptr1 + 0];
-                decoded[pixel_ptr++] = palette[palette_ptr1 + 1];
-                decoded[pixel_ptr++] = palette[palette_ptr1 + 2];
-		if (bytes_per_pixel == 4) /* 32bpp */
-		    pixel_ptr++;
-              }
-            }
-          }
-        }
-
-        y_ptr += line_inc;
-        ghost_y_ptr += width;
-        compressed_lines--;
-      }
-      break;
-
-    case FLI_BLACK:
-      // set the whole frame to color 0 (which is usually black) by
-      // clearing the ghost image and trigger a full frame update
-      memset(fli_ghost_image, 0, width * height * sizeof(unsigned char));
-      update_whole_frame = 1;
-      break;
-
-    case FLI_BRUN:
-      // byte run compression
-      y_ptr = 0;
-      ghost_y_ptr = 0;
-      for (lines = 0; lines < height; lines++)
-      {
-        pixel_ptr = y_ptr;
-        ghost_pixel_ptr = ghost_y_ptr;
-        line_packets = encoded[stream_ptr++];
-        for (i = 0; i < line_packets; i++)
-        {
-          byte_run = encoded[stream_ptr++];
-          if (byte_run > 0)
-          {
-            palette_ptr1 = (palette_idx1 = encoded[stream_ptr++]) * 4;
-            for (j = 0; j < byte_run; j++)
-            {
-              fli_ghost_image[ghost_pixel_ptr++] = palette_idx1;
-              decoded[pixel_ptr++] = palette[palette_ptr1 + 0];
-              decoded[pixel_ptr++] = palette[palette_ptr1 + 1];
-              decoded[pixel_ptr++] = palette[palette_ptr1 + 2];
-	      if (bytes_per_pixel == 4) /* 32bpp */
-		pixel_ptr++;
-            }
-          }
-          else  // copy bytes if byte_run < 0
-          {
-            byte_run = -byte_run;
-            for (j = 0; j < byte_run; j++)
-            {
-              palette_ptr1 = (palette_idx1 = encoded[stream_ptr++]) * 4;
-              fli_ghost_image[ghost_pixel_ptr++] = palette_idx1;
-              decoded[pixel_ptr++] = palette[palette_ptr1 + 0];
-              decoded[pixel_ptr++] = palette[palette_ptr1 + 1];
-              decoded[pixel_ptr++] = palette[palette_ptr1 + 2];
-	      if (bytes_per_pixel == 4) /* 32bpp */
-		pixel_ptr++;
-            }
-          }
-        }
-
-        y_ptr += line_inc;
-        ghost_y_ptr += width;
-      }
-      break;
-
-    case FLI_COPY:
-      // copy the chunk (uncompressed frame) to the ghost image and
-      // schedule the whole frame to be updated
-      if (chunk_size - 6 > width * height)
-      {
-        mp_msg(MSGT_DECVIDEO, MSGL_WARN,
-         "FLI: in chunk FLI_COPY : source data (%d bytes) bigger than image," \
-         " skipping chunk\n",
-         chunk_size - 6);
-         break;
-      }
-      else
-        memcpy(fli_ghost_image, &encoded[stream_ptr], chunk_size - 6);
-      stream_ptr += chunk_size - 6;
-      update_whole_frame = 1;
-      break;
-
-    case FLI_MINI:
-      // sort of a thumbnail? disregard this chunk...
-      stream_ptr += chunk_size - 6;
-      break;
-
-    default:
-      mp_msg (MSGT_DECVIDEO, MSGL_WARN,
-       "FLI: Unrecognized chunk type: %d\n", chunk_type);
-      break;
-    }
-
-    frame_size -= chunk_size;
-    num_chunks--;
-  }
-
-  if (update_whole_frame)
-  {
-    pixel_ptr = ghost_pixel_ptr = 0;
-    while (pixel_ptr < (width * height * bytes_per_pixel))
-    {
-      palette_ptr1 = fli_ghost_image[ghost_pixel_ptr++] * 4;
-      decoded[pixel_ptr++] = palette[palette_ptr1 + 0];
-      decoded[pixel_ptr++] = palette[palette_ptr1 + 1];
-      decoded[pixel_ptr++] = palette[palette_ptr1 + 2];
-      if (bytes_per_pixel == 4) /* 32bpp */
-        pixel_ptr++;
-    }
-  }
-
-  // by the end of the chunk, the stream ptr should equal the frame 
-  // size (minus 1, possibly); if it doesn't, issue a warning
-  if ((stream_ptr != encoded_size) && (stream_ptr != encoded_size - 1))
-    mp_msg(MSGT_DECVIDEO, MSGL_WARN,
-      "  warning: processed FLI chunk where encoded size = %d\n" \
-      "  and final chunk ptr = %d\n",
-      encoded_size, stream_ptr);
-}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libmpcodecs/native/RTjpegN.c	Sat Apr 13 18:03:02 2002 +0000
@@ -0,0 +1,3800 @@
+/* 
+   RTjpeg (C) Justin Schoeman 1998 (justin@suntiger.ee.up.ac.za)
+   
+   With modifications by:
+   (c) 1998, 1999 by Joerg Walter <trouble@moes.pmnet.uni-oldenburg.de>
+   and
+   (c) 1999 by Wim Taymans <wim.taymans@tvd.be>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+    
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "config.h"
+#ifdef HAVE_MMX
+#define MMX
+#endif
+
+#include "RTjpegN.h"
+
+#ifdef MMX
+#include "mmx.h"
+#endif
+
+//#define SHOWBLOCK 1
+#define BETTERCOMPRESSION 1
+
+static const unsigned char RTjpeg_ZZ[64]={
+0,
+8, 1,
+2, 9, 16,
+24, 17, 10, 3,
+4, 11, 18, 25, 32,
+40, 33, 26, 19, 12, 5,
+6, 13, 20, 27, 34, 41, 48,
+56, 49, 42, 35, 28, 21, 14, 7,
+15, 22, 29, 36, 43, 50, 57,
+58, 51, 44, 37, 30, 23,
+31, 38, 45, 52, 59,
+60, 53, 46, 39,
+47, 54, 61,
+62, 55,
+63 };
+
+static const __u64 RTjpeg_aan_tab[64]={
+4294967296ULL, 5957222912ULL, 5611718144ULL, 5050464768ULL, 4294967296ULL, 3374581504ULL, 2324432128ULL, 1184891264ULL, 
+5957222912ULL, 8263040512ULL, 7783580160ULL, 7005009920ULL, 5957222912ULL, 4680582144ULL, 3224107520ULL, 1643641088ULL, 
+5611718144ULL, 7783580160ULL, 7331904512ULL, 6598688768ULL, 5611718144ULL, 4408998912ULL, 3036936960ULL, 1548224000ULL, 
+5050464768ULL, 7005009920ULL, 6598688768ULL, 5938608128ULL, 5050464768ULL, 3968072960ULL, 2733115392ULL, 1393296000ULL, 
+4294967296ULL, 5957222912ULL, 5611718144ULL, 5050464768ULL, 4294967296ULL, 3374581504ULL, 2324432128ULL, 1184891264ULL, 
+3374581504ULL, 4680582144ULL, 4408998912ULL, 3968072960ULL, 3374581504ULL, 2651326208ULL, 1826357504ULL, 931136000ULL, 
+2324432128ULL, 3224107520ULL, 3036936960ULL, 2733115392ULL, 2324432128ULL, 1826357504ULL, 1258030336ULL, 641204288ULL, 
+1184891264ULL, 1643641088ULL, 1548224000ULL, 1393296000ULL, 1184891264ULL, 931136000ULL, 641204288ULL, 326894240ULL, 
+};
+
+#ifndef MMX
+static __s32 RTjpeg_ws[64+31];
+#endif
+__u8 RTjpeg_alldata[2*64+4*64+4*64+4*64+4*64+32];
+
+static __s16 *block; // rh
+static __s16 *RTjpeg_block;
+static __s32 *RTjpeg_lqt;
+static __s32 *RTjpeg_cqt;
+static __u32 *RTjpeg_liqt;
+static __u32 *RTjpeg_ciqt;
+
+static unsigned char RTjpeg_lb8;
+static unsigned char RTjpeg_cb8;
+static int RTjpeg_width, RTjpeg_height;
+static int RTjpeg_Ywidth, RTjpeg_Cwidth;
+static int RTjpeg_Ysize, RTjpeg_Csize;
+
+static __s16 *RTjpeg_old=NULL;
+
+#ifdef MMX
+mmx_t RTjpeg_lmask;
+mmx_t RTjpeg_cmask;
+#else
+__u16 RTjpeg_lmask;
+__u16 RTjpeg_cmask;
+#endif
+int RTjpeg_mtest=0;
+
+static const unsigned char RTjpeg_lum_quant_tbl[64] = {
+    16,  11,  10,  16,  24,  40,  51,  61,
+    12,  12,  14,  19,  26,  58,  60,  55,
+    14,  13,  16,  24,  40,  57,  69,  56,
+    14,  17,  22,  29,  51,  87,  80,  62,
+    18,  22,  37,  56,  68, 109, 103,  77,
+    24,  35,  55,  64,  81, 104, 113,  92,
+    49,  64,  78,  87, 103, 121, 120, 101,
+    72,  92,  95,  98, 112, 100, 103,  99
+ };
+
+static const unsigned char RTjpeg_chrom_quant_tbl[64] = {
+    17,  18,  24,  47,  99,  99,  99,  99,
+    18,  21,  26,  66,  99,  99,  99,  99,
+    24,  26,  56,  99,  99,  99,  99,  99,
+    47,  66,  99,  99,  99,  99,  99,  99,
+    99,  99,  99,  99,  99,  99,  99,  99,
+    99,  99,  99,  99,  99,  99,  99,  99,
+    99,  99,  99,  99,  99,  99,  99,  99,
+    99,  99,  99,  99,  99,  99,  99,  99
+ };
+
+#ifdef BETTERCOMPRESSION 
+
+/*--------------------------------------------------*/
+/*  better encoding, but needs a lot more cpu time  */
+/*  seems to be more effective than old method +lzo */
+/*  with this encoding lzo isn't efficient anymore  */
+/*  there is still more potential for better        */
+/*  encoding but that would need even more cputime  */
+/*  anyway your mileage may vary                    */
+/*                                                  */
+/*  written by Martin BIELY and Roman HOCHLEITNER   */
+/*--------------------------------------------------*/
+
+/* +++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* Block to Stream (encoding)                         */
+/*                                                    */
+
+int RTjpeg_b2s(__s16 *data, __s8 *strm, __u8 bt8)
+{
+ register int ci, co=1;
+ register __s16 ZZvalue;
+ register unsigned char bitten;
+ register unsigned char bitoff;
+
+#ifdef SHOWBLOCK
+
+  int ii;
+  for (ii=0; ii < 64; ii++) {
+    fprintf(stdout, "%d ", data[RTjpeg_ZZ[ii]]);
+  }
+  fprintf(stdout, "\n\n");
+
+#endif
+
+// *strm++ = 0x10;
+// *strm   = 0x00;
+//
+// return 2;
+
+ // first byte allways written
+ (__u8)strm[0]=
+      (__u8)(data[RTjpeg_ZZ[0]]>254) ? 254:((data[RTjpeg_ZZ[0]]<0)?0:data[RTjpeg_ZZ[0]]);
+
+
+ ci=63;
+ while (data[RTjpeg_ZZ[ci]]==0 && ci>0) ci--;
+
+ bitten = ((unsigned char)ci) << 2;
+
+ if (ci==0) {
+   (__u8)strm[1]= bitten; 
+   co = 2;
+   return (int)co;
+ }
+ 
+ /* bitoff=0 because the high 6bit contain first non zero position */ 
+ bitoff = 0;
+ co = 1;
+
+ for(; ci>0; ci--) {
+
+   ZZvalue = data[RTjpeg_ZZ[ci]];
+   
+   switch(ZZvalue) {
+   case 0:
+	break;
+   case 1:
+        bitten |= (0x01<<bitoff);
+	break;
+   case -1:
+        bitten |= (0x03<<bitoff);
+	break;
+   default:
+        bitten |= (0x02<<bitoff);
+	goto HERZWEH;
+	break;
+   }
+
+   if( bitoff == 0 ) {
+      (__u8)strm[co]= bitten; 
+      bitten = 0;
+      bitoff = 8; 
+      co++;
+   } /* "fall through" */
+   bitoff-=2; 
+
+ }
+ 
+ /* ci must be 0 */
+ if(bitoff != 6) {
+
+      (__u8)strm[co]= bitten; 
+      co++;
+     
+ }
+ goto BAUCHWEH;
+
+HERZWEH:
+/* ci cannot be 0 */
+/* correct bitoff to nibble boundaries */
+
+ switch(bitoff){
+ case 4:
+ case 6:
+   bitoff = 0;
+   break;
+ case 2:
+ case 0:
+   (__u8)strm[co]= bitten; 
+   bitoff = 4;
+   co++;
+   bitten = 0; // clear half nibble values in bitten
+   break;
+ default:
+   break;
+ }
+
+ for(; ci>0; ci--) {
+
+   ZZvalue = data[RTjpeg_ZZ[ci]];
+
+   if( (ZZvalue > 7) || (ZZvalue < -7) ) {
+        bitten |= (0x08<<bitoff);
+	goto HIRNWEH;
+   } 
+   
+   bitten |= (ZZvalue&0xf)<<bitoff;
+
+   if( bitoff == 0 ) {
+      (__u8)strm[co]= bitten; 
+      bitten = 0;
+      bitoff = 8;
+      co++;
+   } /* "fall thru" */
+   bitoff-=4;
+ }
+ 
+ /* ci must be 0 */
+ if( bitoff == 0 ) {
+    (__u8)strm[co]= bitten; 
+    co++;
+ }  
+ goto BAUCHWEH;
+
+HIRNWEH:
+
+ (__u8)strm[co]= bitten; 
+ co++;
+  
+
+ /* bitting is over now we bite */
+ for(; ci>0; ci--) {
+
+   ZZvalue = data[RTjpeg_ZZ[ci]];
+
+   if(ZZvalue>0) 
+   {
+     strm[co++]=(__s8)(ZZvalue>127)?127:ZZvalue;
+   } 
+   else 
+   {
+     strm[co++]=(__s8)(ZZvalue<-128)?-128:ZZvalue;
+   }
+
+ }
+ 
+
+BAUCHWEH:
+  /* we gotoo much now we are ill */
+#ifdef SHOWBLOCK
+{
+int i;
+fprintf(stdout, "\nco = '%d'\n", co);
+ for (i=0; i < co+2; i++) {
+   fprintf(stdout, "%d ", strm[i]);
+ }
+fprintf(stdout, "\n\n");
+}
+#endif
+
+ return (int)co;
+}
+
+/* +++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* Stream to Block  (decoding)                        */
+/*                                                    */
+
+int RTjpeg_s2b(__s16 *data, __s8 *strm, __u8 bt8, __u32 *qtbl)
+{
+ int ci;
+ register int co;
+ register int i;
+ register unsigned char bitten;
+ register unsigned char bitoff;
+
+ /* first byte always read */
+ i=RTjpeg_ZZ[0];
+ data[i]=((__u8)strm[0])*qtbl[i];
+
+ /* we start at the behind */ 
+
+ bitten = ((unsigned char)strm[1]) >> 2;
+ co = 63;
+ for(; co > bitten; co--) {
+
+   data[RTjpeg_ZZ[co]] = 0;
+
+ }
+
+ if (co==0) {
+   ci = 2;
+   goto AUTOBAHN;
+ }
+
+ /* we have to read the last 2 bits of the second byte */
+ ci=1;
+ bitoff = 0;
+
+ for(; co>0; co--) {
+
+  bitten  = ((unsigned char)strm[ci]) >> bitoff;
+  bitten &= 0x03;
+
+  i=RTjpeg_ZZ[co];
+
+  switch( bitten ) {
+  case 0x03:
+    data[i]= -qtbl[i];
+    break;
+  case 0x02:
+    goto FUSSWEG;
+    break;
+  case 0x01:
+    data[i]= qtbl[i];
+    break;
+  case 0x00:
+    data[i]= 0;
+    break;
+  default: 
+	
+  } 
+
+  if( bitoff == 0 ) {
+    bitoff = 8;
+    ci++;
+  }
+  bitoff -= 2;
+ }
+ /* co is 0 now */
+ /* data is written properly */
+
+ /* if bitoff!=6 then ci is the index, but should be the byte count, so we increment by 1 */
+ if (bitoff!=6) ci++;
+
+ goto AUTOBAHN;
+ 
+ 
+FUSSWEG:
+/* correct bitoff to nibble */
+ switch(bitoff){
+ case 4:
+ case 6:
+   bitoff = 0;
+   break;
+ case 2:
+ case 0:
+   /* we have to read from the next byte */
+   ci++;
+   bitoff = 4;
+   break;
+ default:
+   break;
+ }
+
+ for(; co>0; co--) {
+
+  bitten  = ((unsigned char)strm[ci]) >> bitoff;
+  bitten &= 0x0f;
+
+  i=RTjpeg_ZZ[co];
+
+  if( bitten == 0x08 ) {
+    goto STRASSE;
+  }
+
+  /* the compiler cannot do sign extension for signed nibbles */
+  if( bitten & 0x08 ) {
+    bitten |= 0xf0;
+  }
+  /* the unsigned char bitten now is a valid signed char */
+   
+  data[i]=((signed char)bitten)*qtbl[i];
+  
+  if( bitoff == 0 ) {
+    bitoff = 8;
+    ci++;
+  }
+  bitoff -= 4;
+ }
+ /* co is 0 */
+
+ /* if bitoff!=4 then ci is the index, but should be the byte count, so we increment by 1 */
+ if (bitoff!=4) ci++;
+
+ goto AUTOBAHN;
+
+STRASSE:
+  ci++; 
+ 
+ for(; co>0; co--) {
+  i=RTjpeg_ZZ[co];
+  data[i]=strm[ci++]*qtbl[i];
+ }
+
+ /* ci now is the count, because it points to next element => no incrementing */
+
+AUTOBAHN:
+
+#ifdef SHOWBLOCK
+fprintf(stdout, "\nci = '%d'\n", ci);
+ for (i=0; i < 64; i++) {
+   fprintf(stdout, "%d ", data[RTjpeg_ZZ[i]]);
+ }
+fprintf(stdout, "\n\n");
+#endif
+
+ return ci;
+}
+
+#else
+
+int RTjpeg_b2s(__s16 *data, __s8 *strm, __u8 bt8)
+{
+ register int ci, co=1, tmp;
+ register __s16 ZZvalue;
+
+#ifdef SHOWBLOCK
+
+  int ii;
+  for (ii=0; ii < 64; ii++) {
+    fprintf(stdout, "%d ", data[RTjpeg_ZZ[ii]]);
+  }
+  fprintf(stdout, "\n\n");
+
+#endif
+
+ (__u8)strm[0]=(__u8)(data[RTjpeg_ZZ[0]]>254) ? 254:((data[RTjpeg_ZZ[0]]<0)?0:data[RTjpeg_ZZ[0]]);
+
+ for(ci=1; ci<=bt8; ci++) 
+ {
+	ZZvalue = data[RTjpeg_ZZ[ci]];
+
+   if(ZZvalue>0) 
+	{
+     strm[co++]=(__s8)(ZZvalue>127)?127:ZZvalue;
+   } 
+	else 
+	{
+     strm[co++]=(__s8)(ZZvalue<-128)?-128:ZZvalue;
+   }
+ }
+
+ for(; ci<64; ci++) 
+ {
+  ZZvalue = data[RTjpeg_ZZ[ci]];
+
+  if(ZZvalue>0)
+  {
+   strm[co++]=(__s8)(ZZvalue>63)?63:ZZvalue;
+  } 
+  else if(ZZvalue<0)
+  {
+   strm[co++]=(__s8)(ZZvalue<-64)?-64:ZZvalue;
+  } 
+  else /* compress zeros */
+  {
+   tmp=ci;
+   do
+   {
+    ci++;
+   } 
+	while((ci<64)&&(data[RTjpeg_ZZ[ci]]==0));
+
+   strm[co++]=(__s8)(63+(ci-tmp));
+   ci--;
+  }
+ }
+ return (int)co;
+}
+
+int RTjpeg_s2b(__s16 *data, __s8 *strm, __u8 bt8, __u32 *qtbl)
+{
+ int ci=1, co=1, tmp;
+ register int i;
+
+ i=RTjpeg_ZZ[0];
+ data[i]=((__u8)strm[0])*qtbl[i];
+
+ for(co=1; co<=bt8; co++)
+ {
+  i=RTjpeg_ZZ[co];
+  data[i]=strm[ci++]*qtbl[i];
+ }
+ 
+ for(; co<64; co++)
+ {
+  if(strm[ci]>63)
+  {
+   tmp=co+strm[ci]-63;
+   for(; co<tmp; co++)data[RTjpeg_ZZ[co]]=0;
+   co--;
+  } else
+  {
+   i=RTjpeg_ZZ[co];
+   data[i]=strm[ci]*qtbl[i];
+  }
+  ci++;
+ }
+ return (int)ci;
+}
+#endif
+
+#if defined(MMX)
+void RTjpeg_quant_init(void)
+{
+ int i;
+ __s16 *qtbl;
+ 
+ qtbl=(__s16 *)RTjpeg_lqt;
+ for(i=0; i<64; i++)qtbl[i]=(__s16)RTjpeg_lqt[i];
+
+ qtbl=(__s16 *)RTjpeg_cqt;
+ for(i=0; i<64; i++)qtbl[i]=(__s16)RTjpeg_cqt[i];
+}
+
+static mmx_t RTjpeg_ones=(mmx_t)(long long)0x0001000100010001LL;
+static mmx_t RTjpeg_half=(mmx_t)(long long)0x7fff7fff7fff7fffLL;
+
+void RTjpeg_quant(__s16 *block, __s32 *qtbl)
+{
+ int i;
+ mmx_t *bl, *ql;
+ 
+ ql=(mmx_t *)qtbl;
+ bl=(mmx_t *)block;
+ 
+ movq_m2r(RTjpeg_ones, mm6);
+ movq_m2r(RTjpeg_half, mm7);
+
+ for(i=16; i; i--) 
+ {
+  movq_m2r(*(ql++), mm0); /* quant vals (4) */
+  movq_m2r(*bl, mm2); /* block vals (4) */
+  movq_r2r(mm0, mm1);
+  movq_r2r(mm2, mm3);
+  
+  punpcklwd_r2r(mm6, mm0); /*           1 qb 1 qa */
+  punpckhwd_r2r(mm6, mm1); /* 1 qd 1 qc */
+  
+  punpcklwd_r2r(mm7, mm2); /*                   32767 bb 32767 ba */
+  punpckhwd_r2r(mm7, mm3); /* 32767 bd 32767 bc */
+  
+  pmaddwd_r2r(mm2, mm0); /*                         32767+bb*qb 32767+ba*qa */
+  pmaddwd_r2r(mm3, mm1); /* 32767+bd*qd 32767+bc*qc */
+  
+  psrad_i2r(16, mm0);
+  psrad_i2r(16, mm1);
+  
+  packssdw_r2r(mm1, mm0);
+  
+  movq_r2m(mm0, *(bl++));
+  
+ }
+}
+#else
+void RTjpeg_quant_init(void)
+{
+}
+
+void RTjpeg_quant(__s16 *block, __s32 *qtbl)
+{
+ int i;
+ 
+ for(i=0; i<64; i++)
+   block[i]=(__s16)((block[i]*qtbl[i]+32767)>>16);
+}
+#endif
+
+/*
+ * Perform the forward DCT on one block of samples.
+ */
+#ifdef MMX
+static mmx_t RTjpeg_C4   =(mmx_t)(long long)0x2D412D412D412D41LL;
+static mmx_t RTjpeg_C6   =(mmx_t)(long long)0x187E187E187E187ELL;
+static mmx_t RTjpeg_C2mC6=(mmx_t)(long long)0x22A322A322A322A3LL;
+static mmx_t RTjpeg_C2pC6=(mmx_t)(long long)0x539F539F539F539FLL;
+static mmx_t RTjpeg_zero =(mmx_t)(long long)0x0000000000000000LL;
+
+#else
+
+#define FIX_0_382683433  ((__s32)   98)		/* FIX(0.382683433) */
+#define FIX_0_541196100  ((__s32)  139)		/* FIX(0.541196100) */
+#define FIX_0_707106781  ((__s32)  181)		/* FIX(0.707106781) */
+#define FIX_1_306562965  ((__s32)  334)		/* FIX(1.306562965) */
+
+#define DESCALE10(x) (__s16)( ((x)+128) >> 8)
+#define DESCALE20(x)  (__s16)(((x)+32768) >> 16)
+#define D_MULTIPLY(var,const)  ((__s32) ((var) * (const)))
+#endif
+
+void RTjpeg_dct_init(void)
+{
+ int i;
+ 
+ for(i=0; i<64; i++)
+ {
+  RTjpeg_lqt[i]=(((__u64)RTjpeg_lqt[i]<<32)/RTjpeg_aan_tab[i]);
+  RTjpeg_cqt[i]=(((__u64)RTjpeg_cqt[i]<<32)/RTjpeg_aan_tab[i]);
+ }
+}
+
+void RTjpeg_dctY(__u8 *idata, __s16 *odata, int rskip)
+{
+#ifndef MMX
+  __s32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  __s32 tmp10, tmp11, tmp12, tmp13;
+  __s32 z1, z2, z3, z4, z5, z11, z13;
+  __u8 *idataptr;
+  __s16 *odataptr;
+  __s32 *wsptr;
+  int ctr;
+
+  idataptr = idata;
+  wsptr = RTjpeg_ws;
+  for (ctr = 7; ctr >= 0; ctr--) {
+    tmp0 = idataptr[0] + idataptr[7];
+    tmp7 = idataptr[0] - idataptr[7];
+    tmp1 = idataptr[1] + idataptr[6];
+    tmp6 = idataptr[1] - idataptr[6];
+    tmp2 = idataptr[2] + idataptr[5];
+    tmp5 = idataptr[2] - idataptr[5];
+    tmp3 = idataptr[3] + idataptr[4];
+    tmp4 = idataptr[3] - idataptr[4];
+    
+    tmp10 = (tmp0 + tmp3);	/* phase 2 */
+    tmp13 = tmp0 - tmp3;
+    tmp11 = (tmp1 + tmp2);
+    tmp12 = tmp1 - tmp2;
+    
+    wsptr[0] = (tmp10 + tmp11)<<8; /* phase 3 */
+    wsptr[4] = (tmp10 - tmp11)<<8;
+    
+    z1 = D_MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
+    wsptr[2] = (tmp13<<8) + z1;	/* phase 5 */
+    wsptr[6] = (tmp13<<8) - z1;
+    
+    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp6 + tmp7;
+
+    z5 = D_MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
+    z2 = D_MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
+    z4 = D_MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
+    z3 = D_MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
+
+    z11 = (tmp7<<8) + z3;		/* phase 5 */
+    z13 = (tmp7<<8) - z3;
+
+    wsptr[5] = z13 + z2;	/* phase 6 */
+    wsptr[3] = z13 - z2;
+    wsptr[1] = z11 + z4;
+    wsptr[7] = z11 - z4;
+
+    idataptr += rskip<<3;		/* advance pointer to next row */
+    wsptr += 8;
+  }
+
+  wsptr = RTjpeg_ws;
+  odataptr=odata;
+  for (ctr = 7; ctr >= 0; ctr--) {
+    tmp0 = wsptr[0] + wsptr[56];
+    tmp7 = wsptr[0] - wsptr[56];
+    tmp1 = wsptr[8] + wsptr[48];
+    tmp6 = wsptr[8] - wsptr[48];
+    tmp2 = wsptr[16] + wsptr[40];
+    tmp5 = wsptr[16] - wsptr[40];
+    tmp3 = wsptr[24] + wsptr[32];
+    tmp4 = wsptr[24] - wsptr[32];
+    
+    tmp10 = tmp0 + tmp3;	/* phase 2 */
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+    
+    odataptr[0] = DESCALE10(tmp10 + tmp11); /* phase 3 */
+    odataptr[32] = DESCALE10(tmp10 - tmp11);
+    
+    z1 = D_MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
+    odataptr[16] = DESCALE20((tmp13<<8) + z1); /* phase 5 */
+    odataptr[48] = DESCALE20((tmp13<<8) - z1);
+
+    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp6 + tmp7;
+
+    z5 = D_MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
+    z2 = D_MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
+    z4 = D_MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
+    z3 = D_MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
+
+    z11 = (tmp7<<8) + z3;		/* phase 5 */
+    z13 = (tmp7<<8) - z3;
+
+    odataptr[40] = DESCALE20(z13 + z2); /* phase 6 */
+    odataptr[24] = DESCALE20(z13 - z2);
+    odataptr[8] = DESCALE20(z11 + z4);
+    odataptr[56] = DESCALE20(z11 - z4);
+
+    odataptr++;			/* advance pointer to next column */
+    wsptr++;
+  }
+#else
+  volatile mmx_t tmp6, tmp7;
+  register mmx_t *dataptr = (mmx_t *)odata;
+  mmx_t *idata2 = (mmx_t *)idata;
+
+   // first copy the input 8 bit to the destination 16 bits
+
+   movq_m2r(RTjpeg_zero, mm2);
+
+
+	movq_m2r(*idata2, mm0);		 
+	movq_r2r(mm0, mm1);		 			
+
+	punpcklbw_r2r(mm2, mm0);
+	movq_r2m(mm0, *(dataptr));
+
+	punpckhbw_r2r(mm2, mm1);
+	movq_r2m(mm1, *(dataptr+1));
+	
+	idata2 += rskip;
+
+	movq_m2r(*idata2, mm0);		 
+	movq_r2r(mm0, mm1);		 			
+
+	punpcklbw_r2r(mm2, mm0);
+	movq_r2m(mm0, *(dataptr+2));
+
+	punpckhbw_r2r(mm2, mm1);
+	movq_r2m(mm1, *(dataptr+3));
+	
+	idata2 += rskip;
+
+	movq_m2r(*idata2, mm0);		 
+	movq_r2r(mm0, mm1);		 			
+
+	punpcklbw_r2r(mm2, mm0);
+	movq_r2m(mm0, *(dataptr+4));
+
+	punpckhbw_r2r(mm2, mm1);
+	movq_r2m(mm1, *(dataptr+5));
+	
+	idata2 += rskip;
+
+	movq_m2r(*idata2, mm0);		 
+	movq_r2r(mm0, mm1);		 			
+
+	punpcklbw_r2r(mm2, mm0);
+	movq_r2m(mm0, *(dataptr+6));
+
+	punpckhbw_r2r(mm2, mm1);
+	movq_r2m(mm1, *(dataptr+7));
+	
+	idata2 += rskip;
+
+	movq_m2r(*idata2, mm0);		 
+	movq_r2r(mm0, mm1);		 			
+
+	punpcklbw_r2r(mm2, mm0);
+	movq_r2m(mm0, *(dataptr+8));
+
+	punpckhbw_r2r(mm2, mm1);
+	movq_r2m(mm1, *(dataptr+9));
+	
+	idata2 += rskip;
+
+	movq_m2r(*idata2, mm0);		 
+	movq_r2r(mm0, mm1);		 			
+
+	punpcklbw_r2r(mm2, mm0);
+	movq_r2m(mm0, *(dataptr+10));
+
+	punpckhbw_r2r(mm2, mm1);
+	movq_r2m(mm1, *(dataptr+11));
+	
+	idata2 += rskip;
+
+	movq_m2r(*idata2, mm0);		 
+	movq_r2r(mm0, mm1);		 			
+
+	punpcklbw_r2r(mm2, mm0);
+	movq_r2m(mm0, *(dataptr+12));
+
+	punpckhbw_r2r(mm2, mm1);
+	movq_r2m(mm1, *(dataptr+13));
+	
+	idata2 += rskip;
+
+	movq_m2r(*idata2, mm0);		 
+	movq_r2r(mm0, mm1);		 			
+
+	punpcklbw_r2r(mm2, mm0);
+	movq_r2m(mm0, *(dataptr+14));
+
+	punpckhbw_r2r(mm2, mm1);
+	movq_r2m(mm1, *(dataptr+15));
+
+/*  Start Transpose to do calculations on rows */
+
+	movq_m2r(*(dataptr+9), mm7);		 	// m03:m02|m01:m00 - first line (line 4)and copy into m5
+
+	movq_m2r(*(dataptr+13), mm6);	    	// m23:m22|m21:m20 - third line (line 6)and copy into m2
+	movq_r2r(mm7, mm5);		 			
+
+	punpcklwd_m2r(*(dataptr+11), mm7); 	// m11:m01|m10:m00 - interleave first and second lines
+	movq_r2r(mm6, mm2);						 
+
+	punpcklwd_m2r(*(dataptr+15), mm6);  // m31:m21|m30:m20 - interleave third and fourth lines
+	movq_r2r(mm7, mm1);
+
+	movq_m2r(*(dataptr+11), mm3);	      // m13:m13|m11:m10 - second line	 
+	punpckldq_r2r(mm6, mm7);				// m30:m20|m10:m00 - interleave to produce result 1
+
+	movq_m2r(*(dataptr+15), mm0);	      // m13:m13|m11:m10 - fourth line
+	punpckhdq_r2r(mm6, mm1);				// m31:m21|m11:m01 - interleave to produce result 2
+
+	movq_r2m(mm7,*(dataptr+9));			// write result 1
+	punpckhwd_r2r(mm3, mm5);				// m13:m03|m12:m02 - interleave first and second lines
+	
+	movq_r2m(mm1,*(dataptr+11));			// write result 2
+	punpckhwd_r2r(mm0, mm2);				// m33:m23|m32:m22 - interleave third and fourth lines
+
+	movq_r2r(mm5, mm1);
+	punpckldq_r2r(mm2, mm5);				// m32:m22|m12:m02 - interleave to produce result 3
+
+	movq_m2r(*(dataptr+1), mm0);			// m03:m02|m01:m00 - first line, 4x4
+	punpckhdq_r2r(mm2, mm1);				// m33:m23|m13:m03 - interleave to produce result 4
+
+	movq_r2m(mm5,*(dataptr+13));			// write result 3
+
+	// last 4x4 done
+
+	movq_r2m(mm1, *(dataptr+15));			// write result 4, last 4x4
+
+	movq_m2r(*(dataptr+5), mm2);			// m23:m22|m21:m20 - third line
+	movq_r2r(mm0, mm6);
+
+	punpcklwd_m2r(*(dataptr+3), mm0);  	// m11:m01|m10:m00 - interleave first and second lines
+	movq_r2r(mm2, mm7);
+
+	punpcklwd_m2r(*(dataptr+7), mm2);  	// m31:m21|m30:m20 - interleave third and fourth lines
+	movq_r2r(mm0, mm4);
+
+	//
+	movq_m2r(*(dataptr+8), mm1);			// n03:n02|n01:n00 - first line 
+	punpckldq_r2r(mm2, mm0);				// m30:m20|m10:m00 - interleave to produce first result
+
+	movq_m2r(*(dataptr+12), mm3);			// n23:n22|n21:n20 - third line
+	punpckhdq_r2r(mm2, mm4);				// m31:m21|m11:m01 - interleave to produce second result
+
+	punpckhwd_m2r(*(dataptr+3), mm6);  	// m13:m03|m12:m02 - interleave first and second lines
+	movq_r2r(mm1, mm2);               	// copy first line
+
+	punpckhwd_m2r(*(dataptr+7), mm7);  	// m33:m23|m32:m22 - interleave third and fourth lines
+	movq_r2r(mm6, mm5);						// copy first intermediate result
+
+	movq_r2m(mm0, *(dataptr+8));			// write result 1
+	punpckhdq_r2r(mm7, mm5);				// m33:m23|m13:m03 - produce third result
+
+	punpcklwd_m2r(*(dataptr+10), mm1);  // n11:n01|n10:n00 - interleave first and second lines
+	movq_r2r(mm3, mm0);						// copy third line
+
+	punpckhwd_m2r(*(dataptr+10), mm2);  // n13:n03|n12:n02 - interleave first and second lines
+
+	movq_r2m(mm4, *(dataptr+10));			// write result 2 out
+	punpckldq_r2r(mm7, mm6);				// m32:m22|m12:m02 - produce fourth result
+
+	punpcklwd_m2r(*(dataptr+14), mm3);  // n31:n21|n30:n20 - interleave third and fourth lines
+	movq_r2r(mm1, mm4);
+
+	movq_r2m(mm6, *(dataptr+12));			// write result 3 out
+	punpckldq_r2r(mm3, mm1);				// n30:n20|n10:n00 - produce first result
+
+	punpckhwd_m2r(*(dataptr+14), mm0);  // n33:n23|n32:n22 - interleave third and fourth lines
+	movq_r2r(mm2, mm6);
+
+	movq_r2m(mm5, *(dataptr+14));			// write result 4 out
+	punpckhdq_r2r(mm3, mm4);				// n31:n21|n11:n01- produce second result
+
+	movq_r2m(mm1, *(dataptr+1));			// write result 5 out - (first result for other 4 x 4 block)
+	punpckldq_r2r(mm0, mm2);				// n32:n22|n12:n02- produce third result
+
+	movq_r2m(mm4, *(dataptr+3));			// write result 6 out
+	punpckhdq_r2r(mm0, mm6);				// n33:n23|n13:n03 - produce fourth result
+
+	movq_r2m(mm2, *(dataptr+5));			// write result 7 out
+
+	movq_m2r(*dataptr, mm0);				// m03:m02|m01:m00 - first line, first 4x4
+
+	movq_r2m(mm6, *(dataptr+7));			// write result 8 out
+
+
+// Do first 4x4 quadrant, which is used in the beginning of the DCT:
+
+	movq_m2r(*(dataptr+4), mm7);			// m23:m22|m21:m20 - third line
+	movq_r2r(mm0, mm2);
+
+	punpcklwd_m2r(*(dataptr+2), mm0);  	// m11:m01|m10:m00 - interleave first and second lines
+	movq_r2r(mm7, mm4);
+
+	punpcklwd_m2r(*(dataptr+6), mm7);  	// m31:m21|m30:m20 - interleave third and fourth lines
+	movq_r2r(mm0, mm1);
+
+	movq_m2r(*(dataptr+2), mm6);			// m13:m12|m11:m10 - second line
+	punpckldq_r2r(mm7, mm0);				// m30:m20|m10:m00 - interleave to produce result 1
+
+	movq_m2r(*(dataptr+6), mm5);			// m33:m32|m31:m30 - fourth line
+	punpckhdq_r2r(mm7, mm1);				// m31:m21|m11:m01 - interleave to produce result 2
+
+	movq_r2r(mm0, mm7);						// write result 1
+	punpckhwd_r2r(mm6, mm2);				// m13:m03|m12:m02 - interleave first and second lines
+
+	psubw_m2r(*(dataptr+14), mm7);		// tmp07=x0-x7	/* Stage 1 */
+	movq_r2r(mm1, mm6);						// write result 2
+
+	paddw_m2r(*(dataptr+14), mm0);		// tmp00=x0+x7	/* Stage 1 */
+	punpckhwd_r2r(mm5, mm4);   			// m33:m23|m32:m22 - interleave third and fourth lines
+
+	paddw_m2r(*(dataptr+12), mm1);		// tmp01=x1+x6	/* Stage 1 */
+	movq_r2r(mm2, mm3);						// copy first intermediate result
+
+	psubw_m2r(*(dataptr+12), mm6);		// tmp06=x1-x6	/* Stage 1 */
+	punpckldq_r2r(mm4, mm2);				// m32:m22|m12:m02 - interleave to produce result 3
+
+   movq_r2m(mm7, tmp7);
+	movq_r2r(mm2, mm5);						// write result 3
+
+   movq_r2m(mm6, tmp6);
+	punpckhdq_r2r(mm4, mm3);				// m33:m23|m13:m03 - interleave to produce result 4
+
+	paddw_m2r(*(dataptr+10), mm2);     	// tmp02=x2+5 /* Stage 1 */
+	movq_r2r(mm3, mm4);						// write result 4
+
+/************************************************************************************************
+					End of Transpose
+************************************************************************************************/
+
+
+   paddw_m2r(*(dataptr+8), mm3);    	// tmp03=x3+x4 /* stage 1*/
+   movq_r2r(mm0, mm7);
+
+   psubw_m2r(*(dataptr+8), mm4);    	// tmp04=x3-x4 /* stage 1*/
+   movq_r2r(mm1, mm6);
+
+	paddw_r2r(mm3, mm0);  					// tmp10 = tmp00 + tmp03 /* even 2 */
+	psubw_r2r(mm3, mm7);  					// tmp13 = tmp00 - tmp03 /* even 2 */
+
+	psubw_r2r(mm2, mm6);  					// tmp12 = tmp01 - tmp02 /* even 2 */
+	paddw_r2r(mm2, mm1);  					// tmp11 = tmp01 + tmp02 /* even 2 */
+
+   psubw_m2r(*(dataptr+10), mm5);    	// tmp05=x2-x5 /* stage 1*/
+	paddw_r2r(mm7, mm6);						// tmp12 + tmp13
+
+	/* stage 3 */
+
+   movq_m2r(tmp6, mm2);
+   movq_r2r(mm0, mm3);
+
+	psllw_i2r(2, mm6);			// m8 * 2^2
+	paddw_r2r(mm1, mm0);		
+
+	pmulhw_m2r(RTjpeg_C4, mm6);			// z1
+	psubw_r2r(mm1, mm3);		
+
+   movq_r2m(mm0, *dataptr);
+   movq_r2r(mm7, mm0);
+   
+    /* Odd part */
+   movq_r2m(mm3, *(dataptr+8));
+	paddw_r2r(mm5, mm4);						// tmp10
+
+   movq_m2r(tmp7, mm3);
+	paddw_r2r(mm6, mm0);						// tmp32
+
+	paddw_r2r(mm2, mm5);						// tmp11
+	psubw_r2r(mm6, mm7);						// tmp33
+
+   movq_r2m(mm0, *(dataptr+4));
+	paddw_r2r(mm3, mm2);						// tmp12
+
+	/* stage 4 */
+
+   movq_r2m(mm7, *(dataptr+12));
+	movq_r2r(mm4, mm1);						// copy of tmp10
+
+	psubw_r2r(mm2, mm1);						// tmp10 - tmp12
+	psllw_i2r(2, mm4);			// m8 * 2^2
+
+	movq_m2r(RTjpeg_C2mC6, mm0);		
+	psllw_i2r(2, mm1);
+
+	pmulhw_m2r(RTjpeg_C6, mm1);			// z5
+	psllw_i2r(2, mm2);
+
+	pmulhw_r2r(mm0, mm4);					// z5
+
+	/* stage 5 */
+
+	pmulhw_m2r(RTjpeg_C2pC6, mm2);
+	psllw_i2r(2, mm5);
+
+	pmulhw_m2r(RTjpeg_C4, mm5);			// z3
+	movq_r2r(mm3, mm0);						// copy tmp7
+
+   movq_m2r(*(dataptr+1), mm7);
+	paddw_r2r(mm1, mm4);						// z2
+
+	paddw_r2r(mm1, mm2);						// z4
+
+	paddw_r2r(mm5, mm0);						// z11
+	psubw_r2r(mm5, mm3);						// z13
+
+	/* stage 6 */
+
+	movq_r2r(mm3, mm5);						// copy z13
+	psubw_r2r(mm4, mm3);						// y3=z13 - z2
+
+	paddw_r2r(mm4, mm5);						// y5=z13 + z2
+	movq_r2r(mm0, mm6);						// copy z11
+
+   movq_r2m(mm3, *(dataptr+6)); 			//save y3
+	psubw_r2r(mm2, mm0);						// y7=z11 - z4
+
+   movq_r2m(mm5, *(dataptr+10)); 		//save y5
+	paddw_r2r(mm2, mm6);						// y1=z11 + z4
+
+   movq_r2m(mm0, *(dataptr+14)); 		//save y7
+
+	/************************************************
+	 *  End of 1st 4 rows
+	 ************************************************/
+
+   movq_m2r(*(dataptr+3), mm1); 			// load x1   /* stage 1 */
+	movq_r2r(mm7, mm0);						// copy x0
+
+   movq_r2m(mm6, *(dataptr+2)); 			//save y1
+
+   movq_m2r(*(dataptr+5), mm2); 			// load x2   /* stage 1 */
+	movq_r2r(mm1, mm6);						// copy x1
+
+   paddw_m2r(*(dataptr+15), mm0);  		// tmp00 = x0 + x7
+
+   movq_m2r(*(dataptr+7), mm3); 			// load x3   /* stage 1 */
+	movq_r2r(mm2, mm5);						// copy x2
+
+   psubw_m2r(*(dataptr+15), mm7);  		// tmp07 = x0 - x7
+	movq_r2r(mm3, mm4);						// copy x3
+
+   paddw_m2r(*(dataptr+13), mm1);  		// tmp01 = x1 + x6
+
+	movq_r2m(mm7, tmp7);						// save tmp07
+	movq_r2r(mm0, mm7);						// copy tmp00
+
+   psubw_m2r(*(dataptr+13), mm6);  		// tmp06 = x1 - x6
+
+   /* stage 2, Even Part */
+
+   paddw_m2r(*(dataptr+9), mm3);  		// tmp03 = x3 + x4
+
+	movq_r2m(mm6, tmp6);						// save tmp07
+	movq_r2r(mm1, mm6);						// copy tmp01
+
+   paddw_m2r(*(dataptr+11), mm2);  		// tmp02 = x2 + x5
+	paddw_r2r(mm3, mm0);              	// tmp10 = tmp00 + tmp03
+
+	psubw_r2r(mm3, mm7);              	// tmp13 = tmp00 - tmp03
+
+   psubw_m2r(*(dataptr+9), mm4);  		// tmp04 = x3 - x4
+	psubw_r2r(mm2, mm6);              	// tmp12 = tmp01 - tmp02
+
+	paddw_r2r(mm2, mm1);              	// tmp11 = tmp01 + tmp02
+
+   psubw_m2r(*(dataptr+11), mm5);  		// tmp05 = x2 - x5
+	paddw_r2r(mm7, mm6);              	//  tmp12 + tmp13
+
+   /* stage 3, Even and stage 4 & 5 even */
+
+	movq_m2r(tmp6, mm2);            		// load tmp6
+	movq_r2r(mm0, mm3);						// copy tmp10
+
+	psllw_i2r(2, mm6);			// shift z1
+	paddw_r2r(mm1, mm0);    				// y0=tmp10 + tmp11
+
+	pmulhw_m2r(RTjpeg_C4, mm6);    		// z1
+	psubw_r2r(mm1, mm3);    				// y4=tmp10 - tmp11
+
+   movq_r2m(mm0, *(dataptr+1)); 			//save y0
+	movq_r2r(mm7, mm0);						// copy tmp13
+  
+	/* odd part */
+
+   movq_r2m(mm3, *(dataptr+9)); 			//save y4
+	paddw_r2r(mm5, mm4);              	// tmp10 = tmp4 + tmp5
+
+	movq_m2r(tmp7, mm3);            		// load tmp7
+	paddw_r2r(mm6, mm0);              	// tmp32 = tmp13 + z1
+
+	paddw_r2r(mm2, mm5);              	// tmp11 = tmp5 + tmp6
+	psubw_r2r(mm6, mm7);              	// tmp33 = tmp13 - z1
+
+   movq_r2m(mm0, *(dataptr+5)); 			//save y2
+	paddw_r2r(mm3, mm2);              	// tmp12 = tmp6 + tmp7
+
+	/* stage 4 */
+
+   movq_r2m(mm7, *(dataptr+13)); 		//save y6
+	movq_r2r(mm4, mm1);						// copy tmp10
+
+	psubw_r2r(mm2, mm1);    				// tmp10 - tmp12
+	psllw_i2r(2, mm4);			// shift tmp10
+
+	movq_m2r(RTjpeg_C2mC6, mm0);			// load C2mC6
+	psllw_i2r(2, mm1);			// shift (tmp10-tmp12)
+
+	pmulhw_m2r(RTjpeg_C6, mm1);    		// z5
+	psllw_i2r(2, mm5);			// prepare for multiply 
+
+	pmulhw_r2r(mm0, mm4);					// multiply by converted real
+
+	/* stage 5 */
+
+	pmulhw_m2r(RTjpeg_C4, mm5);			// z3
+	psllw_i2r(2, mm2);			// prepare for multiply 
+
+	pmulhw_m2r(RTjpeg_C2pC6, mm2);		// multiply
+	movq_r2r(mm3, mm0);						// copy tmp7
+
+	movq_m2r(*(dataptr+9), mm7);		 	// m03:m02|m01:m00 - first line (line 4)and copy into mm7
+	paddw_r2r(mm1, mm4);						// z2
+
+	paddw_r2r(mm5, mm0);						// z11
+	psubw_r2r(mm5, mm3);						// z13
+
+	/* stage 6 */
+
+	movq_r2r(mm3, mm5);						// copy z13
+	paddw_r2r(mm1, mm2);						// z4
+
+	movq_r2r(mm0, mm6);						// copy z11
+	psubw_r2r(mm4, mm5);						// y3
+
+	paddw_r2r(mm2, mm6);						// y1
+	paddw_r2r(mm4, mm3);						// y5
+
+   movq_r2m(mm5, *(dataptr+7)); 			//save y3
+
+   movq_r2m(mm6, *(dataptr+3)); 			//save y1
+	psubw_r2r(mm2, mm0);						// y7
+	
+/************************************************************************************************
+					Start of Transpose
+************************************************************************************************/
+
+ 	movq_m2r(*(dataptr+13), mm6);		 	// m23:m22|m21:m20 - third line (line 6)and copy into m2
+	movq_r2r(mm7, mm5);						// copy first line
+
+	punpcklwd_r2r(mm3, mm7); 				// m11:m01|m10:m00 - interleave first and second lines
+	movq_r2r(mm6, mm2);						// copy third line
+
+	punpcklwd_r2r(mm0, mm6);  				// m31:m21|m30:m20 - interleave third and fourth lines
+	movq_r2r(mm7, mm1);						// copy first intermediate result
+
+	punpckldq_r2r(mm6, mm7);				// m30:m20|m10:m00 - interleave to produce result 1
+
+	punpckhdq_r2r(mm6, mm1);				// m31:m21|m11:m01 - interleave to produce result 2
+
+	movq_r2m(mm7, *(dataptr+9));			// write result 1
+	punpckhwd_r2r(mm3, mm5);				// m13:m03|m12:m02 - interleave first and second lines
+
+	movq_r2m(mm1, *(dataptr+11));			// write result 2
+	punpckhwd_r2r(mm0, mm2);				// m33:m23|m32:m22 - interleave third and fourth lines
+
+	movq_r2r(mm5, mm1);						// copy first intermediate result
+	punpckldq_r2r(mm2, mm5);				// m32:m22|m12:m02 - interleave to produce result 3
+
+	movq_m2r(*(dataptr+1), mm0);			// m03:m02|m01:m00 - first line, 4x4
+	punpckhdq_r2r(mm2, mm1);				// m33:m23|m13:m03 - interleave to produce result 4
+
+	movq_r2m(mm5, *(dataptr+13));			// write result 3
+
+	/****** last 4x4 done */
+
+	movq_r2m(mm1, *(dataptr+15));			// write result 4, last 4x4
+
+	movq_m2r(*(dataptr+5), mm2);			// m23:m22|m21:m20 - third line
+	movq_r2r(mm0, mm6);						// copy first line
+
+	punpcklwd_m2r(*(dataptr+3), mm0);  	// m11:m01|m10:m00 - interleave first and second lines
+	movq_r2r(mm2, mm7);						// copy third line
+
+	punpcklwd_m2r(*(dataptr+7), mm2);  	// m31:m21|m30:m20 - interleave third and fourth lines
+	movq_r2r(mm0, mm4);						// copy first intermediate result
+
+	
+
+	movq_m2r(*(dataptr+8), mm1);			// n03:n02|n01:n00 - first line 
+	punpckldq_r2r(mm2, mm0);				// m30:m20|m10:m00 - interleave to produce first result
+
+	movq_m2r(*(dataptr+12), mm3);			// n23:n22|n21:n20 - third line
+	punpckhdq_r2r(mm2, mm4);				// m31:m21|m11:m01 - interleave to produce second result
+
+	punpckhwd_m2r(*(dataptr+3), mm6);  	// m13:m03|m12:m02 - interleave first and second lines
+	movq_r2r(mm1, mm2);						// copy first line
+
+	punpckhwd_m2r(*(dataptr+7), mm7);  	// m33:m23|m32:m22 - interleave third and fourth lines
+	movq_r2r(mm6, mm5);						// copy first intermediate result
+
+	movq_r2m(mm0, *(dataptr+8));			// write result 1
+	punpckhdq_r2r(mm7, mm5);				// m33:m23|m13:m03 - produce third result
+
+	punpcklwd_m2r(*(dataptr+10), mm1);  // n11:n01|n10:n00 - interleave first and second lines
+	movq_r2r(mm3, mm0);						// copy third line
+
+	punpckhwd_m2r(*(dataptr+10), mm2);  // n13:n03|n12:n02 - interleave first and second lines
+
+	movq_r2m(mm4, *(dataptr+10));			// write result 2 out
+	punpckldq_r2r(mm7, mm6);				// m32:m22|m12:m02 - produce fourth result
+
+	punpcklwd_m2r(*(dataptr+14), mm3);  // n33:n23|n32:n22 - interleave third and fourth lines
+	movq_r2r(mm1, mm4);						// copy second intermediate result
+
+	movq_r2m(mm6, *(dataptr+12));			// write result 3 out
+	punpckldq_r2r(mm3, mm1);				// 
+
+	punpckhwd_m2r(*(dataptr+14), mm0);  // n33:n23|n32:n22 - interleave third and fourth lines
+	movq_r2r(mm2, mm6);						// copy second intermediate result
+
+	movq_r2m(mm5, *(dataptr+14));			// write result 4 out
+	punpckhdq_r2r(mm3, mm4);				// n31:n21|n11:n01- produce second result
+
+	movq_r2m(mm1, *(dataptr+1));			// write result 5 out - (first result for other 4 x 4 block)
+	punpckldq_r2r(mm0, mm2);				// n32:n22|n12:n02- produce third result
+
+	movq_r2m(mm4, *(dataptr+3));			// write result 6 out
+	punpckhdq_r2r(mm0, mm6);				// n33:n23|n13:n03 - produce fourth result
+
+	movq_r2m(mm2, *(dataptr+5));			// write result 7 out
+
+	movq_m2r(*dataptr, mm0);				// m03:m02|m01:m00 - first line, first 4x4
+
+	movq_r2m(mm6, *(dataptr+7));			// write result 8 out
+
+// Do first 4x4 quadrant, which is used in the beginning of the DCT:
+
+	movq_m2r(*(dataptr+4), mm7);			// m23:m22|m21:m20 - third line
+	movq_r2r(mm0, mm2);						// copy first line
+
+	punpcklwd_m2r(*(dataptr+2), mm0);  	// m11:m01|m10:m00 - interleave first and second lines
+	movq_r2r(mm7, mm4);						// copy third line
+	
+	punpcklwd_m2r(*(dataptr+6), mm7);  	// m31:m21|m30:m20 - interleave third and fourth lines
+	movq_r2r(mm0, mm1);						// copy first intermediate result
+
+	movq_m2r(*(dataptr+2), mm6);			// m13:m12|m11:m10 - second line
+	punpckldq_r2r(mm7, mm0);				// m30:m20|m10:m00 - interleave to produce result 1
+
+	movq_m2r(*(dataptr+6), mm5);			// m33:m32|m31:m30 - fourth line
+	punpckhdq_r2r(mm7, mm1);				// m31:m21|m11:m01 - interleave to produce result 2
+
+	movq_r2r(mm0, mm7);						// write result 1
+	punpckhwd_r2r(mm6, mm2);				// m13:m03|m12:m02 - interleave first and second lines
+
+	psubw_m2r(*(dataptr+14), mm7);		// tmp07=x0-x7	/* Stage 1 */
+	movq_r2r(mm1, mm6);						// write result 2
+
+	paddw_m2r(*(dataptr+14), mm0);		// tmp00=x0+x7	/* Stage 1 */
+	punpckhwd_r2r(mm5, mm4);   			// m33:m23|m32:m22 - interleave third and fourth lines
+
+	paddw_m2r(*(dataptr+12), mm1);		// tmp01=x1+x6	/* Stage 1 */
+	movq_r2r(mm2, mm3);						// copy first intermediate result
+
+	psubw_m2r(*(dataptr+12), mm6);		// tmp06=x1-x6	/* Stage 1 */
+	punpckldq_r2r(mm4, mm2);				// m32:m22|m12:m02 - interleave to produce result 3
+
+	movq_r2m(mm7, tmp7);						// save tmp07
+	movq_r2r(mm2, mm5);						// write result 3
+
+	movq_r2m(mm6, tmp6);						// save tmp06
+
+	punpckhdq_r2r(mm4, mm3);				// m33:m23|m13:m03 - interleave to produce result 4
+
+	paddw_m2r(*(dataptr+10), mm2);   	// tmp02=x2+x5 /* stage 1 */
+	movq_r2r(mm3, mm4);						// write result 4
+
+/************************************************************************************************
+					End of Transpose 2
+************************************************************************************************/
+
+   paddw_m2r(*(dataptr+8), mm3);    	// tmp03=x3+x4 /* stage 1*/
+   movq_r2r(mm0, mm7);
+
+   psubw_m2r(*(dataptr+8), mm4);    	// tmp04=x3-x4 /* stage 1*/
+   movq_r2r(mm1, mm6);
+
+	paddw_r2r(mm3, mm0);  					// tmp10 = tmp00 + tmp03 /* even 2 */
+	psubw_r2r(mm3, mm7);  					// tmp13 = tmp00 - tmp03 /* even 2 */
+
+	psubw_r2r(mm2, mm6);  					// tmp12 = tmp01 - tmp02 /* even 2 */
+	paddw_r2r(mm2, mm1);  					// tmp11 = tmp01 + tmp02 /* even 2 */
+
+   psubw_m2r(*(dataptr+10), mm5);    	// tmp05=x2-x5 /* stage 1*/
+	paddw_r2r(mm7, mm6);						// tmp12 + tmp13
+
+	/* stage 3 */
+
+   movq_m2r(tmp6, mm2);
+   movq_r2r(mm0, mm3);
+
+	psllw_i2r(2, mm6);			// m8 * 2^2
+	paddw_r2r(mm1, mm0);		
+
+	pmulhw_m2r(RTjpeg_C4, mm6);			// z1
+	psubw_r2r(mm1, mm3);		
+
+   movq_r2m(mm0, *dataptr);
+   movq_r2r(mm7, mm0);
+   
+    /* Odd part */
+   movq_r2m(mm3, *(dataptr+8));
+	paddw_r2r(mm5, mm4);						// tmp10
+
+   movq_m2r(tmp7, mm3);
+	paddw_r2r(mm6, mm0);						// tmp32
+
+	paddw_r2r(mm2, mm5);						// tmp11
+	psubw_r2r(mm6, mm7);						// tmp33
+
+   movq_r2m(mm0, *(dataptr+4));
+	paddw_r2r(mm3, mm2);						// tmp12
+
+	/* stage 4 */
+   movq_r2m(mm7, *(dataptr+12));
+	movq_r2r(mm4, mm1);						// copy of tmp10
+
+	psubw_r2r(mm2, mm1);						// tmp10 - tmp12
+	psllw_i2r(2, mm4);			// m8 * 2^2
+
+	movq_m2r(RTjpeg_C2mC6, mm0);
+	psllw_i2r(2, mm1);
+
+	pmulhw_m2r(RTjpeg_C6, mm1);			// z5
+	psllw_i2r(2, mm2);
+
+	pmulhw_r2r(mm0, mm4);					// z5
+
+	/* stage 5 */
+
+	pmulhw_m2r(RTjpeg_C2pC6, mm2);
+	psllw_i2r(2, mm5);
+
+	pmulhw_m2r(RTjpeg_C4, mm5);			// z3
+	movq_r2r(mm3, mm0);						// copy tmp7
+
+   movq_m2r(*(dataptr+1), mm7);
+	paddw_r2r(mm1, mm4);						// z2
+
+	paddw_r2r(mm1, mm2);						// z4
+
+	paddw_r2r(mm5, mm0);						// z11
+	psubw_r2r(mm5, mm3);						// z13
+
+	/* stage 6 */
+
+	movq_r2r(mm3, mm5);						// copy z13
+	psubw_r2r(mm4, mm3);						// y3=z13 - z2
+
+	paddw_r2r(mm4, mm5);						// y5=z13 + z2
+	movq_r2r(mm0, mm6);						// copy z11
+
+   movq_r2m(mm3, *(dataptr+6)); 			//save y3
+	psubw_r2r(mm2, mm0);						// y7=z11 - z4
+
+   movq_r2m(mm5, *(dataptr+10)); 		//save y5
+	paddw_r2r(mm2, mm6);						// y1=z11 + z4
+
+   movq_r2m(mm0, *(dataptr+14)); 		//save y7
+
+	/************************************************
+	 *  End of 1st 4 rows
+	 ************************************************/
+
+   movq_m2r(*(dataptr+3), mm1); 			// load x1   /* stage 1 */
+	movq_r2r(mm7, mm0);						// copy x0
+
+   movq_r2m(mm6, *(dataptr+2)); 			//save y1
+
+   movq_m2r(*(dataptr+5), mm2); 			// load x2   /* stage 1 */
+	movq_r2r(mm1, mm6);						// copy x1
+
+   paddw_m2r(*(dataptr+15), mm0);  		// tmp00 = x0 + x7
+
+   movq_m2r(*(dataptr+7), mm3); 			// load x3   /* stage 1 */
+	movq_r2r(mm2, mm5);						// copy x2
+
+   psubw_m2r(*(dataptr+15), mm7);  		// tmp07 = x0 - x7
+	movq_r2r(mm3, mm4);						// copy x3
+
+   paddw_m2r(*(dataptr+13), mm1);  		// tmp01 = x1 + x6
+
+	movq_r2m(mm7, tmp7);						// save tmp07
+	movq_r2r(mm0, mm7);						// copy tmp00
+
+   psubw_m2r(*(dataptr+13), mm6);  		// tmp06 = x1 - x6
+
+   /* stage 2, Even Part */
+
+   paddw_m2r(*(dataptr+9), mm3);  		// tmp03 = x3 + x4
+
+	movq_r2m(mm6, tmp6);						// save tmp07
+	movq_r2r(mm1, mm6);						// copy tmp01
+
+   paddw_m2r(*(dataptr+11), mm2);  		// tmp02 = x2 + x5
+	paddw_r2r(mm3, mm0);              	// tmp10 = tmp00 + tmp03
+
+	psubw_r2r(mm3, mm7);              	// tmp13 = tmp00 - tmp03
+
+   psubw_m2r(*(dataptr+9), mm4);  		// tmp04 = x3 - x4
+	psubw_r2r(mm2, mm6);              	// tmp12 = tmp01 - tmp02
+
+	paddw_r2r(mm2, mm1);              	// tmp11 = tmp01 + tmp02
+
+   psubw_m2r(*(dataptr+11), mm5);  		// tmp05 = x2 - x5
+	paddw_r2r(mm7, mm6);              	//  tmp12 + tmp13
+
+   /* stage 3, Even and stage 4 & 5 even */
+
+	movq_m2r(tmp6, mm2);            		// load tmp6
+	movq_r2r(mm0, mm3);						// copy tmp10
+
+	psllw_i2r(2, mm6);			// shift z1
+	paddw_r2r(mm1, mm0);    				// y0=tmp10 + tmp11
+
+	pmulhw_m2r(RTjpeg_C4, mm6);    		// z1
+	psubw_r2r(mm1, mm3);    				// y4=tmp10 - tmp11
+
+   movq_r2m(mm0, *(dataptr+1)); 			//save y0
+	movq_r2r(mm7, mm0);						// copy tmp13
+  
+	/* odd part */
+
+   movq_r2m(mm3, *(dataptr+9)); 			//save y4
+	paddw_r2r(mm5, mm4);              	// tmp10 = tmp4 + tmp5
+
+	movq_m2r(tmp7, mm3);            		// load tmp7
+	paddw_r2r(mm6, mm0);              	// tmp32 = tmp13 + z1
+
+	paddw_r2r(mm2, mm5);              	// tmp11 = tmp5 + tmp6
+	psubw_r2r(mm6, mm7);              	// tmp33 = tmp13 - z1
+
+   movq_r2m(mm0, *(dataptr+5)); 			//save y2
+	paddw_r2r(mm3, mm2);              	// tmp12 = tmp6 + tmp7
+
+	/* stage 4 */
+
+   movq_r2m(mm7, *(dataptr+13)); 		//save y6
+	movq_r2r(mm4, mm1);						// copy tmp10
+
+	psubw_r2r(mm2, mm1);    				// tmp10 - tmp12
+	psllw_i2r(2, mm4);			// shift tmp10
+
+	movq_m2r(RTjpeg_C2mC6, mm0);			// load C2mC6
+	psllw_i2r(2, mm1);			// shift (tmp10-tmp12)
+
+	pmulhw_m2r(RTjpeg_C6, mm1);    		// z5
+	psllw_i2r(2, mm5);			// prepare for multiply 
+
+	pmulhw_r2r(mm0, mm4);					// multiply by converted real
+
+	/* stage 5 */
+
+	pmulhw_m2r(RTjpeg_C4, mm5);			// z3
+	psllw_i2r(2, mm2);			// prepare for multiply 
+
+	pmulhw_m2r(RTjpeg_C2pC6, mm2);		// multiply
+	movq_r2r(mm3, mm0);						// copy tmp7
+
+	movq_m2r(*(dataptr+9), mm7);		 	// m03:m02|m01:m00 - first line (line 4)and copy into mm7
+	paddw_r2r(mm1, mm4);						// z2
+
+	paddw_r2r(mm5, mm0);						// z11
+	psubw_r2r(mm5, mm3);						// z13
+
+	/* stage 6 */
+
+	movq_r2r(mm3, mm5);						// copy z13
+	paddw_r2r(mm1, mm2);						// z4
+
+	movq_r2r(mm0, mm6);						// copy z11
+	psubw_r2r(mm4, mm5);						// y3
+
+	paddw_r2r(mm2, mm6);						// y1
+	paddw_r2r(mm4, mm3);						// y5
+
+   movq_r2m(mm5, *(dataptr+7)); 			//save y3
+	psubw_r2r(mm2, mm0);						// yè=z11 - z4
+
+   movq_r2m(mm3, *(dataptr+11)); 		//save y5
+
+   movq_r2m(mm6, *(dataptr+3)); 			//save y1
+
+   movq_r2m(mm0, *(dataptr+15)); 		//save y7
+	
+
+#endif
+}
+
+#define FIX_1_082392200  ((__s32)  277)		/* FIX(1.082392200) */
+#define FIX_1_414213562  ((__s32)  362)		/* FIX(1.414213562) */
+#define FIX_1_847759065  ((__s32)  473)		/* FIX(1.847759065) */
+#define FIX_2_613125930  ((__s32)  669)		/* FIX(2.613125930) */
+
+#define DESCALE(x) (__s16)( ((x)+4) >> 3)
+
+/* clip yuv to 16..235 (should be 16..240 for cr/cb but ... */
+
+#define RL(x) ((x)>235) ? 235 : (((x)<16) ? 16 : (x))
+#define MULTIPLY(var,const)  (((__s32) ((var) * (const)) + 128)>>8)
+
+void RTjpeg_idct_init(void)
+{
+ int i;
+ 
+ for(i=0; i<64; i++)
+ {
+  RTjpeg_liqt[i]=((__u64)RTjpeg_liqt[i]*RTjpeg_aan_tab[i])>>32;
+  RTjpeg_ciqt[i]=((__u64)RTjpeg_ciqt[i]*RTjpeg_aan_tab[i])>>32;
+ }
+}
+
+void RTjpeg_idct(__u8 *odata, __s16 *data, int rskip)
+{
+#ifdef MMX
+
+static mmx_t fix_141			= (mmx_t)(long long)0x5a825a825a825a82LL;
+static mmx_t fix_184n261	= (mmx_t)(long long)0xcf04cf04cf04cf04LL;
+static mmx_t fix_184			= (mmx_t)(long long)0x7641764176417641LL;
+static mmx_t fix_n184		= (mmx_t)(long long)0x896f896f896f896fLL;
+static mmx_t fix_108n184	= (mmx_t)(long long)0xcf04cf04cf04cf04LL;
+
+  mmx_t workspace[64];
+  mmx_t *wsptr = workspace;
+  register mmx_t *dataptr = (mmx_t *)odata;
+  mmx_t *idata = (mmx_t *)data;
+
+  rskip = rskip>>3;
+/*
+ * Perform inverse DCT on one block of coefficients.
+ */
+
+    /* Odd part */
+
+	movq_m2r(*(idata+10), mm1);	// load idata[DCTSIZE*5]
+
+	movq_m2r(*(idata+6), mm0);		// load idata[DCTSIZE*3]
+
+	movq_m2r(*(idata+2), mm3);		// load idata[DCTSIZE*1]
+
+	movq_r2r(mm1, mm2);				// copy tmp6	/* phase 6 */
+
+	movq_m2r(*(idata+14), mm4);	// load idata[DCTSIZE*7]
+
+	paddw_r2r(mm0, mm1);				// z13 = tmp6 + tmp5;
+
+	psubw_r2r(mm0, mm2);				// z10 = tmp6 - tmp5   
+
+	psllw_i2r(2, mm2);				// shift z10
+	movq_r2r(mm2, mm0); 				// copy z10
+
+	pmulhw_m2r(fix_184n261, mm2);	// MULTIPLY( z12, FIX_1_847759065); /* 2*c2 */
+	movq_r2r(mm3, mm5);				// copy tmp4
+
+	pmulhw_m2r(fix_n184, mm0);		// MULTIPLY(z10, -FIX_1_847759065); /* 2*c2 */
+	paddw_r2r(mm4, mm3);				// z11 = tmp4 + tmp7;
+
+	movq_r2r(mm3, mm6);				// copy z11			/* phase 5 */
+	psubw_r2r(mm4, mm5);				// z12 = tmp4 - tmp7;
+
+	psubw_r2r(mm1, mm6);				// z11-z13
+	psllw_i2r(2, mm5);				//	shift z12
+
+	movq_m2r(*(idata+12), mm4);	// load idata[DCTSIZE*6], even part
+ 	movq_r2r(mm5, mm7);				//	copy z12
+
+	pmulhw_m2r(fix_108n184, mm5); //	MULT(z12, (FIX_1_08-FIX_1_84)) //- z5; /* 2*(c2-c6) */ even part
+	paddw_r2r(mm1, mm3);				// tmp7 = z11 + z13;	
+
+	//ok
+
+    /* Even part */
+	pmulhw_m2r(fix_184, mm7);		// MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) //+ z5; /* -2*(c2+c6) */
+	psllw_i2r(2, mm6);
+
+	movq_m2r(*(idata+4), mm1);		// load idata[DCTSIZE*2]
+
+	paddw_r2r(mm5, mm0);				//	tmp10
+
+	paddw_r2r(mm7, mm2);				// tmp12
+
+	pmulhw_m2r(fix_141, mm6);		// tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
+	psubw_r2r(mm3, mm2);				// tmp6 = tmp12 - tmp7
+
+	movq_r2r(mm1, mm5);				// copy tmp1
+	paddw_r2r(mm4, mm1);				// tmp13= tmp1 + tmp3;	/* phases 5-3 */
+
+	psubw_r2r(mm4, mm5);				// tmp1-tmp3
+	psubw_r2r(mm2, mm6);				// tmp5 = tmp11 - tmp6;
+
+	movq_r2m(mm1, *(wsptr));		// save tmp13 in workspace
+	psllw_i2r(2, mm5);	// shift tmp1-tmp3
+    
+	movq_m2r(*(idata), mm7); 		// load idata[DCTSIZE*0]
+
+	pmulhw_m2r(fix_141, mm5);		// MULTIPLY(tmp1 - tmp3, FIX_1_414213562)
+	paddw_r2r(mm6, mm0);				// tmp4 = tmp10 + tmp5;
+
+	movq_m2r(*(idata+8), mm4); 	// load idata[DCTSIZE*4]
+	
+	psubw_r2r(mm1, mm5);				// tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */
+
+	movq_r2m(mm0, *(wsptr+4));		// save tmp4 in workspace
+	movq_r2r(mm7, mm1);			 	// copy tmp0	/* phase 3 */
+
+	movq_r2m(mm5, *(wsptr+2));		// save tmp12 in workspace
+	psubw_r2r(mm4, mm1);				// tmp11 = tmp0 - tmp2; 
+
+	paddw_r2r(mm4, mm7);				// tmp10 = tmp0 + tmp2;
+   movq_r2r(mm1, mm5);				// copy tmp11
+	
+	paddw_m2r(*(wsptr+2), mm1);	// tmp1 = tmp11 + tmp12;
+	movq_r2r(mm7, mm4);				// copy tmp10		/* phase 2 */
+
+	paddw_m2r(*(wsptr), mm7);		// tmp0 = tmp10 + tmp13;	
+
+	psubw_m2r(*(wsptr), mm4);		// tmp3 = tmp10 - tmp13;
+	movq_r2r(mm7, mm0);				//	copy tmp0
+
+	psubw_m2r(*(wsptr+2), mm5);	// tmp2 = tmp11 - tmp12;
+	paddw_r2r(mm3, mm7);				//	wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7);
+	
+	psubw_r2r(mm3, mm0);				// wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7);
+
+	movq_r2m(mm7, *(wsptr));		//	wsptr[DCTSIZE*0]
+	movq_r2r(mm1, mm3);				//	copy tmp1
+
+	movq_r2m(mm0, *(wsptr+14));		// wsptr[DCTSIZE*7]
+	paddw_r2r(mm2, mm1);				// wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6);
+
+	psubw_r2r(mm2, mm3);				// wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6);
+
+	movq_r2m(mm1, *(wsptr+2));		// wsptr[DCTSIZE*1]
+	movq_r2r(mm4, mm1);				//	copy tmp3
+
+	movq_r2m(mm3, *(wsptr+12));		// wsptr[DCTSIZE*6]
+
+	paddw_m2r(*(wsptr+4), mm4);	// wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4);
+
+	psubw_m2r(*(wsptr+4), mm1); 	// wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4);
+
+	movq_r2m(mm4, *(wsptr+8));		
+	movq_r2r(mm5, mm7);				// copy tmp2
+
+	paddw_r2r(mm6, mm5);				// wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5)
+
+	movq_r2m(mm1, *(wsptr+6));	
+	psubw_r2r(mm6, mm7);				//	wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5);
+
+	movq_r2m(mm5, *(wsptr+4));	
+
+	movq_r2m(mm7, *(wsptr+10));		
+
+	//ok
+
+
+/*****************************************************************/
+
+	idata++;
+	wsptr++;
+
+/*****************************************************************/
+
+	movq_m2r(*(idata+10), mm1);	// load idata[DCTSIZE*5]
+
+	movq_m2r(*(idata+6), mm0);		// load idata[DCTSIZE*3]
+
+	movq_m2r(*(idata+2),	mm3);		// load idata[DCTSIZE*1]
+	movq_r2r(mm1, mm2);				//	copy tmp6	/* phase 6 */
+
+	movq_m2r(*(idata+14),	mm4);		// load idata[DCTSIZE*7]
+	paddw_r2r(mm0, mm1);				//	z13 = tmp6 + tmp5;
+
+	psubw_r2r(mm0, mm2);				//	z10 = tmp6 - tmp5   
+
+	psllw_i2r(2, mm2);				//	shift z10
+	movq_r2r(mm2, mm0);				//	copy z10
+
+	pmulhw_m2r(fix_184n261, mm2);	// MULTIPLY( z12, FIX_1_847759065); /* 2*c2 */
+	movq_r2r(mm3, mm5);				//	copy tmp4
+
+	pmulhw_m2r(fix_n184, mm0);		// MULTIPLY(z10, -FIX_1_847759065); /* 2*c2 */
+	paddw_r2r(mm4, mm3);				// z11 = tmp4 + tmp7;
+
+	movq_r2r(mm3, mm6);				// copy z11			/* phase 5 */
+	psubw_r2r(mm4, mm5);				//	z12 = tmp4 - tmp7;
+
+	psubw_r2r(mm1, mm6);				// z11-z13
+	psllw_i2r(2, mm5);				//	shift z12
+
+	movq_m2r(*(idata+12), mm4);	// load idata[DCTSIZE*6], even part
+ 	movq_r2r(mm5, mm7);				// copy z12
+
+	pmulhw_m2r(fix_108n184, mm5);	// MULT(z12, (FIX_1_08-FIX_1_84)) //- z5; /* 2*(c2-c6) */ even part
+	paddw_r2r(mm1, mm3);				// tmp7 = z11 + z13;	
+
+	//ok
+
+    /* Even part */
+	pmulhw_m2r(fix_184, mm7);		// MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) //+ z5; /* -2*(c2+c6) */
+	psllw_i2r(2, mm6);
+
+	movq_m2r(*(idata+4), mm1);		// load idata[DCTSIZE*2]
+
+	paddw_r2r(mm5, mm0);				//	tmp10
+
+	paddw_r2r(mm7, mm2);				// tmp12
+
+	pmulhw_m2r(fix_141, mm6);		// tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
+	psubw_r2r(mm3, mm2);				// tmp6 = tmp12 - tmp7
+
+	movq_r2r(mm1, mm5);				// copy tmp1
+	paddw_r2r(mm4, mm1);				// tmp13= tmp1 + tmp3;	/* phases 5-3 */
+
+	psubw_r2r(mm4, mm5);				// tmp1-tmp3
+	psubw_r2r(mm2, mm6);				// tmp5 = tmp11 - tmp6;
+
+	movq_r2m(mm1, *(wsptr));		// save tmp13 in workspace
+	psllw_i2r(2, mm5); 				// shift tmp1-tmp3
+    
+	movq_m2r(*(idata), mm7);		// load idata[DCTSIZE*0]
+	paddw_r2r(mm6, mm0);				// tmp4 = tmp10 + tmp5;
+
+	pmulhw_m2r(fix_141, mm5);		// MULTIPLY(tmp1 - tmp3, FIX_1_414213562)
+
+	movq_m2r(*(idata+8), mm4);    // load idata[DCTSIZE*4]
+	
+	psubw_r2r(mm1, mm5);				// tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */
+
+	movq_r2m(mm0, *(wsptr+4));		// save tmp4 in workspace
+	movq_r2r(mm7, mm1);				// copy tmp0	/* phase 3 */
+
+	movq_r2m(mm5, *(wsptr+2));		// save tmp12 in workspace
+	psubw_r2r(mm4, mm1);				// tmp11 = tmp0 - tmp2; 
+
+	paddw_r2r(mm4, mm7);				// tmp10 = tmp0 + tmp2;
+   movq_r2r(mm1, mm5);				// copy tmp11
+	
+	paddw_m2r(*(wsptr+2), mm1);	// tmp1 = tmp11 + tmp12;
+	movq_r2r(mm7, mm4);				// copy tmp10		/* phase 2 */
+
+	paddw_m2r(*(wsptr), mm7);		// tmp0 = tmp10 + tmp13;	
+
+	psubw_m2r(*(wsptr), mm4);		// tmp3 = tmp10 - tmp13;
+	movq_r2r(mm7, mm0);				// copy tmp0
+
+	psubw_m2r(*(wsptr+2), mm5);	// tmp2 = tmp11 - tmp12;
+	paddw_r2r(mm3, mm7);				// wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7);
+	
+	psubw_r2r(mm3, mm0);				// wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7);
+
+	movq_r2m(mm7, *(wsptr));		// wsptr[DCTSIZE*0]
+	movq_r2r(mm1, mm3);				// copy tmp1
+
+	movq_r2m(mm0, *(wsptr+14));		// wsptr[DCTSIZE*7]
+	paddw_r2r(mm2, mm1);				// wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6);
+
+	psubw_r2r(mm2, mm3);				// wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6);
+
+	movq_r2m(mm1, *(wsptr+2));		// wsptr[DCTSIZE*1]
+	movq_r2r(mm4, mm1);				// copy tmp3
+
+	movq_r2m(mm3, *(wsptr+12));		// wsptr[DCTSIZE*6]
+
+	paddw_m2r(*(wsptr+4), mm4);	// wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4);
+
+	psubw_m2r(*(wsptr+4), mm1);	// wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4);
+
+	movq_r2m(mm4, *(wsptr+8));		
+	movq_r2r(mm5, mm7);				// copy tmp2
+
+	paddw_r2r(mm6, mm5);				// wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5)
+
+	movq_r2m(mm1, *(wsptr+6));		
+	psubw_r2r(mm6, mm7);				// wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5);
+
+	movq_r2m(mm5, *(wsptr+4));	
+
+	movq_r2m(mm7, *(wsptr+10));
+
+/*****************************************************************/
+
+  /* Pass 2: process rows from work array, store into output array. */
+  /* Note that we must descale the results by a factor of 8 == 2**3, */
+  /* and also undo the PASS1_BITS scaling. */
+
+/*****************************************************************/
+    /* Even part */
+
+	wsptr--;
+
+//    tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]);
+//    tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]);
+//    tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]);
+//    tmp14 = ((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6]);
+	movq_m2r(*(wsptr), mm0);		// wsptr[0,0],[0,1],[0,2],[0,3]
+
+	movq_m2r(*(wsptr+1),	mm1);		// wsptr[0,4],[0,5],[0,6],[0,7]
+	movq_r2r(mm0, mm2);
+	
+	movq_m2r(*(wsptr+2), mm3);		// wsptr[1,0],[1,1],[1,2],[1,3]
+	paddw_r2r(mm1, mm0);				// wsptr[0,tmp10],[xxx],[0,tmp13],[xxx]
+
+	movq_m2r(*(wsptr+3), mm4);		// wsptr[1,4],[1,5],[1,6],[1,7]
+	psubw_r2r(mm1, mm2);				// wsptr[0,tmp11],[xxx],[0,tmp14],[xxx]
+
+	movq_r2r(mm0, mm6);
+	movq_r2r(mm3, mm5);
+	
+	paddw_r2r(mm4, mm3);				// wsptr[1,tmp10],[xxx],[1,tmp13],[xxx]
+	movq_r2r(mm2, mm1);
+
+	psubw_r2r(mm4, mm5);				// wsptr[1,tmp11],[xxx],[1,tmp14],[xxx]
+	punpcklwd_r2r(mm3, mm0);		// wsptr[0,tmp10],[1,tmp10],[xxx],[xxx]
+
+	movq_m2r(*(wsptr+7), mm7);		// wsptr[3,4],[3,5],[3,6],[3,7]
+	punpckhwd_r2r(mm3, mm6);		// wsptr[0,tmp13],[1,tmp13],[xxx],[xxx]
+
+	movq_m2r(*(wsptr+4), mm3);		// wsptr[2,0],[2,1],[2,2],[2,3]
+	punpckldq_r2r(mm6, mm0);		// wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13]
+
+	punpcklwd_r2r(mm5, mm1);		// wsptr[0,tmp11],[1,tmp11],[xxx],[xxx]
+	movq_r2r(mm3, mm4);
+
+	movq_m2r(*(wsptr+6), mm6);		// wsptr[3,0],[3,1],[3,2],[3,3]
+	punpckhwd_r2r(mm5, mm2);		// wsptr[0,tmp14],[1,tmp14],[xxx],[xxx]
+
+	movq_m2r(*(wsptr+5), mm5);		// wsptr[2,4],[2,5],[2,6],[2,7]
+	punpckldq_r2r(mm2, mm1);		// wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14]
+
+	
+	paddw_r2r(mm5, mm3);				// wsptr[2,tmp10],[xxx],[2,tmp13],[xxx]
+	movq_r2r(mm6, mm2);
+
+	psubw_r2r(mm5, mm4);				// wsptr[2,tmp11],[xxx],[2,tmp14],[xxx]
+	paddw_r2r(mm7, mm6);				// wsptr[3,tmp10],[xxx],[3,tmp13],[xxx]
+
+	movq_r2r(mm3, mm5);
+	punpcklwd_r2r(mm6, mm3);		// wsptr[2,tmp10],[3,tmp10],[xxx],[xxx]
+	
+	psubw_r2r(mm7, mm2);				// wsptr[3,tmp11],[xxx],[3,tmp14],[xxx]
+	punpckhwd_r2r(mm6, mm5);		// wsptr[2,tmp13],[3,tmp13],[xxx],[xxx]
+
+	movq_r2r(mm4, mm7);
+	punpckldq_r2r(mm5, mm3);		// wsptr[2,tmp10],[3,tmp10],[2,tmp13],[3,tmp13]
+						 
+	punpcklwd_r2r(mm2, mm4);		// wsptr[2,tmp11],[3,tmp11],[xxx],[xxx]
+
+	punpckhwd_r2r(mm2, mm7);		// wsptr[2,tmp14],[3,tmp14],[xxx],[xxx]
+
+	punpckldq_r2r(mm7, mm4);		// wsptr[2,tmp11],[3,tmp11],[2,tmp14],[3,tmp14]
+	movq_r2r(mm1, mm6);
+
+	//ok
+
+//	mm0 = 	;wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13]
+//	mm1 =	;wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14]
+
+
+	movq_r2r(mm0, mm2);
+	punpckhdq_r2r(mm4, mm6);		// wsptr[0,tmp14],[1,tmp14],[2,tmp14],[3,tmp14]
+
+	punpckldq_r2r(mm4, mm1);		// wsptr[0,tmp11],[1,tmp11],[2,tmp11],[3,tmp11]
+	psllw_i2r(2, mm6);
+
+	pmulhw_m2r(fix_141, mm6);
+	punpckldq_r2r(mm3, mm0);		// wsptr[0,tmp10],[1,tmp10],[2,tmp10],[3,tmp10]
+
+	punpckhdq_r2r(mm3, mm2);		// wsptr[0,tmp13],[1,tmp13],[2,tmp13],[3,tmp13]
+	movq_r2r(mm0, mm7);
+
+//    tmp0 = tmp10 + tmp13;
+//    tmp3 = tmp10 - tmp13;
+	paddw_r2r(mm2, mm0);				// [0,tmp0],[1,tmp0],[2,tmp0],[3,tmp0]
+	psubw_r2r(mm2, mm7);				// [0,tmp3],[1,tmp3],[2,tmp3],[3,tmp3]
+
+//    tmp12 = MULTIPLY(tmp14, FIX_1_414213562) - tmp13;
+	psubw_r2r(mm2, mm6);				// wsptr[0,tmp12],[1,tmp12],[2,tmp12],[3,tmp12]
+//    tmp1 = tmp11 + tmp12;
+//    tmp2 = tmp11 - tmp12;
+	movq_r2r(mm1, mm5);
+
+	//OK
+
+    /* Odd part */
+
+//    z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3];
+//    z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3];
+//    z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7];
+//    z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7];
+	movq_m2r(*(wsptr), mm3);		// wsptr[0,0],[0,1],[0,2],[0,3]
+	paddw_r2r(mm6, mm1);				// [0,tmp1],[1,tmp1],[2,tmp1],[3,tmp1]
+
+	movq_m2r(*(wsptr+1), mm4);		// wsptr[0,4],[0,5],[0,6],[0,7]
+	psubw_r2r(mm6, mm5);				// [0,tmp2],[1,tmp2],[2,tmp2],[3,tmp2]
+
+	movq_r2r(mm3, mm6);
+	punpckldq_r2r(mm4, mm3);		// wsptr[0,0],[0,1],[0,4],[0,5]
+
+	punpckhdq_r2r(mm6, mm4);		// wsptr[0,6],[0,7],[0,2],[0,3]
+	movq_r2r(mm3, mm2);
+
+//Save tmp0 and tmp1 in wsptr
+	movq_r2m(mm0, *(wsptr));		// save tmp0
+	paddw_r2r(mm4, mm2);				// wsptr[xxx],[0,z11],[xxx],[0,z13]
+
+	
+//Continue with z10 --- z13
+	movq_m2r(*(wsptr+2), mm6);		// wsptr[1,0],[1,1],[1,2],[1,3]
+	psubw_r2r(mm4, mm3);				// wsptr[xxx],[0,z12],[xxx],[0,z10]
+
+	movq_m2r(*(wsptr+3), mm0);		// wsptr[1,4],[1,5],[1,6],[1,7]
+	movq_r2r(mm6, mm4);
+
+	movq_r2m(mm1, *(wsptr+1));		// save tmp1
+	punpckldq_r2r(mm0, mm6);		// wsptr[1,0],[1,1],[1,4],[1,5]
+
+	punpckhdq_r2r(mm4, mm0);		// wsptr[1,6],[1,7],[1,2],[1,3]
+	movq_r2r(mm6, mm1);
+	
+//Save tmp2 and tmp3 in wsptr
+	paddw_r2r(mm0, mm6);				// wsptr[xxx],[1,z11],[xxx],[1,z13]
+	movq_r2r(mm2, mm4);
+	
+//Continue with z10 --- z13
+	movq_r2m(mm5, *(wsptr+2));		// save tmp2
+	punpcklwd_r2r(mm6, mm2);		// wsptr[xxx],[xxx],[0,z11],[1,z11]
+
+	psubw_r2r(mm0, mm1);				// wsptr[xxx],[1,z12],[xxx],[1,z10]
+	punpckhwd_r2r(mm6, mm4);		// wsptr[xxx],[xxx],[0,z13],[1,z13]
+
+	movq_r2r(mm3, mm0);
+	punpcklwd_r2r(mm1, mm3);		// wsptr[xxx],[xxx],[0,z12],[1,z12]
+
+	movq_r2m(mm7, *(wsptr+3));		// save tmp3
+	punpckhwd_r2r(mm1, mm0);		// wsptr[xxx],[xxx],[0,z10],[1,z10]
+
+	movq_m2r(*(wsptr+4), mm6);		// wsptr[2,0],[2,1],[2,2],[2,3]
+	punpckhdq_r2r(mm2, mm0);		// wsptr[0,z10],[1,z10],[0,z11],[1,z11]
+
+	movq_m2r(*(wsptr+5), mm7);	// wsptr[2,4],[2,5],[2,6],[2,7]
+	punpckhdq_r2r(mm4, mm3);		// wsptr[0,z12],[1,z12],[0,z13],[1,z13]
+
+	movq_m2r(*(wsptr+6), mm1);	// wsptr[3,0],[3,1],[3,2],[3,3]
+	movq_r2r(mm6, mm4);
+
+	punpckldq_r2r(mm7, mm6);		// wsptr[2,0],[2,1],[2,4],[2,5]
+	movq_r2r(mm1, mm5);
+
+	punpckhdq_r2r(mm4, mm7);		// wsptr[2,6],[2,7],[2,2],[2,3]
+	movq_r2r(mm6, mm2);
+	
+	movq_m2r(*(wsptr+7), mm4);	// wsptr[3,4],[3,5],[3,6],[3,7]
+	paddw_r2r(mm7, mm6);				// wsptr[xxx],[2,z11],[xxx],[2,z13]
+
+	psubw_r2r(mm7, mm2);				// wsptr[xxx],[2,z12],[xxx],[2,z10]
+	punpckldq_r2r(mm4, mm1);		// wsptr[3,0],[3,1],[3,4],[3,5]
+
+	punpckhdq_r2r(mm5, mm4);		// wsptr[3,6],[3,7],[3,2],[3,3]
+	movq_r2r(mm1, mm7);
+
+	paddw_r2r(mm4, mm1);				// wsptr[xxx],[3,z11],[xxx],[3,z13]
+	psubw_r2r(mm4, mm7);				// wsptr[xxx],[3,z12],[xxx],[3,z10]
+
+	movq_r2r(mm6, mm5);
+	punpcklwd_r2r(mm1, mm6);		// wsptr[xxx],[xxx],[2,z11],[3,z11]
+
+	punpckhwd_r2r(mm1, mm5);		// wsptr[xxx],[xxx],[2,z13],[3,z13]
+	movq_r2r(mm2, mm4);
+
+	punpcklwd_r2r(mm7, mm2);		// wsptr[xxx],[xxx],[2,z12],[3,z12]
+
+	punpckhwd_r2r(mm7, mm4);		// wsptr[xxx],[xxx],[2,z10],[3,z10]
+
+	punpckhdq_r2r(mm6, mm4);		/// wsptr[2,z10],[3,z10],[2,z11],[3,z11]
+
+	punpckhdq_r2r(mm5, mm2);		// wsptr[2,z12],[3,z12],[2,z13],[3,z13]
+	movq_r2r(mm0, mm5);
+
+	punpckldq_r2r(mm4, mm0);		// wsptr[0,z10],[1,z10],[2,z10],[3,z10]
+
+	punpckhdq_r2r(mm4, mm5);		// wsptr[0,z11],[1,z11],[2,z11],[3,z11]
+	movq_r2r(mm3, mm4);
+
+	punpckhdq_r2r(mm2, mm4);		// wsptr[0,z13],[1,z13],[2,z13],[3,z13]
+	movq_r2r(mm5, mm1);
+
+	punpckldq_r2r(mm2, mm3);		// wsptr[0,z12],[1,z12],[2,z12],[3,z12]
+//    tmp7 = z11 + z13;		/* phase 5 */
+//    tmp8 = z11 - z13;		/* phase 5 */
+	psubw_r2r(mm4, mm1);				// tmp8
+
+	paddw_r2r(mm4, mm5);				// tmp7
+//    tmp21 = MULTIPLY(tmp8, FIX_1_414213562); /* 2*c4 */
+	psllw_i2r(2, mm1);
+
+	psllw_i2r(2, mm0);
+
+	pmulhw_m2r(fix_141, mm1);		// tmp21
+//    tmp20 = MULTIPLY(z12, (FIX_1_082392200- FIX_1_847759065))  /* 2*(c2-c6) */
+//			+ MULTIPLY(z10, - FIX_1_847759065); /* 2*c2 */
+	psllw_i2r(2, mm3);
+	movq_r2r(mm0, mm7);
+
+	pmulhw_m2r(fix_n184, mm7);
+	movq_r2r(mm3, mm6);
+
+	movq_m2r(*(wsptr), mm2);		// tmp0,final1
+
+	pmulhw_m2r(fix_108n184, mm6);
+//	 tmp22 = MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) /* -2*(c2+c6) */
+//			+ MULTIPLY(z12, FIX_1_847759065); /* 2*c2 */
+	movq_r2r(mm2, mm4);				// final1
+  
+	pmulhw_m2r(fix_184n261, mm0);
+	paddw_r2r(mm5, mm2);				// tmp0+tmp7,final1
+
+	pmulhw_m2r(fix_184, mm3);
+	psubw_r2r(mm5, mm4);				// tmp0-tmp7,final1
+
+//    tmp6 = tmp22 - tmp7;	/* phase 2 */
+	psraw_i2r(3, mm2);				// outptr[0,0],[1,0],[2,0],[3,0],final1
+
+	paddw_r2r(mm6, mm7);				// tmp20
+	psraw_i2r(3, mm4);				// outptr[0,7],[1,7],[2,7],[3,7],final1
+
+	paddw_r2r(mm0, mm3);				// tmp22
+
+//    tmp5 = tmp21 - tmp6;
+	psubw_r2r(mm5, mm3);				// tmp6
+
+//    tmp4 = tmp20 + tmp5;
+	movq_m2r(*(wsptr+1), mm0);		// tmp1,final2
+	psubw_r2r(mm3, mm1);				// tmp5
+
+	movq_r2r(mm0, mm6);				// final2
+	paddw_r2r(mm3, mm0);				// tmp1+tmp6,final2
+
+    /* Final output stage: scale down by a factor of 8 and range-limit */
+
+
+//    outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3)
+//			    & RANGE_MASK];
+//    outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3)
+//			    & RANGE_MASK];	final1
+
+
+//    outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3)
+//			    & RANGE_MASK];
+//    outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3)
+//			    & RANGE_MASK];	final2
+	psubw_r2r(mm3, mm6);				// tmp1-tmp6,final2
+	psraw_i2r(3, mm0);				// outptr[0,1],[1,1],[2,1],[3,1]
+
+	psraw_i2r(3, mm6);				// outptr[0,6],[1,6],[2,6],[3,6]
+	
+	packuswb_r2r(mm4, mm0);			// out[0,1],[1,1],[2,1],[3,1],[0,7],[1,7],[2,7],[3,7]
+	
+	movq_m2r(*(wsptr+2), mm5);		// tmp2,final3
+	packuswb_r2r(mm6, mm2);			// out[0,0],[1,0],[2,0],[3,0],[0,6],[1,6],[2,6],[3,6]
+
+//    outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3)
+//			    & RANGE_MASK];
+//    outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3)
+//			    & RANGE_MASK];	final3
+	paddw_r2r(mm1, mm7);				// tmp4
+	movq_r2r(mm5, mm3);
+
+	paddw_r2r(mm1, mm5);				// tmp2+tmp5
+	psubw_r2r(mm1, mm3);				// tmp2-tmp5
+
+	psraw_i2r(3, mm5);				// outptr[0,2],[1,2],[2,2],[3,2]
+
+	movq_m2r(*(wsptr+3), mm4);		// tmp3,final4
+	psraw_i2r(3, mm3);				// outptr[0,5],[1,5],[2,5],[3,5]
+
+
+
+//    outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3)
+//			    & RANGE_MASK];
+//    outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3)
+//			    & RANGE_MASK];	final4
+	movq_r2r(mm4, mm6);
+	paddw_r2r(mm7, mm4);				// tmp3+tmp4
+
+	psubw_r2r(mm7, mm6);				// tmp3-tmp4
+	psraw_i2r(3, mm4);				// outptr[0,4],[1,4],[2,4],[3,4]
+
+	// mov			ecx, [dataptr]
+
+	psraw_i2r(3, mm6);				// outptr[0,3],[1,3],[2,3],[3,3]
+
+	packuswb_r2r(mm4, mm5);			// out[0,2],[1,2],[2,2],[3,2],[0,4],[1,4],[2,4],[3,4]
+
+	packuswb_r2r(mm3, mm6);			// out[0,3],[1,3],[2,3],[3,3],[0,5],[1,5],[2,5],[3,5]
+	movq_r2r(mm2, mm4);
+
+	movq_r2r(mm5, mm7);
+	punpcklbw_r2r(mm0, mm2);		// out[0,0],[0,1],[1,0],[1,1],[2,0],[2,1],[3,0],[3,1]
+
+	punpckhbw_r2r(mm0, mm4);		// out[0,6],[0,7],[1,6],[1,7],[2,6],[2,7],[3,6],[3,7]
+	movq_r2r(mm2, mm1);
+
+	punpcklbw_r2r(mm6, mm5);		// out[0,2],[0,3],[1,2],[1,3],[2,2],[2,3],[3,2],[3,3]
+
+	// add		 	dataptr, 4
+
+	punpckhbw_r2r(mm6, mm7);		// out[0,4],[0,5],[1,4],[1,5],[2,4],[2,5],[3,4],[3,5]
+
+	punpcklwd_r2r(mm5, mm2);		// out[0,0],[0,1],[0,2],[0,3],[1,0],[1,1],[1,2],[1,3]
+	
+	// add			ecx, output_col
+
+	movq_r2r(mm7, mm6);
+	punpckhwd_r2r(mm5, mm1);		// out[2,0],[2,1],[2,2],[2,3],[3,0],[3,1],[3,2],[3,3]
+
+	movq_r2r(mm2, mm0);
+	punpcklwd_r2r(mm4, mm6);		// out[0,4],[0,5],[0,6],[0,7],[1,4],[1,5],[1,6],[1,7]
+
+	// mov			idata, [dataptr]
+	
+	punpckldq_r2r(mm6, mm2);		// out[0,0],[0,1],[0,2],[0,3],[0,4],[0,5],[0,6],[0,7]
+
+	// add		 	dataptr, 4
+	 
+	movq_r2r(mm1, mm3);
+
+	// add			idata, output_col 
+	
+	punpckhwd_r2r(mm4, mm7);		// out[2,4],[2,5],[2,6],[2,7],[3,4],[3,5],[3,6],[3,7]
+	
+	movq_r2m(mm2, *(dataptr));
+	
+	punpckhdq_r2r(mm6, mm0);		// out[1,0],[1,1],[1,2],[1,3],[1,4],[1,5],[1,6],[1,7]
+
+	dataptr += rskip;
+	movq_r2m(mm0, *(dataptr));
+
+	punpckldq_r2r(mm7, mm1);		// out[2,0],[2,1],[2,2],[2,3],[2,4],[2,5],[2,6],[2,7]
+	punpckhdq_r2r(mm7, mm3);		// out[3,0],[3,1],[3,2],[3,3],[3,4],[3,5],[3,6],[3,7]
+	
+	dataptr += rskip;
+	movq_r2m(mm1, *(dataptr));
+
+	dataptr += rskip;
+	movq_r2m(mm3, *(dataptr));
+
+/*******************************************************************/
+
+	wsptr += 8;
+
+/*******************************************************************/
+
+//    tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]);
+//    tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]);
+//    tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]);
+//    tmp14 = ((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6]);
+	movq_m2r(*(wsptr), mm0);		// wsptr[0,0],[0,1],[0,2],[0,3]
+
+	movq_m2r(*(wsptr+1), mm1);		// wsptr[0,4],[0,5],[0,6],[0,7]
+	movq_r2r(mm0, mm2);
+	
+	movq_m2r(*(wsptr+2), mm3);		// wsptr[1,0],[1,1],[1,2],[1,3]
+	paddw_r2r(mm1, mm0);				// wsptr[0,tmp10],[xxx],[0,tmp13],[xxx]
+
+	movq_m2r(*(wsptr+3), mm4);		// wsptr[1,4],[1,5],[1,6],[1,7]
+	psubw_r2r(mm1, mm2);				// wsptr[0,tmp11],[xxx],[0,tmp14],[xxx]
+
+	movq_r2r(mm0, mm6);
+	movq_r2r(mm3, mm5);
+	
+	paddw_r2r(mm4, mm3);				// wsptr[1,tmp10],[xxx],[1,tmp13],[xxx]
+	movq_r2r(mm2, mm1);
+
+	psubw_r2r(mm4, mm5);				// wsptr[1,tmp11],[xxx],[1,tmp14],[xxx]
+	punpcklwd_r2r(mm3, mm0);		// wsptr[0,tmp10],[1,tmp10],[xxx],[xxx]
+
+	movq_m2r(*(wsptr+7), mm7);	// wsptr[3,4],[3,5],[3,6],[3,7]
+	punpckhwd_r2r(mm3, mm6);		// wsptr[0,tmp13],[1,tmp13],[xxx],[xxx]
+
+	movq_m2r(*(wsptr+4),	mm3);		// wsptr[2,0],[2,1],[2,2],[2,3]
+	punpckldq_r2r(mm6, mm0);		// wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13]
+
+	punpcklwd_r2r(mm5, mm1);		// wsptr[0,tmp11],[1,tmp11],[xxx],[xxx]
+	movq_r2r(mm3, mm4);
+
+	movq_m2r(*(wsptr+6), mm6);	// wsptr[3,0],[3,1],[3,2],[3,3]
+	punpckhwd_r2r(mm5, mm2);		// wsptr[0,tmp14],[1,tmp14],[xxx],[xxx]
+
+	movq_m2r(*(wsptr+5), mm5);	// wsptr[2,4],[2,5],[2,6],[2,7]
+	punpckldq_r2r(mm2, mm1);		// wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14]
+
+	paddw_r2r(mm5, mm3);				// wsptr[2,tmp10],[xxx],[2,tmp13],[xxx]
+	movq_r2r(mm6, mm2);
+
+	psubw_r2r(mm5, mm4);				// wsptr[2,tmp11],[xxx],[2,tmp14],[xxx]
+	paddw_r2r(mm7, mm6);				// wsptr[3,tmp10],[xxx],[3,tmp13],[xxx]
+
+	movq_r2r(mm3, mm5);
+	punpcklwd_r2r(mm6, mm3);		// wsptr[2,tmp10],[3,tmp10],[xxx],[xxx]
+	
+	psubw_r2r(mm7, mm2);				// wsptr[3,tmp11],[xxx],[3,tmp14],[xxx]
+	punpckhwd_r2r(mm6, mm5);		// wsptr[2,tmp13],[3,tmp13],[xxx],[xxx]
+
+	movq_r2r(mm4, mm7);
+	punpckldq_r2r(mm5, mm3);		// wsptr[2,tmp10],[3,tmp10],[2,tmp13],[3,tmp13]
+
+	punpcklwd_r2r(mm2, mm4);		// wsptr[2,tmp11],[3,tmp11],[xxx],[xxx]
+
+	punpckhwd_r2r(mm2, mm7);		// wsptr[2,tmp14],[3,tmp14],[xxx],[xxx]
+
+	punpckldq_r2r(mm7, mm4);		// wsptr[2,tmp11],[3,tmp11],[2,tmp14],[3,tmp14]
+	movq_r2r(mm1, mm6);
+
+	//OK
+
+//	mm0 = 	;wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13]
+//	mm1 =	;wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14]
+
+	movq_r2r(mm0, mm2);
+	punpckhdq_r2r(mm4, mm6);		// wsptr[0,tmp14],[1,tmp14],[2,tmp14],[3,tmp14]
+
+	punpckldq_r2r(mm4, mm1);		// wsptr[0,tmp11],[1,tmp11],[2,tmp11],[3,tmp11]
+	psllw_i2r(2, mm6);
+
+	pmulhw_m2r(fix_141, mm6);
+	punpckldq_r2r(mm3, mm0);		// wsptr[0,tmp10],[1,tmp10],[2,tmp10],[3,tmp10]
+
+	punpckhdq_r2r(mm3, mm2);		// wsptr[0,tmp13],[1,tmp13],[2,tmp13],[3,tmp13]
+	movq_r2r(mm0, mm7);
+
+//    tmp0 = tmp10 + tmp13;
+//    tmp3 = tmp10 - tmp13;
+	paddw_r2r(mm2, mm0);				// [0,tmp0],[1,tmp0],[2,tmp0],[3,tmp0]
+	psubw_r2r(mm2, mm7);				// [0,tmp3],[1,tmp3],[2,tmp3],[3,tmp3]
+
+//    tmp12 = MULTIPLY(tmp14, FIX_1_414213562) - tmp13;
+	psubw_r2r(mm2, mm6);				// wsptr[0,tmp12],[1,tmp12],[2,tmp12],[3,tmp12]
+//    tmp1 = tmp11 + tmp12;
+//    tmp2 = tmp11 - tmp12;
+	movq_r2r(mm1, mm5);
+
+	 //OK
+
+
+    /* Odd part */
+
+//    z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3];
+//    z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3];
+//    z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7];
+//    z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7];
+	movq_m2r(*(wsptr), mm3);		// wsptr[0,0],[0,1],[0,2],[0,3]
+	paddw_r2r(mm6, mm1);				// [0,tmp1],[1,tmp1],[2,tmp1],[3,tmp1]
+
+	movq_m2r(*(wsptr+1),	mm4);		// wsptr[0,4],[0,5],[0,6],[0,7]
+	psubw_r2r(mm6, mm5);				// [0,tmp2],[1,tmp2],[2,tmp2],[3,tmp2]
+
+	movq_r2r(mm3, mm6);
+	punpckldq_r2r(mm4, mm3);		// wsptr[0,0],[0,1],[0,4],[0,5]
+
+	punpckhdq_r2r(mm6, mm4);		// wsptr[0,6],[0,7],[0,2],[0,3]
+	movq_r2r(mm3, mm2);
+
+//Save tmp0 and tmp1 in wsptr
+	movq_r2m(mm0, *(wsptr));		// save tmp0
+	paddw_r2r(mm4, mm2);				// wsptr[xxx],[0,z11],[xxx],[0,z13]
+
+	
+//Continue with z10 --- z13
+	movq_m2r(*(wsptr+2), mm6);		// wsptr[1,0],[1,1],[1,2],[1,3]
+	psubw_r2r(mm4, mm3);				// wsptr[xxx],[0,z12],[xxx],[0,z10]
+
+	movq_m2r(*(wsptr+3), mm0);		// wsptr[1,4],[1,5],[1,6],[1,7]
+	movq_r2r(mm6, mm4);
+
+	movq_r2m(mm1, *(wsptr+1));		// save tmp1
+	punpckldq_r2r(mm0, mm6);		// wsptr[1,0],[1,1],[1,4],[1,5]
+
+	punpckhdq_r2r(mm4, mm0);		// wsptr[1,6],[1,7],[1,2],[1,3]
+	movq_r2r(mm6, mm1);
+	
+//Save tmp2 and tmp3 in wsptr
+	paddw_r2r(mm0, mm6);				// wsptr[xxx],[1,z11],[xxx],[1,z13]
+	movq_r2r(mm2, mm4);
+	
+//Continue with z10 --- z13
+	movq_r2m(mm5, *(wsptr+2));		// save tmp2
+	punpcklwd_r2r(mm6, mm2);		// wsptr[xxx],[xxx],[0,z11],[1,z11]
+
+	psubw_r2r(mm0, mm1);				// wsptr[xxx],[1,z12],[xxx],[1,z10]
+	punpckhwd_r2r(mm6, mm4);		// wsptr[xxx],[xxx],[0,z13],[1,z13]
+
+	movq_r2r(mm3, mm0);
+	punpcklwd_r2r(mm1, mm3);		// wsptr[xxx],[xxx],[0,z12],[1,z12]
+
+	movq_r2m(mm7, *(wsptr+3));		// save tmp3
+	punpckhwd_r2r(mm1, mm0);		// wsptr[xxx],[xxx],[0,z10],[1,z10]
+
+	movq_m2r(*(wsptr+4), mm6);		// wsptr[2,0],[2,1],[2,2],[2,3]
+	punpckhdq_r2r(mm2, mm0);		// wsptr[0,z10],[1,z10],[0,z11],[1,z11]
+
+	movq_m2r(*(wsptr+5), mm7);	// wsptr[2,4],[2,5],[2,6],[2,7]
+	punpckhdq_r2r(mm4, mm3);		// wsptr[0,z12],[1,z12],[0,z13],[1,z13]
+
+	movq_m2r(*(wsptr+6), mm1);	// wsptr[3,0],[3,1],[3,2],[3,3]
+	movq_r2r(mm6, mm4);
+
+	punpckldq_r2r(mm7, mm6);		// wsptr[2,0],[2,1],[2,4],[2,5]
+	movq_r2r(mm1, mm5);
+
+	punpckhdq_r2r(mm4, mm7);		// wsptr[2,6],[2,7],[2,2],[2,3]
+	movq_r2r(mm6, mm2);
+	
+	movq_m2r(*(wsptr+7), mm4);	// wsptr[3,4],[3,5],[3,6],[3,7]
+	paddw_r2r(mm7, mm6);				// wsptr[xxx],[2,z11],[xxx],[2,z13]
+
+	psubw_r2r(mm7, mm2);				// wsptr[xxx],[2,z12],[xxx],[2,z10]
+	punpckldq_r2r(mm4, mm1);		// wsptr[3,0],[3,1],[3,4],[3,5]
+
+	punpckhdq_r2r(mm5, mm4);		// wsptr[3,6],[3,7],[3,2],[3,3]
+	movq_r2r(mm1, mm7);
+
+	paddw_r2r(mm4, mm1);				// wsptr[xxx],[3,z11],[xxx],[3,z13]
+	psubw_r2r(mm4, mm7);				// wsptr[xxx],[3,z12],[xxx],[3,z10]
+
+	movq_r2r(mm6, mm5);
+	punpcklwd_r2r(mm1, mm6);		// wsptr[xxx],[xxx],[2,z11],[3,z11]
+
+	punpckhwd_r2r(mm1, mm5);		// wsptr[xxx],[xxx],[2,z13],[3,z13]
+	movq_r2r(mm2, mm4);
+
+	punpcklwd_r2r(mm7, mm2);		// wsptr[xxx],[xxx],[2,z12],[3,z12]
+
+	punpckhwd_r2r(mm7, mm4);		// wsptr[xxx],[xxx],[2,z10],[3,z10]
+
+	punpckhdq_r2r(mm6, mm4);		// wsptr[2,z10],[3,z10],[2,z11],[3,z11]
+
+	punpckhdq_r2r(mm5, mm2);		// wsptr[2,z12],[3,z12],[2,z13],[3,z13]
+	movq_r2r(mm0, mm5);
+
+	punpckldq_r2r(mm4, mm0);		// wsptr[0,z10],[1,z10],[2,z10],[3,z10]
+
+	punpckhdq_r2r(mm4, mm5);		// wsptr[0,z11],[1,z11],[2,z11],[3,z11]
+	movq_r2r(mm3, mm4);
+
+	punpckhdq_r2r(mm2, mm4);		// wsptr[0,z13],[1,z13],[2,z13],[3,z13]
+	movq_r2r(mm5, mm1);
+
+	punpckldq_r2r(mm2, mm3);		// wsptr[0,z12],[1,z12],[2,z12],[3,z12]
+//    tmp7 = z11 + z13;		/* phase 5 */
+//    tmp8 = z11 - z13;		/* phase 5 */
+	psubw_r2r(mm4, mm1);				// tmp8
+
+	paddw_r2r(mm4, mm5);				// tmp7
+//    tmp21 = MULTIPLY(tmp8, FIX_1_414213562); /* 2*c4 */
+	psllw_i2r(2, mm1);
+
+	psllw_i2r(2, mm0);
+
+	pmulhw_m2r(fix_141, mm1);		// tmp21
+//    tmp20 = MULTIPLY(z12, (FIX_1_082392200- FIX_1_847759065))  /* 2*(c2-c6) */
+//			+ MULTIPLY(z10, - FIX_1_847759065); /* 2*c2 */
+	psllw_i2r(2, mm3);
+	movq_r2r(mm0, mm7);
+
+	pmulhw_m2r(fix_n184, mm7);
+	movq_r2r(mm3, mm6);
+
+	movq_m2r(*(wsptr), mm2);		// tmp0,final1
+
+	pmulhw_m2r(fix_108n184, mm6);
+//	 tmp22 = MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) /* -2*(c2+c6) */
+//			+ MULTIPLY(z12, FIX_1_847759065); /* 2*c2 */
+	movq_r2r(mm2, mm4);				// final1
+  
+	pmulhw_m2r(fix_184n261, mm0);
+	paddw_r2r(mm5, mm2);				// tmp0+tmp7,final1
+
+	pmulhw_m2r(fix_184, mm3);
+	psubw_r2r(mm5, mm4);				// tmp0-tmp7,final1
+
+//    tmp6 = tmp22 - tmp7;	/* phase 2 */
+	psraw_i2r(3, mm2);				// outptr[0,0],[1,0],[2,0],[3,0],final1
+
+	paddw_r2r(mm6, mm7);				// tmp20
+	psraw_i2r(3, mm4);				// outptr[0,7],[1,7],[2,7],[3,7],final1
+
+	paddw_r2r(mm0, mm3);				// tmp22
+
+//    tmp5 = tmp21 - tmp6;
+	psubw_r2r(mm5, mm3);				// tmp6
+
+//    tmp4 = tmp20 + tmp5;
+	movq_m2r(*(wsptr+1), mm0);		// tmp1,final2
+	psubw_r2r(mm3, mm1);				// tmp5
+
+	movq_r2r(mm0, mm6);				// final2
+	paddw_r2r(mm3, mm0);				// tmp1+tmp6,final2
+
+    /* Final output stage: scale down by a factor of 8 and range-limit */
+
+//    outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3)
+//			    & RANGE_MASK];
+//    outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3)
+//			    & RANGE_MASK];	final1
+
+
+//    outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3)
+//			    & RANGE_MASK];
+//    outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3)
+//			    & RANGE_MASK];	final2
+	psubw_r2r(mm3, mm6);				// tmp1-tmp6,final2
+	psraw_i2r(3, mm0);				// outptr[0,1],[1,1],[2,1],[3,1]
+
+	psraw_i2r(3, mm6);				// outptr[0,6],[1,6],[2,6],[3,6]
+	
+	packuswb_r2r(mm4, mm0);			// out[0,1],[1,1],[2,1],[3,1],[0,7],[1,7],[2,7],[3,7]
+	
+	movq_m2r(*(wsptr+2), mm5);		// tmp2,final3
+	packuswb_r2r(mm6, mm2);			// out[0,0],[1,0],[2,0],[3,0],[0,6],[1,6],[2,6],[3,6]
+
+//    outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3)
+//			    & RANGE_MASK];
+//    outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3)
+//			    & RANGE_MASK];	final3
+	paddw_r2r(mm1, mm7);				// tmp4
+	movq_r2r(mm5, mm3);
+
+	paddw_r2r(mm1, mm5);				// tmp2+tmp5
+	psubw_r2r(mm1, mm3);				// tmp2-tmp5
+
+	psraw_i2r(3, mm5);				// outptr[0,2],[1,2],[2,2],[3,2]
+
+	movq_m2r(*(wsptr+3), mm4);		// tmp3,final4
+	psraw_i2r(3, mm3);				// outptr[0,5],[1,5],[2,5],[3,5]
+
+
+
+//    outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3)
+//			    & RANGE_MASK];
+//    outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3)
+//			    & RANGE_MASK];	final4
+	movq_r2r(mm4, mm6);
+	paddw_r2r(mm7, mm4);				// tmp3+tmp4
+
+	psubw_r2r(mm7, mm6);				// tmp3-tmp4
+	psraw_i2r(3, mm4);				// outptr[0,4],[1,4],[2,4],[3,4]
+
+	psraw_i2r(3, mm6);				// outptr[0,3],[1,3],[2,3],[3,3]
+
+	/*
+   movq_r2m(mm4, *dummy);
+	fprintf(stderr, "3-4 %016llx\n", dummy);
+   movq_r2m(mm4, *dummy);
+	fprintf(stderr, "3+4 %016llx\n", dummy);
+	*/
+	
+
+	packuswb_r2r(mm4, mm5);			// out[0,2],[1,2],[2,2],[3,2],[0,4],[1,4],[2,4],[3,4]
+
+	packuswb_r2r(mm3, mm6);			// out[0,3],[1,3],[2,3],[3,3],[0,5],[1,5],[2,5],[3,5]
+	movq_r2r(mm2, mm4);
+
+	movq_r2r(mm5, mm7);
+	punpcklbw_r2r(mm0, mm2);		// out[0,0],[0,1],[1,0],[1,1],[2,0],[2,1],[3,0],[3,1]
+
+	punpckhbw_r2r(mm0, mm4);		// out[0,6],[0,7],[1,6],[1,7],[2,6],[2,7],[3,6],[3,7]
+	movq_r2r(mm2, mm1);
+
+	punpcklbw_r2r(mm6, mm5);		// out[0,2],[0,3],[1,2],[1,3],[2,2],[2,3],[3,2],[3,3]
+	
+	punpckhbw_r2r(mm6, mm7);		// out[0,4],[0,5],[1,4],[1,5],[2,4],[2,5],[3,4],[3,5]
+
+	punpcklwd_r2r(mm5, mm2);		// out[0,0],[0,1],[0,2],[0,3],[1,0],[1,1],[1,2],[1,3]
+	
+	movq_r2r(mm7, mm6);
+	punpckhwd_r2r(mm5, mm1);		// out[2,0],[2,1],[2,2],[2,3],[3,0],[3,1],[3,2],[3,3]
+
+	movq_r2r(mm2, mm0);
+	punpcklwd_r2r(mm4, mm6);		// out[0,4],[0,5],[0,6],[0,7],[1,4],[1,5],[1,6],[1,7]
+
+	punpckldq_r2r(mm6, mm2);		// out[0,0],[0,1],[0,2],[0,3],[0,4],[0,5],[0,6],[0,7]
+
+	movq_r2r(mm1, mm3);
+
+	punpckhwd_r2r(mm4, mm7);		// out[2,4],[2,5],[2,6],[2,7],[3,4],[3,5],[3,6],[3,7]
+	
+	dataptr += rskip;
+	movq_r2m(mm2, *(dataptr));
+
+	punpckhdq_r2r(mm6, mm0);		// out[1,0],[1,1],[1,2],[1,3],[1,4],[1,5],[1,6],[1,7]
+
+	dataptr += rskip;
+	movq_r2m(mm0, *(dataptr));
+
+	punpckldq_r2r(mm7, mm1);		// out[2,0],[2,1],[2,2],[2,3],[2,4],[2,5],[2,6],[2,7]
+	
+	punpckhdq_r2r(mm7, mm3);		// out[3,0],[3,1],[3,2],[3,3],[3,4],[3,5],[3,6],[3,7]
+
+	dataptr += rskip;
+	movq_r2m(mm1, *(dataptr));
+
+	dataptr += rskip;
+	movq_r2m(mm3, *(dataptr));
+
+#else
+  __s32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  __s32 tmp10, tmp11, tmp12, tmp13;
+  __s32 z5, z10, z11, z12, z13;
+  __s16 *inptr;
+  __s32 *wsptr;
+  __u8 *outptr;
+  int ctr;
+  __s32 dcval;
+  __s32 workspace[64];
+
+  inptr = data;
+  wsptr = workspace;
+  for (ctr = 8; ctr > 0; ctr--) {
+    
+    if ((inptr[8] | inptr[16] | inptr[24] |
+	 inptr[32] | inptr[40] | inptr[48] | inptr[56]) == 0) {
+      dcval = inptr[0];
+      wsptr[0] = dcval;
+      wsptr[8] = dcval;
+      wsptr[16] = dcval;
+      wsptr[24] = dcval;
+      wsptr[32] = dcval;
+      wsptr[40] = dcval;
+      wsptr[48] = dcval;
+      wsptr[56] = dcval;
+      
+      inptr++;	
+      wsptr++;
+      continue;
+    } 
+    
+    tmp0 = inptr[0];
+    tmp1 = inptr[16];
+    tmp2 = inptr[32];
+    tmp3 = inptr[48];
+
+    tmp10 = tmp0 + tmp2;
+    tmp11 = tmp0 - tmp2;
+
+    tmp13 = tmp1 + tmp3;
+    tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13;
+
+    tmp0 = tmp10 + tmp13;
+    tmp3 = tmp10 - tmp13;
+    tmp1 = tmp11 + tmp12;
+    tmp2 = tmp11 - tmp12;
+    
+    tmp4 = inptr[8];
+    tmp5 = inptr[24];
+    tmp6 = inptr[40];
+    tmp7 = inptr[56];
+
+    z13 = tmp6 + tmp5;
+    z10 = tmp6 - tmp5;
+    z11 = tmp4 + tmp7;
+    z12 = tmp4 - tmp7;
+
+    tmp7 = z11 + z13;
+    tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
+
+    z5 = MULTIPLY(z10 + z12, FIX_1_847759065);
+    tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5;
+    tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5;
+
+    tmp6 = tmp12 - tmp7;
+    tmp5 = tmp11 - tmp6;
+    tmp4 = tmp10 + tmp5;
+
+    wsptr[0] = (__s32) (tmp0 + tmp7);
+    wsptr[56] = (__s32) (tmp0 - tmp7);
+    wsptr[8] = (__s32) (tmp1 + tmp6);
+    wsptr[48] = (__s32) (tmp1 - tmp6);
+    wsptr[16] = (__s32) (tmp2 + tmp5);
+    wsptr[40] = (__s32) (tmp2 - tmp5);
+    wsptr[32] = (__s32) (tmp3 + tmp4);
+    wsptr[24] = (__s32) (tmp3 - tmp4);
+
+    inptr++;
+    wsptr++;
+  }
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++) {
+    outptr = &(odata[ctr*rskip]);
+
+    tmp10 = wsptr[0] + wsptr[4];
+    tmp11 = wsptr[0] - wsptr[4];
+
+    tmp13 = wsptr[2] + wsptr[6];
+    tmp12 = MULTIPLY(wsptr[2] - wsptr[6], FIX_1_414213562) - tmp13;
+
+    tmp0 = tmp10 + tmp13;
+    tmp3 = tmp10 - tmp13;
+    tmp1 = tmp11 + tmp12;
+    tmp2 = tmp11 - tmp12;
+
+    z13 = wsptr[5] + wsptr[3];
+    z10 = wsptr[5] - wsptr[3];
+    z11 = wsptr[1] + wsptr[7];
+    z12 = wsptr[1] - wsptr[7];
+
+    tmp7 = z11 + z13;
+    tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
+
+    z5 = MULTIPLY(z10 + z12, FIX_1_847759065);
+    tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5;
+    tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5;
+
+    tmp6 = tmp12 - tmp7;
+    tmp5 = tmp11 - tmp6;
+    tmp4 = tmp10 + tmp5;
+
+    outptr[0] = RL(DESCALE(tmp0 + tmp7));
+    outptr[7] = RL(DESCALE(tmp0 - tmp7));
+    outptr[1] = RL(DESCALE(tmp1 + tmp6));
+    outptr[6] = RL(DESCALE(tmp1 - tmp6));
+    outptr[2] = RL(DESCALE(tmp2 + tmp5));
+    outptr[5] = RL(DESCALE(tmp2 - tmp5));
+    outptr[4] = RL(DESCALE(tmp3 + tmp4));
+    outptr[3] = RL(DESCALE(tmp3 - tmp4));
+
+    wsptr += 8;
+  }
+#endif
+}
+/*
+
+Main Routines
+
+This file contains most of the initialisation and control functions
+
+(C) Justin Schoeman 1998
+
+*/
+
+/*
+
+Private function
+
+Initialise all the cache-aliged data blocks
+
+*/
+
+void RTjpeg_init_data(void)
+{
+ unsigned long dptr;
+ 
+ dptr=(unsigned long)&(RTjpeg_alldata[0]);
+ dptr+=32;
+ dptr=dptr>>5;
+ dptr=dptr<<5; /* cache align data */
+ 
+ RTjpeg_block=(__s16 *)dptr;
+ dptr+=sizeof(__s16)*64;
+ RTjpeg_lqt=(__s32 *)dptr;
+ dptr+=sizeof(__s32)*64;
+ RTjpeg_cqt=(__s32 *)dptr;
+ dptr+=sizeof(__s32)*64;
+ RTjpeg_liqt=(__u32 *)dptr;
+ dptr+=sizeof(__u32)*64;
+ RTjpeg_ciqt=(__u32 *)dptr;
+}
+
+/*
+
+External Function
+
+Re-set quality factor
+
+Input: buf -> pointer to 128 ints for quant values store to pass back to
+              init_decompress.
+       Q -> quality factor (192=best, 32=worst)
+*/
+
+void RTjpeg_init_Q(__u8 Q)
+{
+ int i;
+ __u64 qual;
+ 
+ qual=(__u64)Q<<(32-7); /* 32 bit FP, 255=2, 0=0 */
+
+ for(i=0; i<64; i++)
+ {
+  RTjpeg_lqt[i]=(__s32)((qual/((__u64)RTjpeg_lum_quant_tbl[i]<<16))>>3);
+  if(RTjpeg_lqt[i]==0)RTjpeg_lqt[i]=1;
+  RTjpeg_cqt[i]=(__s32)((qual/((__u64)RTjpeg_chrom_quant_tbl[i]<<16))>>3);
+  if(RTjpeg_cqt[i]==0)RTjpeg_cqt[i]=1;
+  RTjpeg_liqt[i]=(1<<16)/(RTjpeg_lqt[i]<<3);
+  RTjpeg_ciqt[i]=(1<<16)/(RTjpeg_cqt[i]<<3);
+  RTjpeg_lqt[i]=((1<<16)/RTjpeg_liqt[i])>>3;
+  RTjpeg_cqt[i]=((1<<16)/RTjpeg_ciqt[i])>>3;
+ }
+ 
+ RTjpeg_lb8=0;
+ while(RTjpeg_liqt[RTjpeg_ZZ[++RTjpeg_lb8]]<=8);
+ RTjpeg_lb8--;
+ RTjpeg_cb8=0;
+ while(RTjpeg_ciqt[RTjpeg_ZZ[++RTjpeg_cb8]]<=8);
+ RTjpeg_cb8--;
+
+ RTjpeg_dct_init();
+ RTjpeg_idct_init();
+ RTjpeg_quant_init();
+}
+
+/*
+
+External Function
+
+Initialise compression.
+
+Input: buf -> pointer to 128 ints for quant values store to pass back to 
+                init_decompress.
+       width -> width of image
+       height -> height of image
+       Q -> quality factor (192=best, 32=worst)
+       
+*/
+
+void RTjpeg_init_compress(__u32 *buf, int width, int height, __u8 Q)
+{
+ int i;
+ __u64 qual;
+ 
+ RTjpeg_init_data();
+ 
+ RTjpeg_width=width;
+ RTjpeg_height=height;
+ RTjpeg_Ywidth = RTjpeg_width>>3;
+ RTjpeg_Ysize=width * height;
+ RTjpeg_Cwidth = RTjpeg_width>>4;
+ RTjpeg_Csize= (width>>1) * height;
+
+ qual=(__u64)Q<<(32-7); /* 32 bit FP, 255=2, 0=0 */
+
+ for(i=0; i<64; i++)
+ {
+  RTjpeg_lqt[i]=(__s32)((qual/((__u64)RTjpeg_lum_quant_tbl[i]<<16))>>3);
+  if(RTjpeg_lqt[i]==0)RTjpeg_lqt[i]=1;
+  RTjpeg_cqt[i]=(__s32)((qual/((__u64)RTjpeg_chrom_quant_tbl[i]<<16))>>3);
+  if(RTjpeg_cqt[i]==0)RTjpeg_cqt[i]=1;
+  RTjpeg_liqt[i]=(1<<16)/(RTjpeg_lqt[i]<<3);
+  RTjpeg_ciqt[i]=(1<<16)/(RTjpeg_cqt[i]<<3);
+  RTjpeg_lqt[i]=((1<<16)/RTjpeg_liqt[i])>>3;
+  RTjpeg_cqt[i]=((1<<16)/RTjpeg_ciqt[i])>>3;
+ }
+ 
+ RTjpeg_lb8=0;
+ while(RTjpeg_liqt[RTjpeg_ZZ[++RTjpeg_lb8]]<=8);
+ RTjpeg_lb8--;
+ RTjpeg_cb8=0;
+ while(RTjpeg_ciqt[RTjpeg_ZZ[++RTjpeg_cb8]]<=8);
+ RTjpeg_cb8--;
+ 
+ RTjpeg_dct_init();
+ RTjpeg_quant_init();
+
+ for(i=0; i<64; i++)
+  buf[i]=RTjpeg_liqt[i];
+ for(i=0; i<64; i++)
+  buf[64+i]=RTjpeg_ciqt[i];
+}
+
+void RTjpeg_init_decompress(__u32 *buf, int width, int height)
+{
+ int i;
+
+ RTjpeg_init_data();
+ 
+ RTjpeg_width=width;
+ RTjpeg_height=height;
+ RTjpeg_Ywidth = RTjpeg_width>>3;
+ RTjpeg_Ysize=width * height;
+ RTjpeg_Cwidth = RTjpeg_width>>4;
+ RTjpeg_Csize= (width>>1) * height;
+
+ for(i=0; i<64; i++)
+ {
+  RTjpeg_liqt[i]=buf[i];
+  RTjpeg_ciqt[i]=buf[i+64];
+ }
+
+ RTjpeg_lb8=0;
+ while(RTjpeg_liqt[RTjpeg_ZZ[++RTjpeg_lb8]]<=8);
+ RTjpeg_lb8--;
+ RTjpeg_cb8=0;
+ while(RTjpeg_ciqt[RTjpeg_ZZ[++RTjpeg_cb8]]<=8);
+ RTjpeg_cb8--;
+
+ RTjpeg_idct_init();
+
+// RTjpeg_color_init();
+}
+
+int RTjpeg_compressYUV420(__s8 *sp, unsigned char *bp)
+{
+ __s8 * sb;
+ register __s8 * bp1 = bp + (RTjpeg_width<<3);
+ register __s8 * bp2 = bp + RTjpeg_Ysize;
+ register __s8 * bp3 = bp2 + (RTjpeg_Csize>>1);
+ register int i, j, k;
+
+#ifdef MMX
+ emms();
+#endif
+ sb=sp;
+/* Y */
+ for(i=RTjpeg_height>>1; i; i-=8)
+ {
+  for(j=0, k=0; j<RTjpeg_width; j+=16, k+=8)
+  {
+   RTjpeg_dctY(bp+j, RTjpeg_block, RTjpeg_Ywidth);
+   RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
+   sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
+
+   RTjpeg_dctY(bp+j+8, RTjpeg_block, RTjpeg_Ywidth);
+   RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
+   sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
+
+   RTjpeg_dctY(bp1+j, RTjpeg_block, RTjpeg_Ywidth);
+   RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
+   sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
+
+   RTjpeg_dctY(bp1+j+8, RTjpeg_block, RTjpeg_Ywidth);
+   RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
+   sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
+
+   RTjpeg_dctY(bp2+k, RTjpeg_block, RTjpeg_Cwidth);
+   RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
+   sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
+
+   RTjpeg_dctY(bp3+k, RTjpeg_block, RTjpeg_Cwidth);
+   RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
+   sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
+
+  }
+  bp+=RTjpeg_width<<4;
+  bp1+=RTjpeg_width<<4;
+  bp2+=RTjpeg_width<<2;
+  bp3+=RTjpeg_width<<2;
+			 
+ }
+#ifdef MMX
+ emms();
+#endif
+ return (sp-sb);
+}
+
+int RTjpeg_compressYUV422(__s8 *sp, unsigned char *bp)
+{
+ __s8 * sb;
+ register __s8 * bp2 = bp + RTjpeg_Ysize;
+ register __s8 * bp3 = bp2 + RTjpeg_Csize;
+ register int i, j, k;
+
+#ifdef MMX
+ emms();
+#endif
+ sb=sp;
+/* Y */
+ for(i=RTjpeg_height; i; i-=8)
+ {
+  for(j=0, k=0; j<RTjpeg_width; j+=16, k+=8)
+  {
+   RTjpeg_dctY(bp+j, RTjpeg_block, RTjpeg_Ywidth);
+   RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
+   sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
+
+   RTjpeg_dctY(bp+j+8, RTjpeg_block, RTjpeg_Ywidth);
+   RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
+   sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
+
+   RTjpeg_dctY(bp2+k, RTjpeg_block, RTjpeg_Cwidth);
+   RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
+   sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
+
+   RTjpeg_dctY(bp3+k, RTjpeg_block, RTjpeg_Cwidth);
+   RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
+   sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
+
+  }
+  bp+=RTjpeg_width<<3;
+  bp2+=RTjpeg_width<<2;
+  bp3+=RTjpeg_width<<2;
+			 
+ }
+#ifdef MMX
+ emms();
+#endif
+ return (sp-sb);
+}
+
+int RTjpeg_compress8(__s8 *sp, unsigned char *bp)
+{
+ __s8 * sb;
+ int i, j;
+
+#ifdef MMX
+ emms();
+#endif
+ 
+ sb=sp;
+/* Y */
+ for(i=0; i<RTjpeg_height; i+=8)
+ {
+  for(j=0; j<RTjpeg_width; j+=8)
+  {
+   RTjpeg_dctY(bp+j, RTjpeg_block, RTjpeg_width);
+   RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
+   sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
+  }
+  bp+=RTjpeg_width;
+ }
+
+#ifdef MMX
+ emms();
+#endif
+ return (sp-sb);
+}
+
+void RTjpeg_decompressYUV422(__s8 *sp, __u8 *bp)
+{
+ register __s8 * bp2 = bp + RTjpeg_Ysize;
+ register __s8 * bp3 = bp2 + (RTjpeg_Csize);
+ int i, j,k;
+
+#ifdef MMX
+ emms();
+#endif
+
+/* Y */
+ for(i=RTjpeg_height; i; i-=8)
+ {
+  for(k=0, j=0; j<RTjpeg_width; j+=16, k+=8) {
+   if(*sp==-1)sp++;
+   else
+   { 
+    sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
+    RTjpeg_idct(bp+j, RTjpeg_block, RTjpeg_width);
+   }
+   if(*sp==-1)sp++;
+   else
+   { 
+    sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
+    RTjpeg_idct(bp+j+8, RTjpeg_block, RTjpeg_width);
+   }
+   if(*sp==-1)sp++;
+   else
+   { 
+    sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt);
+    RTjpeg_idct(bp2+k, RTjpeg_block, RTjpeg_width>>1);
+   } 
+   if(*sp==-1)sp++;
+   else
+   { 
+    sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt);
+    RTjpeg_idct(bp3+k, RTjpeg_block, RTjpeg_width>>1);
+   } 
+  }
+  bp+=RTjpeg_width<<3;
+  bp2+=RTjpeg_width<<2;
+  bp3+=RTjpeg_width<<2;
+ }
+#ifdef MMX
+ emms();
+#endif
+}
+
+void RTjpeg_decompressYUV420(__s8 *sp, __u8 *bp)
+{
+ register __s8 * bp1 = bp + (RTjpeg_width<<3);
+ register __s8 * bp2 = bp + RTjpeg_Ysize;
+ register __s8 * bp3 = bp2 + (RTjpeg_Csize>>1);
+ int i, j,k;
+
+#ifdef MMX
+ emms();
+#endif
+
+/* Y */
+ for(i=RTjpeg_height>>1; i; i-=8)
+ {
+  for(k=0, j=0; j<RTjpeg_width; j+=16, k+=8) {
+   if(*sp==-1)sp++;
+   else
+   { 
+    sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
+    RTjpeg_idct(bp+j, RTjpeg_block, RTjpeg_width);
+   }
+   if(*sp==-1)sp++;
+   else
+   { 
+    sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
+    RTjpeg_idct(bp+j+8, RTjpeg_block, RTjpeg_width);
+   }
+   if(*sp==-1)sp++;
+   else
+   { 
+    sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
+    RTjpeg_idct(bp1+j, RTjpeg_block, RTjpeg_width);
+   }
+   if(*sp==-1)sp++;
+   else
+   { 
+    sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
+    RTjpeg_idct(bp1+j+8, RTjpeg_block, RTjpeg_width);
+   }
+   if(*sp==-1)sp++;
+   else
+   { 
+    sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt);
+    RTjpeg_idct(bp2+k, RTjpeg_block, RTjpeg_width>>1);
+   } 
+   if(*sp==-1)sp++;
+   else
+   { 
+    sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt);
+    RTjpeg_idct(bp3+k, RTjpeg_block, RTjpeg_width>>1);
+   } 
+  }
+  bp+=RTjpeg_width<<4;
+  bp1+=RTjpeg_width<<4;
+  bp2+=RTjpeg_width<<2;
+  bp3+=RTjpeg_width<<2;
+ }
+#ifdef MMX
+ emms();
+#endif
+}
+
+void RTjpeg_decompress8(__s8 *sp, __u8 *bp)
+{
+ int i, j;
+
+#ifdef MMX
+ emms();
+#endif
+
+/* Y */
+ for(i=0; i<RTjpeg_height; i+=8)
+ {
+  for(j=0; j<RTjpeg_width; j+=8)
+   if(*sp==-1)sp++;
+   else
+   { 
+    sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
+    RTjpeg_idct(bp+j, RTjpeg_block, RTjpeg_width);
+   }
+  bp+=RTjpeg_width<<3;
+ }
+}
+
+/*
+External Function
+
+Initialise additional data structures for motion compensation
+
+*/
+
+void RTjpeg_init_mcompress(void)
+{
+ unsigned long tmp;
+
+ if(!RTjpeg_old)
+ {
+  RTjpeg_old=malloc((4*RTjpeg_width*RTjpeg_height)+32);
+  tmp=(unsigned long)RTjpeg_old;
+  tmp+=32;
+  tmp=tmp>>5;
+  RTjpeg_old=(__s16 *)(tmp<<5);
+ }
+ if (!RTjpeg_old)
+ {
+  fprintf(stderr, "RTjpeg: Could not allocate memory\n");
+  exit(-1);
+ }
+ bzero(RTjpeg_old, ((4*RTjpeg_width*RTjpeg_height)));
+}
+
+#ifdef MMX
+
+int RTjpeg_bcomp(__s16 *old, mmx_t *mask)
+{
+ int i;
+ mmx_t *mold=(mmx_t *)old;
+ mmx_t *mblock=(mmx_t *)RTjpeg_block;
+ volatile mmx_t result;
+ static mmx_t neg=(mmx_t)(unsigned long long)0xffffffffffffffffULL;
+ 
+ movq_m2r(*mask, mm7);
+ movq_m2r(neg, mm6);
+ pxor_r2r(mm5, mm5);
+ 
+ for(i=0; i<8; i++)
+ {
+  movq_m2r(*(mblock++), mm0);
+  			movq_m2r(*(mblock++), mm2);
+  movq_m2r(*(mold++), mm1);
+  			movq_m2r(*(mold++), mm3);
+  psubsw_r2r(mm1, mm0);
+  			psubsw_r2r(mm3, mm2);
+  movq_r2r(mm0, mm1);
+  			movq_r2r(mm2, mm3);
+  pcmpgtw_r2r(mm7, mm0);
+  			pcmpgtw_r2r(mm7, mm2);
+  pxor_r2r(mm6, mm1);
+  			pxor_r2r(mm6, mm3);
+  pcmpgtw_r2r(mm7, mm1);
+  			pcmpgtw_r2r(mm7, mm3);
+  por_r2r(mm0, mm5);
+  			por_r2r(mm2, mm5);
+  por_r2r(mm1, mm5);
+  			por_r2r(mm3, mm5);
+ }
+ movq_r2m(mm5, result);
+ 
+ if(result.q)
+ {
+//  if(!RTjpeg_mtest)
+//   for(i=0; i<16; i++)((__u64 *)old)[i]=((__u64 *)RTjpeg_block)[i];
+  return 0;
+ }
+// printf(".");
+ return 1;
+}
+
+#else
+int RTjpeg_bcomp(__s16 *old, __u16 *mask)
+{
+ int i;
+
+ for(i=0; i<64; i++)
+  if(abs(old[i]-RTjpeg_block[i])>*mask)
+  {
+   if(!RTjpeg_mtest)
+    for(i=0; i<16; i++)((__u64 *)old)[i]=((__u64 *)RTjpeg_block)[i];
+   return 0;
+  }
+ return 1;
+}
+#endif
+
+void RTjpeg_set_test(int i)
+{
+ RTjpeg_mtest=i;
+}
+
+int RTjpeg_mcompressYUV420(__s8 *sp, unsigned char *bp, __u16 lmask, __u16 cmask)
+{
+ __s8 * sb;
+//rh __s16 *block;
+ register __s8 * bp1 = bp + (RTjpeg_width<<3);
+ register __s8 * bp2 = bp + RTjpeg_Ysize;
+ register __s8 * bp3 = bp2 + (RTjpeg_Csize>>1);
+ register int i, j, k;
+
+#ifdef MMX
+ emms();
+ RTjpeg_lmask=(mmx_t)(((__u64)lmask<<48)|((__u64)lmask<<32)|((__u64)lmask<<16)|lmask);
+ RTjpeg_cmask=(mmx_t)(((__u64)cmask<<48)|((__u64)cmask<<32)|((__u64)cmask<<16)|cmask);
+#else
+ RTjpeg_lmask=lmask;
+ RTjpeg_cmask=cmask;
+#endif
+ 
+ sb=sp;
+ block=RTjpeg_old;
+/* Y */
+ for(i=RTjpeg_height>>1; i; i-=8)
+ {
+  for(j=0, k=0; j<RTjpeg_width; j+=16, k+=8)
+  {
+   RTjpeg_dctY(bp+j, RTjpeg_block, RTjpeg_Ywidth);
+   RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
+   if(RTjpeg_bcomp(block, &RTjpeg_lmask))
+   {
+    *((__u8 *)sp++)=255;
+   } 
+	else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
+   block+=64;
+
+   RTjpeg_dctY(bp+j+8, RTjpeg_block, RTjpeg_Ywidth);
+   RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
+   if(RTjpeg_bcomp(block, &RTjpeg_lmask))
+   {
+    *((__u8 *)sp++)=255;
+   } 
+	else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
+   block+=64;
+
+   RTjpeg_dctY(bp1+j, RTjpeg_block, RTjpeg_Ywidth);
+   RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
+   if(RTjpeg_bcomp(block, &RTjpeg_lmask))
+   {
+    *((__u8 *)sp++)=255;
+   } 
+	else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
+   block+=64;
+
+   RTjpeg_dctY(bp1+j+8, RTjpeg_block, RTjpeg_Ywidth);
+   RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
+   if(RTjpeg_bcomp(block, &RTjpeg_lmask))
+   {
+    *((__u8 *)sp++)=255;
+   } 
+	else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
+   block+=64;
+
+   RTjpeg_dctY(bp2+k, RTjpeg_block, RTjpeg_Cwidth);
+   RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
+   if(RTjpeg_bcomp(block, &RTjpeg_cmask))
+   {
+    *((__u8 *)sp++)=255;
+   } 
+	else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
+   block+=64;
+
+   RTjpeg_dctY(bp3+k, RTjpeg_block, RTjpeg_Cwidth);
+   RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
+   if(RTjpeg_bcomp(block, &RTjpeg_cmask))
+   {
+    *((__u8 *)sp++)=255;
+   } 
+	else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
+   block+=64;
+  }
+  bp+=RTjpeg_width<<4;
+  bp1+=RTjpeg_width<<4;
+  bp2+=RTjpeg_width<<2;
+  bp3+=RTjpeg_width<<2;
+			 
+ }
+#ifdef MMX
+ emms();
+#endif
+ return (sp-sb);
+}
+
+
+int RTjpeg_mcompressYUV422(__s8 *sp, unsigned char *bp, __u16 lmask, __u16 cmask)
+{
+ __s8 * sb;
+ __s16 *block;
+ register __s8 * bp2;
+ register __s8 * bp3;
+ register int i, j, k;
+
+#ifdef MMX
+ emms();
+ RTjpeg_lmask=(mmx_t)(((__u64)lmask<<48)|((__u64)lmask<<32)|((__u64)lmask<<16)|lmask);
+ RTjpeg_cmask=(mmx_t)(((__u64)cmask<<48)|((__u64)cmask<<32)|((__u64)cmask<<16)|cmask);
+#else
+ RTjpeg_lmask=lmask;
+ RTjpeg_cmask=cmask;
+#endif
+ 
+ bp = bp - RTjpeg_width*0;
+ bp2 = bp + RTjpeg_Ysize-RTjpeg_width*0;
+ bp3 = bp2 + RTjpeg_Csize;
+
+ sb=sp;
+ block=RTjpeg_old;
+/* Y */
+ for(i=RTjpeg_height; i; i-=8)
+ {
+  for(j=0, k=0; j<RTjpeg_width; j+=16, k+=8)
+  {
+   RTjpeg_dctY(bp+j, RTjpeg_block, RTjpeg_Ywidth);
+   RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
+   if(RTjpeg_bcomp(block, &RTjpeg_lmask))
+   {
+    *((__u8 *)sp++)=255;
+   } 
+	else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
+   block+=64;
+
+   RTjpeg_dctY(bp+j+8, RTjpeg_block, RTjpeg_Ywidth);
+   RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
+   if(RTjpeg_bcomp(block, &RTjpeg_lmask))
+   {
+    *((__u8 *)sp++)=255;
+   } 
+	else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
+   block+=64;
+
+   RTjpeg_dctY(bp2+k, RTjpeg_block, RTjpeg_Cwidth);
+   RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
+   if(RTjpeg_bcomp(block, &RTjpeg_cmask))
+   {
+    *((__u8 *)sp++)=255;
+   } 
+	else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
+   block+=64;
+
+   RTjpeg_dctY(bp3+k, RTjpeg_block, RTjpeg_Cwidth);
+   RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
+   if(RTjpeg_bcomp(block, &RTjpeg_cmask))
+   {
+    *((__u8 *)sp++)=255;
+   } 
+	else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
+   block+=64;
+
+  }
+  bp+=RTjpeg_width<<3;
+  bp2+=RTjpeg_width<<2;
+  bp3+=RTjpeg_width<<2;
+ }
+ printf ("%d\n", block - RTjpeg_old);
+#ifdef MMX
+ emms();
+#endif
+ return (sp-sb);
+}
+
+int RTjpeg_mcompress8(__s8 *sp, unsigned char *bp, __u16 lmask)
+{
+ __s8 * sb;
+ __s16 *block;
+ int i, j;
+
+#ifdef MMX
+ emms();
+ RTjpeg_lmask=(mmx_t)(((__u64)lmask<<48)|((__u64)lmask<<32)|((__u64)lmask<<16)|lmask);
+#else
+ RTjpeg_lmask=lmask;
+#endif
+
+ 
+ sb=sp;
+ block=RTjpeg_old;
+/* Y */
+ for(i=0; i<RTjpeg_height; i+=8)
+ {
+  for(j=0; j<RTjpeg_width; j+=8)
+  {
+   RTjpeg_dctY(bp+j, RTjpeg_block, RTjpeg_width);
+   RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
+   if(RTjpeg_bcomp(block, &RTjpeg_lmask))
+   {
+    *((__u8 *)sp++)=255;
+//    printf("* %d ", sp[-1]);
+   } else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
+   block+=64;
+  }
+  bp+=RTjpeg_width<<3;
+ }
+#ifdef MMX
+ emms();
+#endif
+ return (sp-sb);
+}
+
+void RTjpeg_color_init(void)
+{
+}  
+
+#define KcrR 76284
+#define KcrG 53281
+#define KcbG 25625
+#define KcbB 132252
+#define Ky 76284
+
+void RTjpeg_yuv422rgb(__u8 *buf, __u8 *rgb, int stride)
+{
+ int tmp;
+ int i, j;
+ __s32 y, crR, crG, cbG, cbB;
+ __u8 *bufcr, *bufcb, *bufy, *bufoute;
+ int yskip;
+ 
+ yskip=RTjpeg_width;
+ 
+ bufcb=&buf[RTjpeg_width*RTjpeg_height];
+ bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/2];
+ bufy=&buf[0];
+ bufoute=rgb;
+ 
+ for(i=0; i<(RTjpeg_height); i++)
+ {
+  for(j=0; j<RTjpeg_width; j+=2)
+  {
+   crR=(*bufcr-128)*KcrR;
+   crG=(*(bufcr++)-128)*KcrG;
+   cbG=(*bufcb-128)*KcbG;
+   cbB=(*(bufcb++)-128)*KcbB;
+  
+   y=(bufy[j]-16)*Ky;
+   
+   tmp=(y+crR)>>16;
+   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y-crG-cbG)>>16;
+   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y+cbB)>>16;
+   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
+
+   y=(bufy[j+1]-16)*Ky;
+
+   tmp=(y+crR)>>16;
+   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y-crG-cbG)>>16;
+   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y+cbB)>>16;
+   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
+
+  }
+  bufy+=yskip;
+ }
+}
+
+
+void RTjpeg_yuv420rgb(__u8 *buf, __u8 *rgb, int stride)
+{
+ int tmp;
+ int i, j;
+ __s32 y, crR, crG, cbG, cbB;
+ __u8 *bufcr, *bufcb, *bufy, *bufoute, *bufouto;
+ int oskip, yskip;
+ 
+ if(stride==0)
+ 	oskip=RTjpeg_width*3;
+ else
+ 	oskip=2*stride-RTjpeg_width*3;
+ 
+ yskip=RTjpeg_width;
+ 
+ bufcb=&buf[RTjpeg_width*RTjpeg_height];
+ bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/4];
+ bufy=&buf[0];
+ bufoute=rgb;
+ bufouto=rgb+RTjpeg_width*3;
+ 
+ for(i=0; i<(RTjpeg_height>>1); i++)
+ {
+  for(j=0; j<RTjpeg_width; j+=2)
+  {
+   crR=(*bufcr-128)*KcrR;
+   crG=(*(bufcr++)-128)*KcrG;
+   cbG=(*bufcb-128)*KcbG;
+   cbB=(*(bufcb++)-128)*KcbB;
+  
+   y=(bufy[j]-16)*Ky;
+   
+   tmp=(y+crR)>>16;
+   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y-crG-cbG)>>16;
+   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y+cbB)>>16;
+   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
+
+   y=(bufy[j+1]-16)*Ky;
+
+   tmp=(y+crR)>>16;
+   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y-crG-cbG)>>16;
+   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y+cbB)>>16;
+   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
+
+   y=(bufy[j+yskip]-16)*Ky;
+
+   tmp=(y+crR)>>16;
+   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y-crG-cbG)>>16;
+   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y+cbB)>>16;
+   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
+
+   y=(bufy[j+1+yskip]-16)*Ky;
+
+   tmp=(y+crR)>>16;
+   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y-crG-cbG)>>16;
+   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y+cbB)>>16;
+   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   
+  }
+  bufoute+=oskip;
+  bufouto+=oskip;
+  bufy+=yskip<<1;
+ }
+}
+
+
+void RTjpeg_yuvrgb32(__u8 *buf, __u8 *rgb, int stride)
+{
+ int tmp;
+ int i, j;
+ __s32 y, crR, crG, cbG, cbB;
+ __u8 *bufcr, *bufcb, *bufy, *bufoute, *bufouto;
+ int oskip, yskip;
+ 
+ if(stride==0)
+ 	oskip=RTjpeg_width*4;
+ else
+ 	oskip = 2*stride-RTjpeg_width*4;
+ yskip=RTjpeg_width;
+ 
+ bufcb=&buf[RTjpeg_width*RTjpeg_height];
+ bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/2];
+ bufy=&buf[0];
+ bufoute=rgb;
+ bufouto=rgb+RTjpeg_width*4;
+ 
+ for(i=0; i<(RTjpeg_height>>1); i++)
+ {
+  for(j=0; j<RTjpeg_width; j+=2)
+  {
+   crR=(*bufcr-128)*KcrR;
+   crG=(*(bufcr++)-128)*KcrG;
+   cbG=(*bufcb-128)*KcbG;
+   cbB=(*(bufcb++)-128)*KcbB;
+  
+   y=(bufy[j]-16)*Ky;
+   
+   tmp=(y+cbB)>>16;
+   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y-crG-cbG)>>16;
+   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y+crR)>>16;
+   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   bufoute++;
+
+   y=(bufy[j+1]-16)*Ky;
+
+   tmp=(y+cbB)>>16;
+   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y-crG-cbG)>>16;
+   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y+crR)>>16;
+   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   bufoute++;
+
+   y=(bufy[j+yskip]-16)*Ky;
+
+   tmp=(y+cbB)>>16;
+   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y-crG-cbG)>>16;
+   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y+crR)>>16;
+   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   bufouto++;
+
+   y=(bufy[j+1+yskip]-16)*Ky;
+
+   tmp=(y+cbB)>>16;
+   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y-crG-cbG)>>16;
+   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y+crR)>>16;
+   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   bufouto++;
+   
+  }
+  bufoute+=oskip;
+  bufouto+=oskip;
+  bufy+=yskip<<1;
+ }
+}
+
+void RTjpeg_yuvrgb24(__u8 *buf, __u8 *rgb, int stride)
+{
+ int tmp;
+ int i, j;
+ __s32 y, crR, crG, cbG, cbB;
+ __u8 *bufcr, *bufcb, *bufy, *bufoute, *bufouto;
+ int oskip, yskip;
+ 
+ if(stride==0)
+ 	oskip=RTjpeg_width*3;
+ else
+ 	oskip=2*stride - RTjpeg_width*3;
+ 	
+ yskip=RTjpeg_width;
+ 
+ bufcb=&buf[RTjpeg_width*RTjpeg_height];
+ bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/4];
+ bufy=&buf[0];
+ bufoute=rgb;
+ bufouto=rgb+RTjpeg_width*3;
+ 
+ for(i=0; i<(RTjpeg_height>>1); i++)
+ {
+  for(j=0; j<RTjpeg_width; j+=2)
+  {
+   crR=(*bufcr-128)*KcrR;
+   crG=(*(bufcr++)-128)*KcrG;
+   cbG=(*bufcb-128)*KcbG;
+   cbB=(*(bufcb++)-128)*KcbB;
+  
+   y=(bufy[j]-16)*Ky;
+   
+   tmp=(y+cbB)>>16;
+   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y-crG-cbG)>>16;
+   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y+crR)>>16;
+   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
+
+   y=(bufy[j+1]-16)*Ky;
+
+   tmp=(y+cbB)>>16;
+   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y-crG-cbG)>>16;
+   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y+crR)>>16;
+   *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
+
+   y=(bufy[j+yskip]-16)*Ky;
+
+   tmp=(y+cbB)>>16;
+   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y-crG-cbG)>>16;
+   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y+crR)>>16;
+   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
+
+   y=(bufy[j+1+yskip]-16)*Ky;
+
+   tmp=(y+cbB)>>16;
+   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y-crG-cbG)>>16;
+   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y+crR)>>16;
+   *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
+   
+  }
+  bufoute+=oskip;
+  bufouto+=oskip;
+  bufy+=yskip<<1;
+ }
+}
+
+void RTjpeg_yuvrgb16(__u8 *buf, __u8 *rgb, int stride)
+{
+ int tmp;
+ int i, j;
+ __s32 y, crR, crG, cbG, cbB;
+ __u8 *bufcr, *bufcb, *bufy, *bufoute, *bufouto;
+ int oskip, yskip;
+ unsigned char r, g, b;
+ 
+ if(stride==0)
+ 	oskip=RTjpeg_width*2;
+ else
+ 	oskip=2*stride-RTjpeg_width*2;
+ 	
+ yskip=RTjpeg_width;
+ 
+ bufcb=&buf[RTjpeg_width*RTjpeg_height];
+ bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/4];
+ bufy=&buf[0];
+ bufoute=rgb;
+ bufouto=rgb+RTjpeg_width*2;
+ 
+ for(i=0; i<(RTjpeg_height>>1); i++)
+ {
+  for(j=0; j<RTjpeg_width; j+=2)
+  {
+   crR=(*bufcr-128)*KcrR;
+   crG=(*(bufcr++)-128)*KcrG;
+   cbG=(*bufcb-128)*KcbG;
+   cbB=(*(bufcb++)-128)*KcbB;
+  
+   y=(bufy[j]-16)*Ky;
+   
+   tmp=(y+cbB)>>16;
+   b=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y-crG-cbG)>>16;
+   g=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y+crR)>>16;
+   r=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(int)((int)b >> 3);
+   tmp|=(int)(((int)g >> 2) << 5);
+   tmp|=(int)(((int)r >> 3) << 11);
+   *(bufoute++)=tmp&0xff;
+   *(bufoute++)=tmp>>8;
+   
+
+   y=(bufy[j+1]-16)*Ky;
+
+   tmp=(y+cbB)>>16;
+   b=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y-crG-cbG)>>16;
+   g=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y+crR)>>16;
+   r=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(int)((int)b >> 3);
+   tmp|=(int)(((int)g >> 2) << 5);
+   tmp|=(int)(((int)r >> 3) << 11);
+   *(bufoute++)=tmp&0xff;
+   *(bufoute++)=tmp>>8;
+
+   y=(bufy[j+yskip]-16)*Ky;
+
+   tmp=(y+cbB)>>16;
+   b=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y-crG-cbG)>>16;
+   g=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y+crR)>>16;
+   r=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(int)((int)b >> 3);
+   tmp|=(int)(((int)g >> 2) << 5);
+   tmp|=(int)(((int)r >> 3) << 11);
+   *(bufouto++)=tmp&0xff;
+   *(bufouto++)=tmp>>8;
+
+   y=(bufy[j+1+yskip]-16)*Ky;
+
+   tmp=(y+cbB)>>16;
+   b=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y-crG-cbG)>>16;
+   g=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(y+crR)>>16;
+   r=(tmp>255)?255:((tmp<0)?0:tmp);
+   tmp=(int)((int)b >> 3);
+   tmp|=(int)(((int)g >> 2) << 5);
+   tmp|=(int)(((int)r >> 3) << 11);
+   *(bufouto++)=tmp&0xff;
+   *(bufouto++)=tmp>>8;
+
+  }
+  bufoute+=oskip;
+  bufouto+=oskip;
+  bufy+=yskip<<1;
+ }
+}
+
+/* fix stride */
+
+void RTjpeg_yuvrgb8(__u8 *buf, __u8 *rgb, int stride)
+{
+ bcopy(buf, rgb, RTjpeg_width*RTjpeg_height);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libmpcodecs/native/RTjpegN.h	Sat Apr 13 18:03:02 2002 +0000
@@ -0,0 +1,58 @@
+/* 
+   RTjpeg (C) Justin Schoeman 1998 (justin@suntiger.ee.up.ac.za)
+   
+   With modifications by:
+   (c) 1998, 1999 by Joerg Walter <trouble@moes.pmnet.uni-oldenburg.de>
+   and
+   (c) 1999 by Wim Taymans <wim.taymans@tvd.be>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+    
+*/
+
+#ifndef _I386_TYPES_H
+typedef unsigned char __u8;
+typedef unsigned short __u16;
+typedef unsigned long __u32;
+typedef unsigned long long __u64;
+typedef signed char __s8;
+typedef signed short __s16;
+typedef signed long __s32;
+#endif
+
+extern void RTjpeg_init_Q(__u8 Q);
+extern void RTjpeg_init_compress(long unsigned int *buf, int width, int height, __u8 Q);
+extern void RTjpeg_init_decompress(long unsigned int *buf, int width, int height);
+extern int RTjpeg_compressYUV420(__s8 *sp, unsigned char *bp);
+extern int RTjpeg_compressYUV422(__s8 *sp, unsigned char *bp);
+extern void RTjpeg_decompressYUV420(__s8 *sp, __u8 *bp);
+extern void RTjpeg_decompressYUV422(__s8 *sp, __u8 *bp);
+extern int RTjpeg_compress8(__s8 *sp, unsigned char *bp);
+extern void RTjpeg_decompress8(__s8 *sp, __u8 *bp);
+
+extern void RTjpeg_init_mcompress(void);
+extern int RTjpeg_mcompressYUV420(__s8 *sp, unsigned char *bp, __u16 lmask, __u16 cmask);
+extern int RTjpeg_mcompressYUV422(__s8 *sp, unsigned char *bp, __u16 lmask, __u16 cmask);
+extern int RTjpeg_mcompress8(__s8 *sp, unsigned char *bp, __u16 lmask);
+extern void RTjpeg_set_test(int i);
+
+extern void RTjpeg_yuv420rgb(__u8 *buf, __u8 *rgb, int stride);
+extern void RTjpeg_yuv422rgb(__u8 *buf, __u8 *rgb, int stride);
+extern void RTjpeg_yuvrgb8(__u8 *buf, __u8 *rgb, int stride);
+extern void RTjpeg_yuvrgb16(__u8 *buf, __u8 *rgb, int stride);
+extern void RTjpeg_yuvrgb24(__u8 *buf, __u8 *rgb, int stride);
+extern void RTjpeg_yuvrgb32(__u8 *buf, __u8 *rgb, int stride);
+		  
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libmpcodecs/native/alaw.h	Sat Apr 13 18:03:02 2002 +0000
@@ -0,0 +1,71 @@
+// Generated by TOOLS/alaw-gen.c
+
+short alaw2short[]={
+ -5504, -5248, -6016, -5760, -4480, -4224, -4992, -4736,
+ -7552, -7296, -8064, -7808, -6528, -6272, -7040, -6784,
+ -2752, -2624, -3008, -2880, -2240, -2112, -2496, -2368,
+ -3776, -3648, -4032, -3904, -3264, -3136, -3520, -3392,
+-22016,-20992,-24064,-23040,-17920,-16896,-19968,-18944,
+-30208,-29184,-32256,-31232,-26112,-25088,-28160,-27136,
+-11008,-10496,-12032,-11520, -8960, -8448, -9984, -9472,
+-15104,-14592,-16128,-15616,-13056,-12544,-14080,-13568,
+  -344,  -328,  -376,  -360,  -280,  -264,  -312,  -296,
+  -472,  -456,  -504,  -488,  -408,  -392,  -440,  -424,
+   -88,   -72,  -120,  -104,   -24,    -8,   -56,   -40,
+  -216,  -200,  -248,  -232,  -152,  -136,  -184,  -168,
+ -1376, -1312, -1504, -1440, -1120, -1056, -1248, -1184,
+ -1888, -1824, -2016, -1952, -1632, -1568, -1760, -1696,
+  -688,  -656,  -752,  -720,  -560,  -528,  -624,  -592,
+  -944,  -912, -1008,  -976,  -816,  -784,  -880,  -848,
+  5504,  5248,  6016,  5760,  4480,  4224,  4992,  4736,
+  7552,  7296,  8064,  7808,  6528,  6272,  7040,  6784,
+  2752,  2624,  3008,  2880,  2240,  2112,  2496,  2368,
+  3776,  3648,  4032,  3904,  3264,  3136,  3520,  3392,
+ 22016, 20992, 24064, 23040, 17920, 16896, 19968, 18944,
+ 30208, 29184, 32256, 31232, 26112, 25088, 28160, 27136,
+ 11008, 10496, 12032, 11520,  8960,  8448,  9984,  9472,
+ 15104, 14592, 16128, 15616, 13056, 12544, 14080, 13568,
+   344,   328,   376,   360,   280,   264,   312,   296,
+   472,   456,   504,   488,   408,   392,   440,   424,
+    88,    72,   120,   104,    24,     8,    56,    40,
+   216,   200,   248,   232,   152,   136,   184,   168,
+  1376,  1312,  1504,  1440,  1120,  1056,  1248,  1184,
+  1888,  1824,  2016,  1952,  1632,  1568,  1760,  1696,
+   688,   656,   752,   720,   560,   528,   624,   592,
+   944,   912,  1008,   976,   816,   784,   880,   848
+};
+
+short ulaw2short[]={
+-32124,-31100,-30076,-29052,-28028,-27004,-25980,-24956,
+-23932,-22908,-21884,-20860,-19836,-18812,-17788,-16764,
+-15996,-15484,-14972,-14460,-13948,-13436,-12924,-12412,
+-11900,-11388,-10876,-10364, -9852, -9340, -8828, -8316,
+ -7932, -7676, -7420, -7164, -6908, -6652, -6396, -6140,
+ -5884, -5628, -5372, -5116, -4860, -4604, -4348, -4092,
+ -3900, -3772, -3644, -3516, -3388, -3260, -3132, -3004,
+ -2876, -2748, -2620, -2492, -2364, -2236, -2108, -1980,
+ -1884, -1820, -1756, -1692, -1628, -1564, -1500, -1436,
+ -1372, -1308, -1244, -1180, -1116, -1052,  -988,  -924,
+  -876,  -844,  -812,  -780,  -748,  -716,  -684,  -652,
+  -620,  -588,  -556,  -524,  -492,  -460,  -428,  -396,
+  -372,  -356,  -340,  -324,  -308,  -292,  -276,  -260,
+  -244,  -228,  -212,  -196,  -180,  -164,  -148,  -132,
+  -120,  -112,  -104,   -96,   -88,   -80,   -72,   -64,
+   -56,   -48,   -40,   -32,   -24,   -16,    -8,     0,
+ 32124, 31100, 30076, 29052, 28028, 27004, 25980, 24956,
+ 23932, 22908, 21884, 20860, 19836, 18812, 17788, 16764,
+ 15996, 15484, 14972, 14460, 13948, 13436, 12924, 12412,
+ 11900, 11388, 10876, 10364,  9852,  9340,  8828,  8316,
+  7932,  7676,  7420,  7164,  6908,  6652,  6396,  6140,
+  5884,  5628,  5372,  5116,  4860,  4604,  4348,  4092,
+  3900,  3772,  3644,  3516,  3388,  3260,  3132,  3004,
+  2876,  2748,  2620,  2492,  2364,  2236,  2108,  1980,
+  1884,  1820,  1756,  1692,  1628,  1564,  1500,  1436,
+  1372,  1308,  1244,  1180,  1116,  1052,   988,   924,
+   876,   844,   812,   780,   748,   716,   684,   652,
+   620,   588,   556,   524,   492,   460,   428,   396,
+   372,   356,   340,   324,   308,   292,   276,   260,
+   244,   228,   212,   196,   180,   164,   148,   132,
+   120,   112,   104,    96,    88,    80,    72,    64,
+    56,    48,    40,    32,    24,    16,     8,     0
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libmpcodecs/native/cinepak.c	Sat Apr 13 18:03:02 2002 +0000
@@ -0,0 +1,886 @@
+/* ------------------------------------------------------------------------
+ * Radius Cinepak Video Decoder
+ *
+ * Dr. Tim Ferguson, 2001.
+ * For more details on the algorithm:
+ *         http://www.csse.monash.edu.au/~timf/videocodec.html
+ *
+ * This is basically a vector quantiser with adaptive vector density.  The
+ * frame is segmented into 4x4 pixel blocks, and each block is coded using
+ * either 1 or 4 vectors.
+ *
+ * There are still some issues with this code yet to be resolved.  In
+ * particular with decoding in the strip boundaries.
+ * ------------------------------------------------------------------------ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <math.h>
+
+#include "config.h"
+#include "mp_msg.h"
+#include "bswap.h"
+
+#include "libvo/img_format.h"
+#include "mp_image.h"
+
+#define DBUG	0
+#define MAX_STRIPS 32
+
+/* ------------------------------------------------------------------------ */
+typedef struct
+{
+	unsigned char y0, y1, y2, y3;
+	char u, v;
+
+	// These variables are for YV12 output: The v1 vars are for
+	// when the vector is doublesized and used by itself to paint a
+	// 4x4 block.
+	// This quad (y0 y0 y1 y1) is used on the 2 upper rows.
+	unsigned long yv12_v1_u;
+	// This quad (y2 y2 y3 y3) is used on the 2 lower rows.
+	unsigned long yv12_v1_l;
+	// The v4 vars are for when the vector is used as 1 of 4 vectors
+	// to paint a 4x4 block.
+	// Upper pair (y0 y1):
+	unsigned short yv12_v4_u;
+	// Lower pair (y2 y3):
+	unsigned short yv12_v4_l;
+
+	// These longs are for YUY2 output: The v1 vars are for when the
+	// vector is doublesized and used by itself to paint a 4x4 block.
+	// The names stand for the upper-left, upper-right,
+	// lower-left, and lower-right YUY2 pixel pairs.
+        unsigned long yuy2_v1_ul, yuy2_v1_ur;
+        unsigned long yuy2_v1_ll, yuy2_v1_lr;
+	// The v4 vars are for when the vector is used as 1 of 4 vectors
+	// to paint a 4x4 block. The names stand for upper and lower
+	// YUY2 pixel pairs.
+        unsigned long yuy2_v4_u, yuy2_v4_l;
+
+	// These longs are for BGR32 output
+	unsigned long rgb0, rgb1, rgb2, rgb3;
+
+	// These char arrays are for BGR24 output
+	unsigned char r[4], g[4], b[4];
+} cvid_codebook;
+
+typedef struct {
+	cvid_codebook *v4_codebook[MAX_STRIPS];
+	cvid_codebook *v1_codebook[MAX_STRIPS];
+	unsigned long strip_num;
+} cinepak_info;
+
+
+/* ------------------------------------------------------------------------ */
+static unsigned char *in_buffer, uiclip[1024], *uiclp = NULL;
+
+#define SCALEBITS 16
+#define ONE_HALF  ((long) 1 << (SCALEBITS-1))
+#define FIX(x)    ((long) ((x) * (1L<<SCALEBITS) + 0.5))
+static long CU_Y_tab[256], CV_Y_tab[256], CU_Cb_tab[256], CV_Cb_tab[256],
+	CU_Cr_tab[256], CV_Cr_tab[256];
+
+#define get_byte() *(in_buffer++)
+#define skip_byte() in_buffer++
+#define get_word() ((unsigned short)(in_buffer += 2, \
+	(in_buffer[-2] << 8 | in_buffer[-1])))
+#define get_long() ((unsigned long)(in_buffer += 4, \
+	(in_buffer[-4] << 24 | in_buffer[-3] << 16 | in_buffer[-2] << 8 | in_buffer[-1])))
+
+
+/* ---------------------------------------------------------------------- */
+
+// This PACKing macro packs the luminance bytes as y1-y1-y0-y0, which is
+// stored on a little endian machine as y0-y0-y1-y1. Therefore, treat it as
+// a little endian number and rearrange the bytes on big endian machines
+// using the built-in byte order macros.
+#define PACK_YV12_V1_Y(cb,y0,y1) le2me_32((((unsigned char)cb->y1)<<24)|(cb->y1<<16)|(((unsigned char)cb->y0)<<8)|(cb->y0))
+#define PACK_YV12_V4_Y(cb,y0,y1) le2me_16((((unsigned char)cb->y1)<<8)|(cb->y0))
+
+static inline void read_codebook_yv12(cvid_codebook *c, int mode)
+{
+unsigned char y0, y1, y2, y3, u, v;
+int y_uv;
+
+	if(mode)		/* black and white */
+		{
+		c->y0 = get_byte();
+		c->y1 = get_byte();
+		c->y2 = get_byte();
+		c->y3 = get_byte();
+		c->u = c->v = 128;
+		}
+	else			/* colour */
+		{
+		y0 = get_byte();  /* luma */
+		y1 = get_byte();
+		y2 = get_byte();
+		y3 = get_byte();
+		u = 128+get_byte(); /* chroma */
+		v = 128+get_byte();
+
+		/*             YUV * inv(CinYUV)
+		 *  | Y  |   | 1 -0.0655  0.0110 | | CY |
+		 *  | Cb | = | 0  1.1656 -0.0062 | | CU |
+		 *  | Cr |   | 0  0.0467  1.4187 | | CV |
+		 */
+		y_uv = (int)((CU_Y_tab[u] + CV_Y_tab[v]) >> SCALEBITS);
+		c->y0 = uiclp[y0 + y_uv];
+		c->y1 = uiclp[y1 + y_uv];
+		c->y2 = uiclp[y2 + y_uv];
+		c->y3 = uiclp[y3 + y_uv];
+		c->u = uiclp[(int)((CU_Cb_tab[u] + CV_Cb_tab[v]) >> SCALEBITS)];
+		c->v = uiclp[(int)((CU_Cr_tab[u] + CV_Cr_tab[v]) >> SCALEBITS)];
+
+		c->yv12_v1_u = PACK_YV12_V1_Y(c, y0, y1);
+		c->yv12_v1_l = PACK_YV12_V1_Y(c, y2, y3);
+		c->yv12_v4_u = PACK_YV12_V4_Y(c, y0, y1);
+		c->yv12_v4_l = PACK_YV12_V4_Y(c, y2, y3);
+		}
+}
+
+/* ---------------------------------------------------------------------- */
+
+inline void cvid_v1_yv12(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb)
+{
+unsigned char *p;
+int stride;
+
+	if(y+3>=(unsigned int)mpi->height) return; // avoid sig11
+
+	// take care of the luminance
+	stride = mpi->stride[0];
+	p = mpi->planes[0]+y*stride+x;
+	*((unsigned int*)p)=cb->yv12_v1_u;
+	*((unsigned int*)(p+stride))=cb->yv12_v1_u;
+	*((unsigned int*)(p+stride*2))=cb->yv12_v1_l;
+	*((unsigned int*)(p+stride*3))=cb->yv12_v1_l;
+
+	// now for the chrominance
+	x/=2; y/=2;
+
+	stride = mpi->stride[1];
+	p = mpi->planes[1]+y*stride+x;
+	p[0]=p[1]=p[stride]=p[stride+1]=cb->u;
+
+	stride = mpi->stride[2];
+	p = mpi->planes[2]+y*stride+x;
+	p[0]=p[1]=p[stride]=p[stride+1]=cb->v;
+
+}
+
+/* ---------------------------------------------------------------------- */
+inline void cvid_v4_yv12(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb0,
+	cvid_codebook *cb1, cvid_codebook *cb2, cvid_codebook *cb3)
+{
+unsigned char *p;
+int stride;
+
+	if(y+3>=(unsigned int)mpi->height) return; // avoid sig11
+
+	// take care of the luminance
+	stride = mpi->stride[0];
+	p = mpi->planes[0]+y*stride+x;
+	((unsigned short*)p)[0]=cb0->yv12_v4_u;
+	((unsigned short*)p)[1]=cb1->yv12_v4_u;
+	((unsigned short*)(p+stride))[0]=cb0->yv12_v4_l;
+	((unsigned short*)(p+stride))[1]=cb1->yv12_v4_l;
+	((unsigned short*)(p+stride*2))[0]=cb2->yv12_v4_u;
+	((unsigned short*)(p+stride*2))[1]=cb3->yv12_v4_u;
+	((unsigned short*)(p+stride*3))[0]=cb2->yv12_v4_l;
+	((unsigned short*)(p+stride*3))[1]=cb3->yv12_v4_l;
+
+	// now for the chrominance
+	x/=2; y/=2;
+
+	stride = mpi->stride[1];
+	p = mpi->planes[1]+y*stride+x;
+	p[0]=cb0->u; p[1]=cb1->u;
+	p[stride]=cb2->u; p[stride+1]=cb3->u;
+
+	stride = mpi->stride[2];
+	p = mpi->planes[2]+y*stride+x;
+	p[0]=cb0->v; p[1]=cb1->v;
+	p[stride]=cb2->v; p[stride+1]=cb3->v;
+
+}
+
+/* ---------------------------------------------------------------------- */
+
+#define PACK_YUY2(cb,y0,y1,u,v) le2me_32(((((unsigned char)cb->v)<<24)|(cb->y1<<16)|(((unsigned char)cb->u)<<8)|(cb->y0)))
+
+static inline void read_codebook_yuy2(cvid_codebook *c, int mode)
+{
+unsigned char y0, y1, y2, y3, u, v;
+int y_uv;
+
+	if(mode)		/* black and white */
+		{
+		c->y0 = get_byte();
+		c->y1 = get_byte();
+		c->y2 = get_byte();
+		c->y3 = get_byte();
+		c->u = c->v = 128;
+		}
+	else			/* colour */
+		{
+		y0 = get_byte();  /* luma */
+		y1 = get_byte();
+		y2 = get_byte();
+		y3 = get_byte();
+		u = 128+get_byte(); /* chroma */
+		v = 128+get_byte();
+
+		/*             YUV * inv(CinYUV)
+		 *  | Y  |   | 1 -0.0655  0.0110 | | CY |
+		 *  | Cb | = | 0  1.1656 -0.0062 | | CU |
+		 *  | Cr |   | 0  0.0467  1.4187 | | CV |
+		 */
+		y_uv = (int)((CU_Y_tab[u] + CV_Y_tab[v]) >> SCALEBITS);
+		c->y0 = uiclp[y0 + y_uv];
+		c->y1 = uiclp[y1 + y_uv];
+		c->y2 = uiclp[y2 + y_uv];
+		c->y3 = uiclp[y3 + y_uv];
+		c->u = uiclp[(int)((CU_Cb_tab[u] + CV_Cb_tab[v]) >> SCALEBITS)];
+		c->v = uiclp[(int)((CU_Cr_tab[u] + CV_Cr_tab[v]) >> SCALEBITS)];
+
+		c->yuy2_v4_u = PACK_YUY2(c, y0, y1, u, v);
+		c->yuy2_v4_l = PACK_YUY2(c, y2, y3, u, v);
+		c->yuy2_v1_ul = PACK_YUY2(c, y0, y0, u, v);
+		c->yuy2_v1_ur = PACK_YUY2(c, y1, y1, u, v);
+		c->yuy2_v1_ll = PACK_YUY2(c, y2, y2, u, v);
+		c->yuy2_v1_lr = PACK_YUY2(c, y3, y3, u, v);
+		}
+}
+
+/* ------------------------------------------------------------------------ */
+inline void cvid_v1_yuy2(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb)
+{
+int stride = mpi->stride[0] / 2;
+unsigned long *vptr = (unsigned long *)mpi->planes[0];
+
+	if(y+3>=(unsigned int)mpi->height) return; // avoid sig11
+
+	vptr += (y * mpi->stride[0] + x) / 2;
+
+	vptr[0] = cb->yuy2_v1_ul;
+	vptr[1] = cb->yuy2_v1_ur;
+
+	vptr += stride;
+	vptr[0] = cb->yuy2_v1_ul;
+	vptr[1] = cb->yuy2_v1_ur;
+
+	vptr += stride;
+	vptr[0] = cb->yuy2_v1_ll;
+	vptr[1] = cb->yuy2_v1_lr;
+
+	vptr += stride;
+	vptr[0] = cb->yuy2_v1_ll;
+	vptr[1] = cb->yuy2_v1_lr;
+}
+
+
+/* ------------------------------------------------------------------------ */
+inline void cvid_v4_yuy2(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb0,
+	cvid_codebook *cb1, cvid_codebook *cb2, cvid_codebook *cb3)
+{
+int stride = mpi->stride[0] / 2;
+unsigned long *vptr = (unsigned long *)mpi->planes[0];
+
+	if(y+3>=(unsigned int)mpi->height) return; // avoid sig11
+
+	vptr += (y * mpi->stride[0] + x) / 2;
+
+	vptr[0] = cb0->yuy2_v4_u;
+	vptr[1] = cb1->yuy2_v4_u;
+
+	vptr += stride;
+	vptr[0] = cb0->yuy2_v4_l;
+	vptr[1] = cb1->yuy2_v4_l;
+
+	vptr += stride;
+	vptr[0] = cb2->yuy2_v4_u;
+	vptr[1] = cb3->yuy2_v4_u;
+
+	vptr += stride;
+	vptr[0] = cb2->yuy2_v4_l;
+	vptr[1] = cb3->yuy2_v4_l;
+}
+
+/* ---------------------------------------------------------------------- */
+static inline void read_codebook_32(cvid_codebook *c, int mode)
+{
+int uvr, uvg, uvb;
+
+	if(mode)		/* black and white */
+		{
+		c->y0 = get_byte();
+		c->y1 = get_byte();
+		c->y2 = get_byte();
+		c->y3 = get_byte();
+		c->u = c->v = 0;
+
+		c->rgb0 = (c->y0 << 16) | (c->y0 << 8) | c->y0;
+		c->rgb1 = (c->y1 << 16) | (c->y1 << 8) | c->y1;
+		c->rgb2 = (c->y2 << 16) | (c->y2 << 8) | c->y2;
+		c->rgb3 = (c->y3 << 16) | (c->y3 << 8) | c->y3;
+		}
+	else			/* colour */
+		{
+		c->y0 = get_byte();  /* luma */
+		c->y1 = get_byte();
+		c->y2 = get_byte();
+		c->y3 = get_byte();
+		c->u = get_byte(); /* chroma */
+		c->v = get_byte();
+
+		uvr = c->v << 1;
+		uvg = -((c->u+1) >> 1) - c->v;
+		uvb = c->u << 1;
+
+		c->rgb0 = le2me_32((uiclp[c->y0 + uvr] << 16) | (uiclp[c->y0 + uvg] << 8) | uiclp[c->y0 + uvb]);
+		c->rgb1 = le2me_32((uiclp[c->y1 + uvr] << 16) | (uiclp[c->y1 + uvg] << 8) | uiclp[c->y1 + uvb]);
+		c->rgb2 = le2me_32((uiclp[c->y2 + uvr] << 16) | (uiclp[c->y2 + uvg] << 8) | uiclp[c->y2 + uvb]);
+		c->rgb3 = le2me_32((uiclp[c->y3 + uvr] << 16) | (uiclp[c->y3 + uvg] << 8) | uiclp[c->y3 + uvb]);
+		}
+}
+
+
+/* ------------------------------------------------------------------------ */
+inline void cvid_v1_32(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb)
+{
+int stride = mpi->stride[0];
+unsigned long *vptr = (unsigned long *)mpi->planes[0];
+unsigned long rgb;
+
+	if(y+3>=(unsigned int)mpi->height) return; // avoid sig11
+
+	vptr += (y * stride + x);
+
+	vptr[0] = rgb = cb->rgb0; vptr[1] = rgb;
+	vptr[2] = rgb = cb->rgb1; vptr[3] = rgb;
+	vptr += stride;
+	vptr[0] = rgb = cb->rgb0; vptr[1] = rgb;
+	vptr[2] = rgb = cb->rgb1; vptr[3] = rgb;
+	vptr += stride;
+	vptr[0] = rgb = cb->rgb2; vptr[1] = rgb;
+	vptr[2] = rgb = cb->rgb3; vptr[3] = rgb;
+	vptr += stride;
+	vptr[0] = rgb = cb->rgb2; vptr[1] = rgb;
+	vptr[2] = rgb = cb->rgb3; vptr[3] = rgb;
+}
+
+
+/* ------------------------------------------------------------------------ */
+inline void cvid_v4_32(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb0,
+	cvid_codebook *cb1, cvid_codebook *cb2, cvid_codebook *cb3)
+{
+int stride = mpi->stride[0];
+unsigned long *vptr = (unsigned long *)mpi->planes[0];
+
+	if(y+3>=(unsigned int)mpi->height) return; // avoid sig11
+
+	vptr += (y * stride + x);
+
+	vptr[0] = cb0->rgb0;
+	vptr[1] = cb0->rgb1;
+	vptr[2] = cb1->rgb0;
+	vptr[3] = cb1->rgb1;
+	vptr += stride;
+	vptr[0] = cb0->rgb2;
+	vptr[1] = cb0->rgb3;
+	vptr[2] = cb1->rgb2;
+	vptr[3] = cb1->rgb3;
+	vptr += stride;
+	vptr[0] = cb2->rgb0;
+	vptr[1] = cb2->rgb1;
+	vptr[2] = cb3->rgb0;
+	vptr[3] = cb3->rgb1;
+	vptr += stride;
+	vptr[0] = cb2->rgb2;
+	vptr[1] = cb2->rgb3;
+	vptr[2] = cb3->rgb2;
+	vptr[3] = cb3->rgb3;
+}
+
+
+/* ---------------------------------------------------------------------- */
+static inline void read_codebook_24(cvid_codebook *c, int mode)
+{
+int uvr, uvg, uvb;
+
+	if(mode)		/* black and white */
+		{
+		c->y0 = get_byte();
+		c->y1 = get_byte();
+		c->y2 = get_byte();
+		c->y3 = get_byte();
+		c->u = c->v = 0;
+
+		c->r[0] = c->g[0] = c->b[0] = c->y0;
+		c->r[1] = c->g[1] = c->b[1] = c->y1;
+		c->r[2] = c->g[2] = c->b[2] = c->y2;
+		c->r[3] = c->g[3] = c->b[3] = c->y3;
+		}
+	else			/* colour */
+		{
+		c->y0 = get_byte();  /* luma */
+		c->y1 = get_byte();
+		c->y2 = get_byte();
+		c->y3 = get_byte();
+		c->u = get_byte(); /* chroma */
+		c->v = get_byte();
+
+		uvr = c->v << 1;
+		uvg = -((c->u+1) >> 1) - c->v;
+		uvb = c->u << 1;
+
+		c->r[0] = uiclp[c->y0 + uvr]; c->g[0] = uiclp[c->y0 + uvg]; c->b[0] = uiclp[c->y0 + uvb];
+		c->r[1] = uiclp[c->y1 + uvr]; c->g[1] = uiclp[c->y1 + uvg]; c->b[1] = uiclp[c->y1 + uvb];
+		c->r[2] = uiclp[c->y2 + uvr]; c->g[2] = uiclp[c->y2 + uvg]; c->b[2] = uiclp[c->y2 + uvb];
+		c->r[3] = uiclp[c->y3 + uvr]; c->g[3] = uiclp[c->y3 + uvg]; c->b[3] = uiclp[c->y3 + uvb];
+		}
+}
+
+
+/* ------------------------------------------------------------------------ */
+inline void cvid_v1_24(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb)
+{
+unsigned char r, g, b;
+int stride = (mpi->stride[0]-4)*3;
+unsigned char *vptr = mpi->planes[0] + (y * mpi->stride[0] + x) * 3;
+
+	if(y+3>=(unsigned int)mpi->height) return; // avoid sig11
+
+	*vptr++ = b = cb->b[0]; *vptr++ = g = cb->g[0]; *vptr++ = r = cb->r[0];
+	*vptr++ = b; *vptr++ = g; *vptr++ = r;
+	*vptr++ = b = cb->b[1]; *vptr++ = g = cb->g[1]; *vptr++ = r = cb->r[1];
+	*vptr++ = b; *vptr++ = g; *vptr++ = r;
+	vptr += stride;
+	*vptr++ = b = cb->b[0]; *vptr++ = g = cb->g[0]; *vptr++ = r = cb->r[0];
+	*vptr++ = b; *vptr++ = g; *vptr++ = r;
+	*vptr++ = b = cb->b[1]; *vptr++ = g = cb->g[1]; *vptr++ = r = cb->r[1];
+	*vptr++ = b; *vptr++ = g; *vptr++ = r;
+	vptr += stride;
+	*vptr++ = b = cb->b[2]; *vptr++ = g = cb->g[2]; *vptr++ = r = cb->r[2];
+	*vptr++ = b; *vptr++ = g; *vptr++ = r;
+	*vptr++ = b = cb->b[3]; *vptr++ = g = cb->g[3]; *vptr++ = r = cb->r[3];
+	*vptr++ = b; *vptr++ = g; *vptr++ = r;
+	vptr += stride;
+	*vptr++ = b = cb->b[2]; *vptr++ = g = cb->g[2]; *vptr++ = r = cb->r[2];
+	*vptr++ = b; *vptr++ = g; *vptr++ = r;
+	*vptr++ = b = cb->b[3]; *vptr++ = g = cb->g[3]; *vptr++ = r = cb->r[3];
+	*vptr++ = b; *vptr++ = g; *vptr++ = r;
+}
+
+
+/* ------------------------------------------------------------------------ */
+inline void cvid_v4_24(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb0,
+	cvid_codebook *cb1, cvid_codebook *cb2, cvid_codebook *cb3)
+{
+int stride = (mpi->stride[0]-4)*3;
+unsigned char *vptr = mpi->planes[0] + (y * mpi->stride[0] + x) * 3;
+
+	if(y+3>=(unsigned int)mpi->height) return; // avoid sig11
+
+	*vptr++ = cb0->b[0]; *vptr++ = cb0->g[0]; *vptr++ = cb0->r[0];
+	*vptr++ = cb0->b[1]; *vptr++ = cb0->g[1]; *vptr++ = cb0->r[1];
+	*vptr++ = cb1->b[0]; *vptr++ = cb1->g[0]; *vptr++ = cb1->r[0];
+	*vptr++ = cb1->b[1]; *vptr++ = cb1->g[1]; *vptr++ = cb1->r[1];
+	vptr += stride;
+	*vptr++ = cb0->b[2]; *vptr++ = cb0->g[2]; *vptr++ = cb0->r[2];
+	*vptr++ = cb0->b[3]; *vptr++ = cb0->g[3]; *vptr++ = cb0->r[3];
+	*vptr++ = cb1->b[2]; *vptr++ = cb1->g[2]; *vptr++ = cb1->r[2];
+	*vptr++ = cb1->b[3]; *vptr++ = cb1->g[3]; *vptr++ = cb1->r[3];
+	vptr += stride;
+	*vptr++ = cb2->b[0]; *vptr++ = cb2->g[0]; *vptr++ = cb2->r[0];
+	*vptr++ = cb2->b[1]; *vptr++ = cb2->g[1]; *vptr++ = cb2->r[1];
+	*vptr++ = cb3->b[0]; *vptr++ = cb3->g[0]; *vptr++ = cb3->r[0];
+	*vptr++ = cb3->b[1]; *vptr++ = cb3->g[1]; *vptr++ = cb3->r[1];
+	vptr += stride;
+	*vptr++ = cb2->b[2]; *vptr++ = cb2->g[2]; *vptr++ = cb2->r[2];
+	*vptr++ = cb2->b[3]; *vptr++ = cb2->g[3]; *vptr++ = cb2->r[3];
+	*vptr++ = cb3->b[2]; *vptr++ = cb3->g[2]; *vptr++ = cb3->r[2];
+	*vptr++ = cb3->b[3]; *vptr++ = cb3->g[3]; *vptr++ = cb3->r[3];
+}
+
+/* ------------------------------------------------------------------------
+ * Call this function once at the start of the sequence and save the
+ * returned context for calls to decode_cinepak().
+ */
+void *decode_cinepak_init(void)
+{
+cinepak_info *cvinfo;
+int i, x;
+
+	if((cvinfo = calloc(sizeof(cinepak_info), 1)) == NULL) return NULL;
+	cvinfo->strip_num = 0;
+
+	if(uiclp == NULL)
+		{
+		uiclp = uiclip+512;
+		for(i = -512; i < 512; i++)
+			uiclp[i] = (i < 0 ? 0 : (i > 255 ? 255 : i));
+		}
+
+	for(i = 0, x = -128; i < 256; i++, x++)
+		{
+		CU_Y_tab[i] = (-FIX(0.0655)) * x;
+		CV_Y_tab[i] = (FIX(0.0110)) * x + ONE_HALF;
+		CU_Cb_tab[i] = (FIX(1.1656)) * x;
+		CV_Cb_tab[i] = (-FIX(0.0062)) * x + ONE_HALF + FIX(128);
+		CU_Cr_tab[i] = (FIX(0.0467)) * x;
+		CV_Cr_tab[i] = (FIX(1.4187)) * x + ONE_HALF + FIX(128);
+		}
+
+	return (void *)cvinfo;
+}
+
+
+/* ------------------------------------------------------------------------
+ * This function decodes a buffer containing a Cinepak encoded frame.
+ *
+ * context - the context created by decode_cinepak_init().
+ * buf - the input buffer to be decoded
+ * size - the size of the input buffer
+ * frame - the output frame buffer
+ * width - the width of the output frame
+ * height - the height of the output frame
+ * bit_per_pixel - the number of bits per pixel allocated to the output
+ *   frame; depths support:
+ *     32: BGR32
+ *     24: BGR24
+ *     16: YUY2
+ *     12: YV12
+ */
+void decode_cinepak(void *context, unsigned char *buf, int size, mp_image_t *mpi)
+{
+cinepak_info *cvinfo = (cinepak_info *)context;
+cvid_codebook *v4_codebook, *v1_codebook, *codebook = NULL;
+unsigned long x, y, y_bottom, frame_flags, strips, cv_width, cv_height, cnum,
+	strip_id, chunk_id, x0, y0, x1, y1, ci, flag, mask;
+long len, top_size, chunk_size;
+unsigned int i, cur_strip, d0, d1, d2, d3;
+int modulo;
+void (*read_codebook)(cvid_codebook *c, int mode) = NULL;
+void (*cvid_v1)(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb) = NULL;
+void (*cvid_v4)(mp_image_t *mpi, unsigned int x, unsigned int y, cvid_codebook *cb0,
+	cvid_codebook *cb1, cvid_codebook *cb2, cvid_codebook *cb3) = NULL;
+
+	x = y = 0;
+	y_bottom = 0;
+	in_buffer = buf;
+
+	frame_flags = get_byte();
+	len = get_byte() << 16;
+	len |= get_byte() << 8;
+	len |= get_byte();
+
+	switch(mpi->imgfmt)
+		{
+		case IMGFMT_I420:
+		case IMGFMT_IYUV:
+		case IMGFMT_YV12:  // YV12
+			read_codebook = read_codebook_yv12;
+			cvid_v1 = cvid_v1_yv12;
+			cvid_v4 = cvid_v4_yv12;
+			break;
+		case IMGFMT_YUY2:  // YUY2
+			read_codebook = read_codebook_yuy2;
+			cvid_v1 = cvid_v1_yuy2;
+			cvid_v4 = cvid_v4_yuy2;
+			break;
+		case IMGFMT_BGR24:  // BGR24
+			read_codebook = read_codebook_24;
+			cvid_v1 = cvid_v1_24;
+			cvid_v4 = cvid_v4_24;
+			break;
+		case IMGFMT_BGR32:  // BGR32
+			read_codebook = read_codebook_32;
+			cvid_v1 = cvid_v1_32;
+			cvid_v4 = cvid_v4_32;
+			break;
+		}
+
+	if(len != size)
+		{
+		if(len & 0x01) len++; /* AVIs tend to have a size mismatch */
+		if(len != size)
+			{
+			mp_msg(MSGT_DECVIDEO, MSGL_WARN, "CVID: corruption %d (QT/AVI) != %ld (CV)\n", size, len);
+			// return;
+			}
+		}
+
+	cv_width = get_word();
+	cv_height = get_word();
+	strips = get_word();
+
+	if(strips > cvinfo->strip_num)
+		{
+		if(strips >= MAX_STRIPS) 
+			{
+			mp_msg(MSGT_DECVIDEO, MSGL_WARN, "CVID: strip overflow (more than %d)\n", MAX_STRIPS);
+			return;
+			}
+
+		for(i = cvinfo->strip_num; i < strips; i++)
+			{
+			if((cvinfo->v4_codebook[i] = (cvid_codebook *)calloc(sizeof(cvid_codebook), 260)) == NULL)
+				{
+				mp_msg(MSGT_DECVIDEO, MSGL_WARN, "CVID: codebook v4 alloc err\n");
+				return;
+				}
+
+			if((cvinfo->v1_codebook[i] = (cvid_codebook *)calloc(sizeof(cvid_codebook), 260)) == NULL)
+				{
+				mp_msg(MSGT_DECVIDEO, MSGL_WARN, "CVID: codebook v1 alloc err\n");
+				return;
+				}
+			}
+		}
+	cvinfo->strip_num = strips;
+
+#if DBUG
+	mp_msg(MSGT_DECVIDEO, MSGL_WARN, "CVID: <%ld,%ld> strips %ld\n", cv_width, cv_height, strips);
+#endif
+
+	for(cur_strip = 0; cur_strip < strips; cur_strip++)
+		{
+		v4_codebook = cvinfo->v4_codebook[cur_strip];
+		v1_codebook = cvinfo->v1_codebook[cur_strip];
+
+		if((cur_strip > 0) && (!(frame_flags & 0x01)))
+			{
+			memcpy(cvinfo->v4_codebook[cur_strip], cvinfo->v4_codebook[cur_strip-1], 260 * sizeof(cvid_codebook));
+			memcpy(cvinfo->v1_codebook[cur_strip], cvinfo->v1_codebook[cur_strip-1], 260 * sizeof(cvid_codebook));
+			}
+
+		strip_id = get_word();		/* 1000 = key strip, 1100 = iter strip */
+		top_size = get_word();
+		y0 = get_word();		/* FIXME: most of these are ignored at the moment */
+		x0 = get_word();
+		y1 = get_word();
+		x1 = get_word();
+
+		y_bottom += y1;
+		top_size -= 12;
+		x = 0;
+//		if(x1 != (unsigned int)mpi->width) 
+//			mp_msg(MSGT_DECVIDEO, MSGL_WARN, "CVID: Warning x1 (%ld) != width (%d)\n", x1, mpi->width);
+
+//x1 = mpi->width;
+#if DBUG
+	mp_msg(MSGT_DECVIDEO, MSGL_WARN, "   %d) %04lx %04ld <%ld,%ld> <%ld,%ld> yt %ld  %d\n",
+		cur_strip, strip_id, top_size, x0, y0, x1, y1, y_bottom);
+#endif
+
+		while(top_size > 0)
+			{
+			chunk_id  = get_word();
+			chunk_size = get_word();
+
+#if DBUG
+	mp_msg(MSGT_DECVIDEO, MSGL_WARN, "        %04lx %04ld\n", chunk_id, chunk_size);
+#endif
+			top_size -= chunk_size;
+			chunk_size -= 4;
+
+			switch(chunk_id)
+				{
+					/* -------------------- Codebook Entries -------------------- */
+				case 0x2000:
+				case 0x2200:
+					modulo = chunk_size % 6;
+					codebook = (chunk_id == 0x2200 ? v1_codebook : v4_codebook);
+					cnum = (chunk_size - modulo) / 6;
+					for(i = 0; i < cnum; i++) read_codebook(codebook+i, 0);
+					while (modulo--)
+						in_buffer++;
+					break;
+
+				case 0x2400:
+				case 0x2600:		/* 8 bit per pixel */
+					codebook = (chunk_id == 0x2600 ? v1_codebook : v4_codebook);
+					cnum = chunk_size/4;  
+					for(i = 0; i < cnum; i++) read_codebook(codebook+i, 1);
+					break;
+
+				case 0x2100:
+				case 0x2300:
+					codebook = (chunk_id == 0x2300 ? v1_codebook : v4_codebook);
+
+					ci = 0;
+					while(chunk_size > 3)
+						{
+						flag = get_long();
+						chunk_size -= 4;
+
+						for(i = 0; i < 32; i++)
+							{
+							if(flag & 0x80000000)
+								{
+								chunk_size -= 6;
+								read_codebook(codebook+ci, 0);
+								}
+
+							ci++;
+							flag <<= 1;
+							}
+						}
+					while(chunk_size > 0) { skip_byte(); chunk_size--; }
+					break;
+
+				case 0x2500:
+				case 0x2700:		/* 8 bit per pixel */
+					codebook = (chunk_id == 0x2700 ? v1_codebook : v4_codebook);
+
+					ci = 0;
+					while(chunk_size > 0)
+						{
+						flag = get_long();
+						chunk_size -= 4;
+
+						for(i = 0; i < 32; i++)
+							{
+							if(flag & 0x80000000)
+								{
+								chunk_size -= 4;
+								read_codebook(codebook+ci, 1);
+								}
+
+							ci++;
+							flag <<= 1;
+							}
+						}
+					while(chunk_size > 0) { skip_byte(); chunk_size--; }
+					break;
+
+					/* -------------------- Frame -------------------- */
+				case 0x3000: 
+					while((chunk_size > 0) && (y < y_bottom))
+						{
+						flag = get_long();
+						chunk_size -= 4;
+						
+						for(i = 0; i < 32; i++)
+							{ 
+							if(y >= y_bottom) break;
+							if(flag & 0x80000000)	/* 4 bytes per block */
+								{
+								d0 = get_byte();
+								d1 = get_byte();
+								d2 = get_byte();
+								d3 = get_byte();
+								chunk_size -= 4;
+								cvid_v4(mpi, x, y, v4_codebook+d0, v4_codebook+d1, v4_codebook+d2, v4_codebook+d3);
+								}
+							else		/* 1 byte per block */
+								{
+								cvid_v1(mpi, x, y, v1_codebook + get_byte());
+								chunk_size--;
+								}
+
+							x += 4;
+							if(x >= (unsigned int)x1)
+								{
+								x = 0;
+								y += 4;
+								}
+							flag <<= 1;
+							}
+						}
+					while(chunk_size > 0) { skip_byte(); chunk_size--; }
+					break;
+
+				case 0x3100:
+					while((chunk_size > 0) && (y < y_bottom))
+						{
+							/* ---- flag bits: 0 = SKIP, 10 = V1, 11 = V4 ---- */
+						flag = (unsigned long)get_long();
+						chunk_size -= 4;
+						mask = 0x80000000;
+
+						while((mask) && (y < y_bottom))
+							{
+							if(flag & mask)
+								{
+								if(mask == 1)
+									{
+									if(chunk_size < 0) break;
+									flag = (unsigned long)get_long();
+									chunk_size -= 4;
+									mask = 0x80000000;
+									}
+								else mask >>= 1;
+								
+								if(flag & mask)		/* V4 */
+									{
+									d0 = get_byte();
+									d1 = get_byte();
+									d2 = get_byte();
+									d3 = get_byte();
+									chunk_size -= 4;
+									cvid_v4(mpi, x, y, v4_codebook+d0, v4_codebook+d1, v4_codebook+d2, v4_codebook+d3);
+									}
+								else		/* V1 */
+									{
+									chunk_size--;
+									cvid_v1(mpi, x, y, v1_codebook + get_byte());
+									}
+								}		/* else SKIP */
+
+							mask >>= 1;
+							x += 4;
+							if(x >= (unsigned int)x1)
+								{
+								x = 0;
+								y += 4;
+								}
+							}
+						}
+
+					while(chunk_size > 0) { skip_byte(); chunk_size--; }
+					break;
+
+				case 0x3200:		/* each byte is a V1 codebook */
+					while((chunk_size > 0) && (y < y_bottom))
+						{
+						cvid_v1(mpi, x, y, v1_codebook + get_byte());
+						chunk_size--;
+						x += 4;
+						if(x >= (unsigned int)x1)
+							{
+							x = 0;
+							y += 4;
+							}
+						}
+					while(chunk_size > 0) { skip_byte(); chunk_size--; }
+					break;
+
+				default:
+					mp_msg(MSGT_DECVIDEO, MSGL_WARN, "CVID: unknown chunk_id %08lx\n", chunk_id);
+					while(chunk_size > 0) { skip_byte(); chunk_size--; }
+					break;
+				}
+			}
+		}
+
+	if(len != size)
+		{
+		if(len & 0x01) len++; /* AVIs tend to have a size mismatch */
+		if(len != size)
+			{
+			long xlen;
+			skip_byte();
+			xlen = get_byte() << 16;
+			xlen |= get_byte() << 8;
+			xlen |= get_byte(); /* Read Len */
+			mp_msg(MSGT_DECVIDEO, MSGL_WARN, "CVID: END INFO chunk size %d cvid size1 %ld cvid size2 %ld\n", size, len, xlen);
+			}
+		}
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libmpcodecs/native/cyuv.c	Sat Apr 13 18:03:02 2002 +0000
@@ -0,0 +1,135 @@
+/* ------------------------------------------------------------------------
+ * Creative YUV Video Decoder
+ *
+ * Dr. Tim Ferguson, 2001.
+ * For more details on the algorithm:
+ *         http://www.csse.monash.edu.au/~timf/videocodec.html
+ *
+ * This is a very simple predictive coder.  A video frame is coded in YUV411
+ * format.  The first pixel of each scanline is coded using the upper four
+ * bits of its absolute value.  Subsequent pixels for the scanline are coded
+ * using the difference between the last pixel and the current pixel (DPCM).
+ * The DPCM values are coded using a 16 entry table found at the start of the
+ * frame.  Thus four bits per component are used and are as follows:
+ *     UY VY YY UY VY YY UY VY...
+ * This code assumes the frame width will be a multiple of four pixels.  This
+ * should probably be fixed.
+ * ------------------------------------------------------------------------ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "loader/wine/avifmt.h" // for mmioFOURCC macro
+
+/* ------------------------------------------------------------------------
+ * This function decodes a buffer containing a CYUV encoded frame.
+ *
+ * buf - the input buffer to be decoded
+ * size - the size of the input buffer
+ * frame - the output frame buffer (UYVY format)
+ * width - the width of the output frame
+ * height - the height of the output frame
+ * format - the requested output format
+ */
+void decode_cyuv(unsigned char *buf, int size, unsigned char *frame, int width, int height, int format)
+{
+int i, xpos, ypos, cur_Y = 0, cur_U = 0, cur_V = 0;
+char *delta_y_tbl, *delta_c_tbl, *ptr;
+
+	delta_y_tbl = buf + 16;
+	delta_c_tbl = buf + 32;
+	ptr = buf + (16 * 3);
+
+	for(ypos = 0; ypos < height; ypos++)
+		for(xpos = 0; xpos < width; xpos += 4)
+			{
+			if(xpos == 0)		/* first pixels in scanline */
+				{
+				cur_U = *(ptr++);
+				cur_Y = (cur_U & 0x0f) << 4;
+				cur_U = cur_U & 0xf0;
+				if (format == mmioFOURCC('Y','U','Y','2'))
+					{
+					*frame++ = cur_Y;
+					*frame++ = cur_U;
+					}
+				else
+					{
+					*frame++ = cur_U;
+					*frame++ = cur_Y;
+					}
+
+				cur_V = *(ptr++);
+				cur_Y = (cur_Y + delta_y_tbl[cur_V & 0x0f]) & 0xff;
+				cur_V = cur_V & 0xf0;
+				if (format == mmioFOURCC('Y','U','Y','2'))
+					{
+					*frame++ = cur_Y;
+					*frame++ = cur_V;
+					}
+				else
+					{
+					*frame++ = cur_V;
+					*frame++ = cur_Y;
+					}
+				}
+			else			/* subsequent pixels in scanline */
+				{
+				i = *(ptr++);
+				cur_U = (cur_U + delta_c_tbl[i >> 4]) & 0xff;
+				cur_Y = (cur_Y + delta_y_tbl[i & 0x0f]) & 0xff;
+				if (format == mmioFOURCC('Y','U','Y','2'))
+					{
+					*frame++ = cur_Y;
+					*frame++ = cur_U;
+					}
+				else
+					{
+					*frame++ = cur_U;
+					*frame++ = cur_Y;
+					}
+
+				i = *(ptr++);
+				cur_V = (cur_V + delta_c_tbl[i >> 4]) & 0xff;
+				cur_Y = (cur_Y + delta_y_tbl[i & 0x0f]) & 0xff;
+				if (format == mmioFOURCC('Y','U','Y','2'))
+					{
+					*frame++ = cur_Y;
+					*frame++ = cur_V;
+					}
+				else
+					{
+					*frame++ = cur_V;
+					*frame++ = cur_Y;
+					}
+				}
+
+			i = *(ptr++);
+			cur_Y = (cur_Y + delta_y_tbl[i & 0x0f]) & 0xff;
+			if (format == mmioFOURCC('Y','U','Y','2'))
+				{
+				*frame++ = cur_Y;
+				*frame++ = cur_U;
+				}
+			else
+				{
+				*frame++ = cur_U;
+				*frame++ = cur_Y;
+				}
+
+			cur_Y = (cur_Y + delta_y_tbl[i >> 4]) & 0xff;
+			if (format == mmioFOURCC('Y','U','Y','2'))
+				{
+				*frame++ = cur_Y;
+				*frame++ = cur_V;
+				}
+			else
+				{
+				*frame++ = cur_V;
+				*frame++ = cur_Y;
+				}
+			}
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libmpcodecs/native/fli.c	Sat Apr 13 18:03:02 2002 +0000
@@ -0,0 +1,379 @@
+/*
+    FLI Decoder for MPlayer
+    
+    (C) 2001 Mike Melanson
+    
+    32bpp support (c) alex
+
+    Additional code and bug fixes by Roberto Togni
+
+    For information on the FLI format, as well as various traps to
+    avoid while programming one, visit:
+      http://www.pcisys.net/~melanson/codecs/
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "config.h"
+#include "bswap.h"
+#include "mp_msg.h"
+
+#define LE_16(x) (le2me_16(*(unsigned short *)(x)))
+#define LE_32(x) (le2me_32(*(unsigned int *)(x)))
+
+#define FLI_256_COLOR 4
+#define FLI_DELTA     7
+#define FLI_COLOR     11
+#define FLI_LC        12
+#define FLI_BLACK     13
+#define FLI_BRUN      15
+#define FLI_COPY      16
+#define FLI_MINI      18
+
+// 256 RGB entries; 25% of these bytes will be unused, but it's faster
+// to index 4-byte entries
+#define PALETTE_SIZE 1024
+static unsigned char palette[PALETTE_SIZE];
+
+void *init_fli_decoder(int width, int height)
+{
+  memset(palette, 0, PALETTE_SIZE);
+
+  return malloc(width * height * sizeof (unsigned char));
+}
+
+void decode_fli_frame(
+  unsigned char *encoded,
+  int encoded_size,
+  unsigned char *decoded,
+  int width,
+  int height,
+  int bytes_per_pixel,
+  void *context)
+{
+  int stream_ptr = 0;
+  int stream_ptr_after_color_chunk;
+  int pixel_ptr;
+  int palette_ptr1;
+  int palette_ptr2;
+  unsigned char palette_idx1;
+  unsigned char palette_idx2;
+
+  unsigned int frame_size;
+  int num_chunks;
+
+  unsigned int chunk_size;
+  int chunk_type;
+
+  int i, j;
+
+  int color_packets;
+  int color_changes;
+  int color_scale;
+
+  int lines;
+  int compressed_lines;
+  int starting_line;
+  signed short line_packets;
+  int y_ptr;
+  int line_inc = width * bytes_per_pixel;
+  signed char byte_run;
+  int pixel_skip;
+  int update_whole_frame = 0; // Palette change flag
+  unsigned char *fli_ghost_image = (unsigned char *)context;
+  int ghost_pixel_ptr;
+  int ghost_y_ptr;
+
+  frame_size = LE_32(&encoded[stream_ptr]);
+  stream_ptr += 6;  // skip the magic number
+  num_chunks = LE_16(&encoded[stream_ptr]);
+  stream_ptr += 10;  // skip padding
+
+  // iterate through the chunks
+  frame_size -= 16;
+  while ((frame_size > 0) && (num_chunks > 0))
+  {
+    chunk_size = LE_32(&encoded[stream_ptr]);
+    stream_ptr += 4;
+    chunk_type = LE_16(&encoded[stream_ptr]);
+    stream_ptr += 2;
+
+    switch (chunk_type)
+    {
+    case FLI_256_COLOR:
+    case FLI_COLOR:
+      stream_ptr_after_color_chunk = stream_ptr + chunk_size - 6;
+      if (chunk_type == FLI_COLOR)
+        color_scale = 4;
+      else
+        color_scale = 1;
+      // set up the palette
+      color_packets = LE_16(&encoded[stream_ptr]);
+      stream_ptr += 2;
+      palette_ptr1 = 0;
+      for (i = 0; i < color_packets; i++)
+      {
+        // first byte is how many colors to skip
+        palette_ptr1 += (encoded[stream_ptr++] * 4);
+        // wrap around, for good measure
+        if (palette_ptr1 >= PALETTE_SIZE)
+          palette_ptr1 = 0;
+        // next byte indicates how many entries to change
+        color_changes = encoded[stream_ptr++];
+        // if there are 0 color changes, there are actually 256
+        if (color_changes == 0)
+          color_changes = 256;
+        for (j = 0; j < color_changes; j++)
+        {
+          palette[palette_ptr1++] = encoded[stream_ptr + 2] * color_scale;
+          palette[palette_ptr1++] = encoded[stream_ptr + 1] * color_scale;
+          palette[palette_ptr1++] = encoded[stream_ptr + 0] * color_scale;
+          palette_ptr1++;
+          stream_ptr += 3;
+        }
+      }
+
+      // color chunks sometimes have weird 16-bit alignment issues;
+      // therefore, take the hardline approach and set the stream_ptr
+      // to the value calculate w.r.t. the size specified by the color
+      // chunk header
+      stream_ptr = stream_ptr_after_color_chunk;
+
+      /* Palette has changed, must update frame */
+      update_whole_frame = 1;
+      break;
+
+    case FLI_DELTA:
+      y_ptr = ghost_y_ptr = 0;
+      compressed_lines = LE_16(&encoded[stream_ptr]);
+      stream_ptr += 2;
+      while (compressed_lines > 0)
+      {
+        line_packets = LE_16(&encoded[stream_ptr]);
+        stream_ptr += 2;
+        if (line_packets < 0)
+        {
+          line_packets = -line_packets;
+          y_ptr += (line_packets * line_inc);
+          ghost_y_ptr += (line_packets * width);
+        }
+        else
+        {
+          pixel_ptr = y_ptr;
+          ghost_pixel_ptr = ghost_y_ptr;
+          for (i = 0; i < line_packets; i++)
+          {
+            // account for the skip bytes
+            pixel_skip = encoded[stream_ptr++];
+            pixel_ptr += pixel_skip * bytes_per_pixel;
+            ghost_pixel_ptr += pixel_skip;
+            byte_run = encoded[stream_ptr++];
+            if (byte_run < 0)
+            {
+              byte_run = -byte_run;
+              palette_ptr1 = (palette_idx1 = encoded[stream_ptr++]) * 4;
+              palette_ptr2 = (palette_idx2 = encoded[stream_ptr++]) * 4;
+              for (j = 0; j < byte_run; j++)
+              {
+                fli_ghost_image[ghost_pixel_ptr++] = palette_idx1;
+                decoded[pixel_ptr++] = palette[palette_ptr1 + 0];
+                decoded[pixel_ptr++] = palette[palette_ptr1 + 1];
+                decoded[pixel_ptr++] = palette[palette_ptr1 + 2];
+		if (bytes_per_pixel == 4) /* 32bpp */
+		    pixel_ptr++;
+
+                fli_ghost_image[ghost_pixel_ptr++] = palette_idx2;
+                decoded[pixel_ptr++] = palette[palette_ptr2 + 0];
+                decoded[pixel_ptr++] = palette[palette_ptr2 + 1];
+                decoded[pixel_ptr++] = palette[palette_ptr2 + 2];
+		if (bytes_per_pixel == 4) /* 32bpp */
+		    pixel_ptr++;
+              }
+            }
+            else
+            {
+              for (j = 0; j < byte_run * 2; j++)
+              {
+                palette_ptr1 = (palette_idx1 = encoded[stream_ptr++]) * 4;
+                fli_ghost_image[ghost_pixel_ptr++] = palette_idx1;
+                decoded[pixel_ptr++] = palette[palette_ptr1 + 0];
+                decoded[pixel_ptr++] = palette[palette_ptr1 + 1];
+                decoded[pixel_ptr++] = palette[palette_ptr1 + 2];
+		if (bytes_per_pixel == 4) /* 32bpp */
+		    pixel_ptr++;
+              }
+            }
+          }
+          y_ptr += line_inc;
+          ghost_y_ptr += width;
+          compressed_lines--;
+        }
+      }
+      break;
+
+    case FLI_LC:
+      // line compressed
+      starting_line = LE_16(&encoded[stream_ptr]);
+      stream_ptr += 2;
+      y_ptr = starting_line * line_inc;
+      ghost_y_ptr = starting_line * width;
+
+      compressed_lines = LE_16(&encoded[stream_ptr]);
+      stream_ptr += 2;
+      while (compressed_lines > 0)
+      {
+        pixel_ptr = y_ptr;
+        ghost_pixel_ptr = ghost_y_ptr;
+        line_packets = encoded[stream_ptr++];
+        if (line_packets > 0)
+        {
+          for (i = 0; i < line_packets; i++)
+          {
+            // account for the skip bytes
+            pixel_skip = encoded[stream_ptr++];
+            pixel_ptr += pixel_skip * bytes_per_pixel;
+            ghost_pixel_ptr += pixel_skip;
+            byte_run = encoded[stream_ptr++];
+            if (byte_run > 0)
+            {
+              for (j = 0; j < byte_run; j++)
+              {
+                palette_ptr1 = (palette_idx1 = encoded[stream_ptr++]) * 4;
+                fli_ghost_image[ghost_pixel_ptr++] = palette_idx1;
+                decoded[pixel_ptr++] = palette[palette_ptr1 + 0];
+                decoded[pixel_ptr++] = palette[palette_ptr1 + 1];
+                decoded[pixel_ptr++] = palette[palette_ptr1 + 2];
+		if (bytes_per_pixel == 4) /* 32bpp */
+		    pixel_ptr++;
+              }
+            }
+            else
+            {
+              byte_run = -byte_run;
+              palette_ptr1 = (palette_idx1 = encoded[stream_ptr++]) * 4;
+              for (j = 0; j < byte_run; j++)
+              {
+                fli_ghost_image[ghost_pixel_ptr++] = palette_idx1;
+                decoded[pixel_ptr++] = palette[palette_ptr1 + 0];
+                decoded[pixel_ptr++] = palette[palette_ptr1 + 1];
+                decoded[pixel_ptr++] = palette[palette_ptr1 + 2];
+		if (bytes_per_pixel == 4) /* 32bpp */
+		    pixel_ptr++;
+              }
+            }
+          }
+        }
+
+        y_ptr += line_inc;
+        ghost_y_ptr += width;
+        compressed_lines--;
+      }
+      break;
+
+    case FLI_BLACK:
+      // set the whole frame to color 0 (which is usually black) by
+      // clearing the ghost image and trigger a full frame update
+      memset(fli_ghost_image, 0, width * height * sizeof(unsigned char));
+      update_whole_frame = 1;
+      break;
+
+    case FLI_BRUN:
+      // byte run compression
+      y_ptr = 0;
+      ghost_y_ptr = 0;
+      for (lines = 0; lines < height; lines++)
+      {
+        pixel_ptr = y_ptr;
+        ghost_pixel_ptr = ghost_y_ptr;
+        line_packets = encoded[stream_ptr++];
+        for (i = 0; i < line_packets; i++)
+        {
+          byte_run = encoded[stream_ptr++];
+          if (byte_run > 0)
+          {
+            palette_ptr1 = (palette_idx1 = encoded[stream_ptr++]) * 4;
+            for (j = 0; j < byte_run; j++)
+            {
+              fli_ghost_image[ghost_pixel_ptr++] = palette_idx1;
+              decoded[pixel_ptr++] = palette[palette_ptr1 + 0];
+              decoded[pixel_ptr++] = palette[palette_ptr1 + 1];
+              decoded[pixel_ptr++] = palette[palette_ptr1 + 2];
+	      if (bytes_per_pixel == 4) /* 32bpp */
+		pixel_ptr++;
+            }
+          }
+          else  // copy bytes if byte_run < 0
+          {
+            byte_run = -byte_run;
+            for (j = 0; j < byte_run; j++)
+            {
+              palette_ptr1 = (palette_idx1 = encoded[stream_ptr++]) * 4;
+              fli_ghost_image[ghost_pixel_ptr++] = palette_idx1;
+              decoded[pixel_ptr++] = palette[palette_ptr1 + 0];
+              decoded[pixel_ptr++] = palette[palette_ptr1 + 1];
+              decoded[pixel_ptr++] = palette[palette_ptr1 + 2];
+	      if (bytes_per_pixel == 4) /* 32bpp */
+		pixel_ptr++;
+            }
+          }
+        }
+
+        y_ptr += line_inc;
+        ghost_y_ptr += width;
+      }
+      break;
+
+    case FLI_COPY:
+      // copy the chunk (uncompressed frame) to the ghost image and
+      // schedule the whole frame to be updated
+      if (chunk_size - 6 > width * height)
+      {
+        mp_msg(MSGT_DECVIDEO, MSGL_WARN,
+         "FLI: in chunk FLI_COPY : source data (%d bytes) bigger than image," \
+         " skipping chunk\n",
+         chunk_size - 6);
+         break;
+      }
+      else
+        memcpy(fli_ghost_image, &encoded[stream_ptr], chunk_size - 6);
+      stream_ptr += chunk_size - 6;
+      update_whole_frame = 1;
+      break;
+
+    case FLI_MINI:
+      // sort of a thumbnail? disregard this chunk...
+      stream_ptr += chunk_size - 6;
+      break;
+
+    default:
+      mp_msg (MSGT_DECVIDEO, MSGL_WARN,
+       "FLI: Unrecognized chunk type: %d\n", chunk_type);
+      break;
+    }
+
+    frame_size -= chunk_size;
+    num_chunks--;
+  }
+
+  if (update_whole_frame)
+  {
+    pixel_ptr = ghost_pixel_ptr = 0;
+    while (pixel_ptr < (width * height * bytes_per_pixel))
+    {
+      palette_ptr1 = fli_ghost_image[ghost_pixel_ptr++] * 4;
+      decoded[pixel_ptr++] = palette[palette_ptr1 + 0];
+      decoded[pixel_ptr++] = palette[palette_ptr1 + 1];
+      decoded[pixel_ptr++] = palette[palette_ptr1 + 2];
+      if (bytes_per_pixel == 4) /* 32bpp */
+        pixel_ptr++;
+    }
+  }
+
+  // by the end of the chunk, the stream ptr should equal the frame 
+  // size (minus 1, possibly); if it doesn't, issue a warning
+  if ((stream_ptr != encoded_size) && (stream_ptr != encoded_size - 1))
+    mp_msg(MSGT_DECVIDEO, MSGL_WARN,
+      "  warning: processed FLI chunk where encoded size = %d\n" \
+      "  and final chunk ptr = %d\n",
+      encoded_size, stream_ptr);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libmpcodecs/native/lzoconf.h	Sat Apr 13 18:03:02 2002 +0000
@@ -0,0 +1,386 @@
+/* lzoconf.h -- configuration for the LZO real-time data compression library
+
+   This file is part of the LZO real-time data compression library.
+
+   Copyright (C) 2000 Markus Franz Xaver Johannes Oberhumer
+   Copyright (C) 1999 Markus Franz Xaver Johannes Oberhumer
+   Copyright (C) 1998 Markus Franz Xaver Johannes Oberhumer
+   Copyright (C) 1997 Markus Franz Xaver Johannes Oberhumer
+   Copyright (C) 1996 Markus Franz Xaver Johannes Oberhumer
+
+   The LZO library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of
+   the License, or (at your option) any later version.
+
+   The LZO library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with the LZO library; see the file COPYING.
+   If not, write to the Free Software Foundation, Inc.,
+   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+   Markus F.X.J. Oberhumer
+   <markus.oberhumer@jk.uni-linz.ac.at>
+   http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html
+ */
+
+
+#ifndef __LZOCONF_H
+#define __LZOCONF_H
+
+#define LZO_VERSION             0x1070
+#define LZO_VERSION_STRING      "1.07"
+#define LZO_VERSION_DATE        "Oct 18 2000"
+
+/* internal Autoconf configuration file - only used when building LZO */
+#if defined(LZO_HAVE_CONFIG_H)
+#  include <config.h>
+#endif
+#include <limits.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/***********************************************************************
+// LZO requires a conforming <limits.h>
+************************************************************************/
+
+#if !defined(CHAR_BIT) || (CHAR_BIT != 8)
+#  error "invalid CHAR_BIT"
+#endif
+#if !defined(UCHAR_MAX) || !defined(UINT_MAX) || !defined(ULONG_MAX)
+#  error "check your compiler installation"
+#endif
+#if (USHRT_MAX < 1) || (UINT_MAX < 1) || (ULONG_MAX < 1)
+#  error "your limits.h macros are broken"
+#endif
+
+/* workaround a cpp bug under hpux 10.20 */
+#define LZO_0xffffffffL     4294967295ul
+
+
+/***********************************************************************
+// architecture defines
+************************************************************************/
+
+#if !defined(__LZO_WIN) && !defined(__LZO_DOS) && !defined(__LZO_OS2)
+#  if defined(__WINDOWS__) || defined(_WINDOWS) || defined(_Windows)
+#    define __LZO_WIN
+#  elif defined(__WIN32__) || defined(_WIN32) || defined(WIN32)
+#    define __LZO_WIN
+#  elif defined(__NT__) || defined(__NT_DLL__) || defined(__WINDOWS_386__)
+#    define __LZO_WIN
+#  elif defined(__DOS__) || defined(__MSDOS__) || defined(MSDOS)
+#    define __LZO_DOS
+#  elif defined(__OS2__) || defined(__OS2V2__) || defined(OS2)
+#    define __LZO_OS2
+#  elif defined(__palmos__)
+#    define __LZO_PALMOS
+#  elif defined(__TOS__) || defined(__atarist__)
+#    define __LZO_TOS
+#  endif
+#endif
+
+#if (UINT_MAX < LZO_0xffffffffL)
+#  if defined(__LZO_WIN)
+#    define __LZO_WIN16
+#  elif defined(__LZO_DOS)
+#    define __LZO_DOS16
+#  elif defined(__LZO_PALMOS)
+#    define __LZO_PALMOS16
+#  elif defined(__LZO_TOS)
+#    define __LZO_TOS16
+#  elif defined(__C166__)
+#  else
+#    error "16-bit target not supported - contact me for porting hints"
+#  endif
+#endif
+
+#if !defined(__LZO_i386)
+#  if defined(__LZO_DOS) || defined(__LZO_WIN16)
+#    define __LZO_i386
+#  elif defined(__i386__) || defined(__386__) || defined(_M_IX86)
+#    define __LZO_i386
+#  endif
+#endif
+
+#if defined(__LZO_STRICT_16BIT)
+#  if (UINT_MAX < LZO_0xffffffffL)
+#    include <lzo16bit.h>
+#  endif
+#endif
+
+/* memory checkers */
+#if !defined(__LZO_CHECKER)
+#  if defined(__BOUNDS_CHECKING_ON)
+#    define __LZO_CHECKER
+#  elif defined(__CHECKER__)
+#    define __LZO_CHECKER
+#  elif defined(__INSURE__)
+#    define __LZO_CHECKER
+#  elif defined(__PURIFY__)
+#    define __LZO_CHECKER
+#  endif
+#endif
+
+
+/***********************************************************************
+// integral and pointer types
+************************************************************************/
+
+/* Integral types with 32 bits or more */
+#if !defined(LZO_UINT32_MAX)
+#  if (UINT_MAX >= LZO_0xffffffffL)
+     typedef unsigned int       lzo_uint32;
+     typedef int                lzo_int32;
+#    define LZO_UINT32_MAX      UINT_MAX
+#    define LZO_INT32_MAX       INT_MAX
+#    define LZO_INT32_MIN       INT_MIN
+#  elif (ULONG_MAX >= LZO_0xffffffffL)
+     typedef unsigned long      lzo_uint32;
+     typedef long               lzo_int32;
+#    define LZO_UINT32_MAX      ULONG_MAX
+#    define LZO_INT32_MAX       LONG_MAX
+#    define LZO_INT32_MIN       LONG_MIN
+#  else
+#    error "lzo_uint32"
+#  endif
+#endif
+
+/* lzo_uint is used like size_t */
+#if !defined(LZO_UINT_MAX)
+#  if (UINT_MAX >= LZO_0xffffffffL)
+     typedef unsigned int       lzo_uint;
+     typedef int                lzo_int;
+#    define LZO_UINT_MAX        UINT_MAX
+#    define LZO_INT_MAX         INT_MAX
+#    define LZO_INT_MIN         INT_MIN
+#  elif (ULONG_MAX >= LZO_0xffffffffL)
+     typedef unsigned long      lzo_uint;
+     typedef long               lzo_int;
+#    define LZO_UINT_MAX        ULONG_MAX
+#    define LZO_INT_MAX         LONG_MAX
+#    define LZO_INT_MIN         LONG_MIN
+#  else
+#    error "lzo_uint"
+#  endif
+#endif
+
+
+/* Memory model that allows to access memory at offsets of lzo_uint. */
+#if !defined(__LZO_MMODEL)
+#  if (LZO_UINT_MAX <= UINT_MAX)
+#    define __LZO_MMODEL
+#  elif defined(__LZO_DOS16) || defined(__LZO_WIN16)
+#    define __LZO_MMODEL        __huge
+#    define LZO_999_UNSUPPORTED
+#  elif defined(__LZO_PALMOS16) || defined(__LZO_TOS16)
+#    define __LZO_MMODEL
+#  else
+#    error "__LZO_MMODEL"
+#  endif
+#endif
+
+/* no typedef here because of const-pointer issues */
+#define lzo_byte                unsigned char __LZO_MMODEL
+#define lzo_bytep               unsigned char __LZO_MMODEL *
+#define lzo_charp               char __LZO_MMODEL *
+#define lzo_voidp               void __LZO_MMODEL *
+#define lzo_shortp              short __LZO_MMODEL *
+#define lzo_ushortp             unsigned short __LZO_MMODEL *
+#define lzo_uint32p             lzo_uint32 __LZO_MMODEL *
+#define lzo_int32p              lzo_int32 __LZO_MMODEL *
+#define lzo_uintp               lzo_uint __LZO_MMODEL *
+#define lzo_intp                lzo_int __LZO_MMODEL *
+#define lzo_voidpp              lzo_voidp __LZO_MMODEL *
+#define lzo_bytepp              lzo_bytep __LZO_MMODEL *
+
+typedef int lzo_bool;
+
+#ifndef lzo_sizeof_dict_t
+#  define lzo_sizeof_dict_t     sizeof(lzo_bytep)
+#endif
+
+
+/***********************************************************************
+// function types
+************************************************************************/
+
+/* linkage */
+#if !defined(__LZO_EXTERN_C)
+#  ifdef __cplusplus
+#    define __LZO_EXTERN_C      extern "C"
+#  else
+#    define __LZO_EXTERN_C      extern
+#  endif
+#endif
+
+/* calling conventions */
+#if !defined(__LZO_CDECL)
+#  if defined(__LZO_DOS16) || defined(__LZO_WIN16)
+#    define __LZO_CDECL         __far __cdecl
+#  elif defined(__LZO_i386) && defined(_MSC_VER)
+#    define __LZO_CDECL         __cdecl
+#  elif defined(__LZO_i386) && defined(__WATCOMC__)
+#    define __LZO_CDECL         __near __cdecl
+#  else
+#    define __LZO_CDECL
+#  endif
+#endif
+#if !defined(__LZO_ENTRY)
+#  define __LZO_ENTRY           __LZO_CDECL
+#endif
+
+/* DLL export information */
+#if !defined(__LZO_EXPORT1)
+#  define __LZO_EXPORT1
+#endif
+#if !defined(__LZO_EXPORT2)
+#  define __LZO_EXPORT2
+#endif
+
+/* calling convention for C functions */
+#if !defined(LZO_PUBLIC)
+#  define LZO_PUBLIC(_rettype)  __LZO_EXPORT1 _rettype __LZO_EXPORT2 __LZO_ENTRY
+#endif
+#if !defined(LZO_EXTERN)
+#  define LZO_EXTERN(_rettype)  __LZO_EXTERN_C LZO_PUBLIC(_rettype)
+#endif
+#if !defined(LZO_PRIVATE)
+#  define LZO_PRIVATE(_rettype) static _rettype __LZO_ENTRY
+#endif
+
+/* cdecl calling convention for assembler functions */
+#if !defined(LZO_PUBLIC_CDECL)
+#  define LZO_PUBLIC_CDECL(_rettype) \
+                __LZO_EXPORT1 _rettype __LZO_EXPORT2 __LZO_CDECL
+#endif
+#if !defined(LZO_EXTERN_CDECL)
+#  define LZO_EXTERN_CDECL(_rettype)  __LZO_EXTERN_C LZO_PUBLIC_CDECL(_rettype)
+#endif
+
+
+typedef int
+(__LZO_ENTRY *lzo_compress_t)   ( const lzo_byte *src, lzo_uint  src_len,
+                                        lzo_byte *dst, lzo_uint *dst_len,
+                                        lzo_voidp wrkmem );
+
+typedef int
+(__LZO_ENTRY *lzo_decompress_t) ( const lzo_byte *src, lzo_uint  src_len,
+                                        lzo_byte *dst, lzo_uint *dst_len,
+                                        lzo_voidp wrkmem );
+
+typedef int
+(__LZO_ENTRY *lzo_optimize_t)   (       lzo_byte *src, lzo_uint  src_len,
+                                        lzo_byte *dst, lzo_uint *dst_len,
+                                        lzo_voidp wrkmem );
+
+typedef int
+(__LZO_ENTRY *lzo_compress_dict_t)(const lzo_byte *src, lzo_uint  src_len,
+                                        lzo_byte *dst, lzo_uint *dst_len,
+                                        lzo_voidp wrkmem,
+                                  const lzo_byte *dict, lzo_uint dict_len );
+
+typedef int
+(__LZO_ENTRY *lzo_decompress_dict_t)(const lzo_byte *src, lzo_uint  src_len,
+                                        lzo_byte *dst, lzo_uint *dst_len,
+                                        lzo_voidp wrkmem,
+                                  const lzo_byte *dict, lzo_uint dict_len );
+
+
+/* a progress indicator callback function */
+typedef void (__LZO_ENTRY *lzo_progress_callback_t) (lzo_uint, lzo_uint);
+
+
+/***********************************************************************
+// error codes and prototypes
+************************************************************************/
+
+/* Error codes for the compression/decompression functions. Negative
+ * values are errors, positive values will be used for special but
+ * normal events.
+ */
+#define LZO_E_OK                    0
+#define LZO_E_ERROR                 (-1)
+#define LZO_E_OUT_OF_MEMORY         (-2)    /* not used right now */
+#define LZO_E_NOT_COMPRESSIBLE      (-3)    /* not used right now */
+#define LZO_E_INPUT_OVERRUN         (-4)
+#define LZO_E_OUTPUT_OVERRUN        (-5)
+#define LZO_E_LOOKBEHIND_OVERRUN    (-6)
+#define LZO_E_EOF_NOT_FOUND         (-7)
+#define LZO_E_INPUT_NOT_CONSUMED    (-8)
+
+
+/* lzo_init() should be the first function you call.
+ * Check the return code !
+ *
+ * lzo_init() is a macro to allow checking that the library and the
+ * compiler's view of various types are consistent.
+ */
+#define lzo_init() __lzo_init2(LZO_VERSION,(int)sizeof(short),(int)sizeof(int),\
+    (int)sizeof(long),(int)sizeof(lzo_uint32),(int)sizeof(lzo_uint),\
+    (int)lzo_sizeof_dict_t,(int)sizeof(char *),(int)sizeof(lzo_voidp),\
+    (int)sizeof(lzo_compress_t))
+LZO_EXTERN(int) __lzo_init2(unsigned,int,int,int,int,int,int,int,int,int);
+
+/* version functions (useful for shared libraries) */
+LZO_EXTERN(unsigned) lzo_version(void);
+LZO_EXTERN(const char *) lzo_version_string(void);
+LZO_EXTERN(const char *) lzo_version_date(void);
+LZO_EXTERN(const lzo_charp) _lzo_version_string(void);
+LZO_EXTERN(const lzo_charp) _lzo_version_date(void);
+
+/* string functions */
+LZO_EXTERN(int)
+lzo_memcmp(const lzo_voidp _s1, const lzo_voidp _s2, lzo_uint _len);
+LZO_EXTERN(lzo_voidp)
+lzo_memcpy(lzo_voidp _dest, const lzo_voidp _src, lzo_uint _len);
+LZO_EXTERN(lzo_voidp)
+lzo_memmove(lzo_voidp _dest, const lzo_voidp _src, lzo_uint _len);
+LZO_EXTERN(lzo_voidp)
+lzo_memset(lzo_voidp _s, int _c, lzo_uint _len);
+
+/* checksum functions */
+LZO_EXTERN(lzo_uint32)
+lzo_adler32(lzo_uint32 _adler, const lzo_byte *_buf, lzo_uint _len);
+LZO_EXTERN(lzo_uint32)
+lzo_crc32(lzo_uint32 _c, const lzo_byte *_buf, lzo_uint _len);
+
+/* memory allocation functions */
+LZO_EXTERN(lzo_bytep) lzo_alloc(lzo_uint _nelems, lzo_uint _size);
+LZO_EXTERN(lzo_bytep) lzo_malloc(lzo_uint _size);
+LZO_EXTERN(void) lzo_free(lzo_voidp _ptr);
+
+typedef lzo_bytep (__LZO_ENTRY *lzo_alloc_hook_t) (lzo_uint, lzo_uint);
+typedef void (__LZO_ENTRY *lzo_free_hook_t) (lzo_voidp);
+
+extern lzo_alloc_hook_t lzo_alloc_hook;
+extern lzo_free_hook_t lzo_free_hook;
+
+/* misc. */
+LZO_EXTERN(lzo_bool) lzo_assert(int _expr);
+LZO_EXTERN(int) _lzo_config_check(void);
+typedef union { lzo_bytep p; lzo_uint u; } __lzo_pu_u;
+typedef union { lzo_bytep p; lzo_uint32 u32; } __lzo_pu32_u;
+
+/* align a char pointer on a boundary that is a multiple of `size' */
+LZO_EXTERN(unsigned) __lzo_align_gap(const lzo_voidp _ptr, lzo_uint _size);
+#define LZO_PTR_ALIGN_UP(_ptr,_size) \
+    ((_ptr) + (lzo_uint) __lzo_align_gap((const lzo_voidp)(_ptr),(lzo_uint)(_size)))
+
+/* deprecated - only for backward compatibility */
+#define LZO_ALIGN(_ptr,_size) LZO_PTR_ALIGN_UP(_ptr,_size)
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* already included */
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libmpcodecs/native/minilzo.c	Sat Apr 13 18:03:02 2002 +0000
@@ -0,0 +1,2848 @@
+/* minilzo.c -- mini subset of the LZO real-time data compression library
+
+   This file is part of the LZO real-time data compression library.
+
+   Copyright (C) 2000 Markus Franz Xaver Johannes Oberhumer
+   Copyright (C) 1999 Markus Franz Xaver Johannes Oberhumer
+   Copyright (C) 1998 Markus Franz Xaver Johannes Oberhumer
+   Copyright (C) 1997 Markus Franz Xaver Johannes Oberhumer
+   Copyright (C) 1996 Markus Franz Xaver Johannes Oberhumer
+
+   The LZO library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of
+   the License, or (at your option) any later version.
+
+   The LZO library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with the LZO library; see the file COPYING.
+   If not, write to the Free Software Foundation, Inc.,
+   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+   Markus F.X.J. Oberhumer
+   <markus.oberhumer@jk.uni-linz.ac.at>
+   http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html
+ */
+
+/*
+ * NOTE:
+ *   the full LZO package can be found at
+ *   http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html
+ */
+
+#define __LZO_IN_MINILZO
+
+#ifdef MINILZO_HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
+#undef LZO_HAVE_CONFIG_H
+#include "minilzo.h"
+
+#if !defined(MINILZO_VERSION) || (MINILZO_VERSION != 0x1070)
+#  error "version mismatch in miniLZO source files"
+#endif
+
+#ifdef MINILZO_HAVE_CONFIG_H
+#  define LZO_HAVE_CONFIG_H
+#endif
+
+#if !defined(LZO_NO_SYS_TYPES_H)
+#  include <sys/types.h>
+#endif
+#include <stdio.h>
+
+#ifndef __LZO_CONF_H
+#define __LZO_CONF_H
+
+#if !defined(__LZO_IN_MINILZO)
+#  ifndef __LZOCONF_H
+#    include <lzoconf.h>
+#  endif
+#endif
+
+#if defined(__BOUNDS_CHECKING_ON)
+#  include <unchecked.h>
+#else
+#  define BOUNDS_CHECKING_OFF_DURING(stmt)      stmt
+#  define BOUNDS_CHECKING_OFF_IN_EXPR(expr)     (expr)
+#endif
+
+#if !defined(LZO_HAVE_CONFIG_H)
+#  include <stddef.h>
+#  include <string.h>
+#  if !defined(NO_STDLIB_H)
+#    include <stdlib.h>
+#  endif
+#  define HAVE_MEMCMP
+#  define HAVE_MEMCPY
+#  define HAVE_MEMMOVE
+#  define HAVE_MEMSET
+#else
+#  include <sys/types.h>
+#  if defined(STDC_HEADERS)
+#    include <string.h>
+#    include <stdlib.h>
+#  endif
+#  if defined(HAVE_STDDEF_H)
+#    include <stddef.h>
+#  endif
+#  if defined(HAVE_MEMORY_H)
+#    include <memory.h>
+#  endif
+#endif
+
+#if defined(__LZO_DOS16) || defined(__LZO_WIN16)
+#  define HAVE_MALLOC_H
+#  define HAVE_HALLOC
+#endif
+
+#undef NDEBUG
+#if !defined(LZO_DEBUG)
+#  define NDEBUG
+#endif
+#if defined(LZO_DEBUG) || !defined(NDEBUG)
+#  if !defined(NO_STDIO_H)
+#    include <stdio.h>
+#  endif
+#endif
+#include <assert.h>
+
+#if !defined(LZO_UNUSED)
+#  define LZO_UNUSED(parm)  (parm = parm)
+#endif
+
+#if !defined(__inline__) && !defined(__GNUC__)
+#  if defined(__cplusplus)
+#    define __inline__      inline
+#  else
+#    define __inline__
+#  endif
+#endif
+
+#if defined(NO_MEMCMP)
+#  undef HAVE_MEMCMP
+#endif
+
+#if !defined(HAVE_MEMCMP)
+#  undef memcmp
+#  define memcmp    lzo_memcmp
+#endif
+#if !defined(HAVE_MEMCPY)
+#  undef memcpy
+#  define memcpy    lzo_memcpy
+#endif
+#if !defined(HAVE_MEMMOVE)
+#  undef memmove
+#  define memmove   lzo_memmove
+#endif
+#if !defined(HAVE_MEMSET)
+#  undef memset
+#  define memset    lzo_memset
+#endif
+
+#if 1
+#  define LZO_BYTE(x)       ((unsigned char) (x))
+#else
+#  define LZO_BYTE(x)       ((unsigned char) ((x) & 0xff))
+#endif
+#if 0
+#  define LZO_USHORT(x)     ((unsigned short) (x))
+#else
+#  define LZO_USHORT(x)     ((unsigned short) ((x) & 0xffff))
+#endif
+
+#define LZO_MAX(a,b)        ((a) >= (b) ? (a) : (b))
+#define LZO_MIN(a,b)        ((a) <= (b) ? (a) : (b))
+#define LZO_MAX3(a,b,c)     ((a) >= (b) ? LZO_MAX(a,c) : LZO_MAX(b,c))
+#define LZO_MIN3(a,b,c)     ((a) <= (b) ? LZO_MIN(a,c) : LZO_MIN(b,c))
+
+#define lzo_sizeof(type)    ((lzo_uint) (sizeof(type)))
+
+#define LZO_HIGH(array)     ((lzo_uint) (sizeof(array)/sizeof(*(array))))
+
+#define LZO_SIZE(bits)      (1u << (bits))
+#define LZO_MASK(bits)      (LZO_SIZE(bits) - 1)
+
+#define LZO_LSIZE(bits)     (1ul << (bits))
+#define LZO_LMASK(bits)     (LZO_LSIZE(bits) - 1)
+
+#define LZO_USIZE(bits)     ((lzo_uint) 1 << (bits))
+#define LZO_UMASK(bits)     (LZO_USIZE(bits) - 1)
+
+#define LZO_STYPE_MAX(b)    (((1l  << (8*(b)-2)) - 1l)  + (1l  << (8*(b)-2)))
+#define LZO_UTYPE_MAX(b)    (((1ul << (8*(b)-1)) - 1ul) + (1ul << (8*(b)-1)))
+
+#if !defined(SIZEOF_UNSIGNED)
+#  if (UINT_MAX == 0xffff)
+#    define SIZEOF_UNSIGNED         2
+#  elif (UINT_MAX == LZO_0xffffffffL)
+#    define SIZEOF_UNSIGNED         4
+#  elif (UINT_MAX >= LZO_0xffffffffL)
+#    define SIZEOF_UNSIGNED         8
+#  else
+#    error SIZEOF_UNSIGNED
+#  endif
+#endif
+
+#if !defined(SIZEOF_UNSIGNED_LONG)
+#  if (ULONG_MAX == LZO_0xffffffffL)
+#    define SIZEOF_UNSIGNED_LONG    4
+#  elif (ULONG_MAX >= LZO_0xffffffffL)
+#    define SIZEOF_UNSIGNED_LONG    8
+#  else
+#    error SIZEOF_UNSIGNED_LONG
+#  endif
+#endif
+
+#if !defined(SIZEOF_SIZE_T)
+#  define SIZEOF_SIZE_T             SIZEOF_UNSIGNED
+#endif
+#if !defined(SIZE_T_MAX)
+#  define SIZE_T_MAX                LZO_UTYPE_MAX(SIZEOF_SIZE_T)
+#endif
+
+#if 1 && defined(__LZO_i386) && (UINT_MAX == LZO_0xffffffffL)
+#  if !defined(LZO_UNALIGNED_OK_2) && (USHRT_MAX == 0xffff)
+#    define LZO_UNALIGNED_OK_2
+#  endif
+#  if !defined(LZO_UNALIGNED_OK_4) && (LZO_UINT32_MAX == LZO_0xffffffffL)
+#    define LZO_UNALIGNED_OK_4
+#  endif
+#endif
+
+#if defined(LZO_UNALIGNED_OK_2) || defined(LZO_UNALIGNED_OK_4)
+#  if !defined(LZO_UNALIGNED_OK)
+#    define LZO_UNALIGNED_OK
+#  endif
+#endif
+
+#if defined(__LZO_NO_UNALIGNED)
+#  undef LZO_UNALIGNED_OK
+#  undef LZO_UNALIGNED_OK_2
+#  undef LZO_UNALIGNED_OK_4
+#endif
+
+#if defined(LZO_UNALIGNED_OK_2) && (USHRT_MAX != 0xffff)
+#  error "LZO_UNALIGNED_OK_2 must not be defined on this system"
+#endif
+#if defined(LZO_UNALIGNED_OK_4) && (LZO_UINT32_MAX != LZO_0xffffffffL)
+#  error "LZO_UNALIGNED_OK_4 must not be defined on this system"
+#endif
+
+#if defined(__LZO_NO_ALIGNED)
+#  undef LZO_ALIGNED_OK_4
+#endif
+
+#if defined(LZO_ALIGNED_OK_4) && (LZO_UINT32_MAX != LZO_0xffffffffL)
+#  error "LZO_ALIGNED_OK_4 must not be defined on this system"
+#endif
+
+#define LZO_LITTLE_ENDIAN       1234
+#define LZO_BIG_ENDIAN          4321
+#define LZO_PDP_ENDIAN          3412
+
+#if !defined(LZO_BYTE_ORDER)
+#  if defined(MFX_BYTE_ORDER)
+#    define LZO_BYTE_ORDER      MFX_BYTE_ORDER
+#  elif defined(__LZO_i386)
+#    define LZO_BYTE_ORDER      LZO_LITTLE_ENDIAN
+#  elif defined(BYTE_ORDER)
+#    define LZO_BYTE_ORDER      BYTE_ORDER
+#  elif defined(__BYTE_ORDER)
+#    define LZO_BYTE_ORDER      __BYTE_ORDER
+#  endif
+#endif
+
+#if defined(LZO_BYTE_ORDER)
+#  if (LZO_BYTE_ORDER != LZO_LITTLE_ENDIAN) && \
+      (LZO_BYTE_ORDER != LZO_BIG_ENDIAN)
+#    error "invalid LZO_BYTE_ORDER"
+#  endif
+#endif
+
+#if defined(LZO_UNALIGNED_OK) && !defined(LZO_BYTE_ORDER)
+#  error "LZO_BYTE_ORDER is not defined"
+#endif
+
+#define LZO_OPTIMIZE_GNUC_i386_IS_BUGGY
+
+#if defined(NDEBUG) && !defined(LZO_DEBUG) && !defined(__LZO_CHECKER)
+#  if defined(__GNUC__) && defined(__i386__)
+#    if !defined(LZO_OPTIMIZE_GNUC_i386_IS_BUGGY)
+#      define LZO_OPTIMIZE_GNUC_i386
+#    endif
+#  endif
+#endif
+
+__LZO_EXTERN_C int __lzo_init_done;
+__LZO_EXTERN_C const lzo_byte __lzo_copyright[];
+LZO_EXTERN(const lzo_byte *) lzo_copyright(void);
+__LZO_EXTERN_C const lzo_uint32 _lzo_crc32_table[256];
+
+#define _LZO_STRINGIZE(x)           #x
+#define _LZO_MEXPAND(x)             _LZO_STRINGIZE(x)
+
+#define _LZO_CONCAT2(a,b)           a ## b
+#define _LZO_CONCAT3(a,b,c)         a ## b ## c
+#define _LZO_CONCAT4(a,b,c,d)       a ## b ## c ## d
+#define _LZO_CONCAT5(a,b,c,d,e)     a ## b ## c ## d ## e
+
+#define _LZO_ECONCAT2(a,b)          _LZO_CONCAT2(a,b)
+#define _LZO_ECONCAT3(a,b,c)        _LZO_CONCAT3(a,b,c)
+#define _LZO_ECONCAT4(a,b,c,d)      _LZO_CONCAT4(a,b,c,d)
+#define _LZO_ECONCAT5(a,b,c,d,e)    _LZO_CONCAT5(a,b,c,d,e)
+
+#if 0
+
+#define __LZO_IS_COMPRESS_QUERY(i,il,o,ol,w)    ((lzo_voidp)(o) == (w))
+#define __LZO_QUERY_COMPRESS(i,il,o,ol,w,n,s) \
+		(*ol = (n)*(s), LZO_E_OK)
+
+#define __LZO_IS_DECOMPRESS_QUERY(i,il,o,ol,w)  ((lzo_voidp)(o) == (w))
+#define __LZO_QUERY_DECOMPRESS(i,il,o,ol,w,n,s) \
+		(*ol = (n)*(s), LZO_E_OK)
+
+#define __LZO_IS_OPTIMIZE_QUERY(i,il,o,ol,w)    ((lzo_voidp)(o) == (w))
+#define __LZO_QUERY_OPTIMIZE(i,il,o,ol,w,n,s) \
+		(*ol = (n)*(s), LZO_E_OK)
+
+#endif
+
+#ifndef __LZO_PTR_H
+#define __LZO_PTR_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(__LZO_DOS16) || defined(__LZO_WIN16)
+#  include <dos.h>
+#  if 1 && defined(__WATCOMC__)
+#    include <i86.h>
+     __LZO_EXTERN_C unsigned char _HShift;
+#    define __LZO_HShift    _HShift
+#  elif 1 && defined(_MSC_VER)
+     __LZO_EXTERN_C unsigned short __near _AHSHIFT;
+#    define __LZO_HShift    ((unsigned) &_AHSHIFT)
+#  elif defined(__LZO_WIN16)
+#    define __LZO_HShift    3
+#  else
+#    define __LZO_HShift    12
+#  endif
+#  if !defined(_FP_SEG) && defined(FP_SEG)
+#    define _FP_SEG         FP_SEG
+#  endif
+#  if !defined(_FP_OFF) && defined(FP_OFF)
+#    define _FP_OFF         FP_OFF
+#  endif
+#endif
+
+#if (UINT_MAX >= LZO_0xffffffffL)
+   typedef ptrdiff_t            lzo_ptrdiff_t;
+#else
+   typedef long                 lzo_ptrdiff_t;
+#endif
+
+#if !defined(__LZO_HAVE_PTR_T)
+#  if defined(lzo_ptr_t)
+#    define __LZO_HAVE_PTR_T
+#  endif
+#endif
+#if !defined(__LZO_HAVE_PTR_T)
+#  if defined(SIZEOF_CHAR_P) && defined(SIZEOF_UNSIGNED_LONG)
+#    if (SIZEOF_CHAR_P == SIZEOF_UNSIGNED_LONG)
+       typedef unsigned long    lzo_ptr_t;
+       typedef long             lzo_sptr_t;
+#      define __LZO_HAVE_PTR_T
+#    endif
+#  endif
+#endif
+#if !defined(__LZO_HAVE_PTR_T)
+#  if defined(SIZEOF_CHAR_P) && defined(SIZEOF_UNSIGNED)
+#    if (SIZEOF_CHAR_P == SIZEOF_UNSIGNED)
+       typedef unsigned int     lzo_ptr_t;
+       typedef int              lzo_sptr_t;
+#      define __LZO_HAVE_PTR_T
+#    endif
+#  endif
+#endif
+#if !defined(__LZO_HAVE_PTR_T)
+#  if defined(SIZEOF_CHAR_P) && defined(SIZEOF_UNSIGNED_SHORT)
+#    if (SIZEOF_CHAR_P == SIZEOF_UNSIGNED_SHORT)
+       typedef unsigned short   lzo_ptr_t;
+       typedef short            lzo_sptr_t;
+#      define __LZO_HAVE_PTR_T
+#    endif
+#  endif
+#endif
+#if !defined(__LZO_HAVE_PTR_T)
+#  if defined(LZO_HAVE_CONFIG_H) || defined(SIZEOF_CHAR_P)
+#    error "no suitable type for lzo_ptr_t"
+#  else
+     typedef unsigned long      lzo_ptr_t;
+     typedef long               lzo_sptr_t;
+#    define __LZO_HAVE_PTR_T
+#  endif
+#endif
+
+#if defined(__LZO_DOS16) || defined(__LZO_WIN16)
+#define PTR(a)              ((lzo_bytep) (a))
+#define PTR_ALIGNED_4(a)    ((_FP_OFF(a) & 3) == 0)
+#define PTR_ALIGNED2_4(a,b) (((_FP_OFF(a) | _FP_OFF(b)) & 3) == 0)
+#else
+#define PTR(a)              ((lzo_ptr_t) (a))
+#define PTR_LINEAR(a)       PTR(a)
+#define PTR_ALIGNED_4(a)    ((PTR_LINEAR(a) & 3) == 0)
+#define PTR_ALIGNED_8(a)    ((PTR_LINEAR(a) & 7) == 0)
+#define PTR_ALIGNED2_4(a,b) (((PTR_LINEAR(a) | PTR_LINEAR(b)) & 3) == 0)
+#define PTR_ALIGNED2_8(a,b) (((PTR_LINEAR(a) | PTR_LINEAR(b)) & 7) == 0)
+#endif
+
+#define PTR_LT(a,b)         (PTR(a) < PTR(b))
+#define PTR_GE(a,b)         (PTR(a) >= PTR(b))
+#define PTR_DIFF(a,b)       ((lzo_ptrdiff_t) (PTR(a) - PTR(b)))
+
+LZO_EXTERN(lzo_ptr_t)
+__lzo_ptr_linear(const lzo_voidp ptr);
+
+typedef union
+{
+    char            a_char;
+    unsigned char   a_uchar;
+    short           a_short;
+    unsigned short  a_ushort;
+    int             a_int;
+    unsigned int    a_uint;
+    long            a_long;
+    unsigned long   a_ulong;
+    lzo_int         a_lzo_int;
+    lzo_uint        a_lzo_uint;
+    lzo_int32       a_lzo_int32;
+    lzo_uint32      a_lzo_uint32;
+    ptrdiff_t       a_ptrdiff_t;
+    lzo_ptrdiff_t   a_lzo_ptrdiff_t;
+    lzo_ptr_t       a_lzo_ptr_t;
+    char *          a_charp;
+    lzo_bytep       a_lzo_bytep;
+    lzo_bytepp      a_lzo_bytepp;
+}
+lzo_align_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
+#define LZO_DETERMINISTIC
+
+#define LZO_DICT_USE_PTR
+#if defined(__LZO_DOS16) || defined(__LZO_WIN16) || defined(__LZO_STRICT_16BIT)
+#  undef LZO_DICT_USE_PTR
+#endif
+
+#if defined(LZO_DICT_USE_PTR)
+#  define lzo_dict_t    const lzo_bytep
+#  define lzo_dict_p    lzo_dict_t __LZO_MMODEL *
+#else
+#  define lzo_dict_t    lzo_uint
+#  define lzo_dict_p    lzo_dict_t __LZO_MMODEL *
+#endif
+
+#if !defined(lzo_moff_t)
+#define lzo_moff_t      lzo_uint
+#endif
+
+#endif
+
+LZO_PUBLIC(lzo_ptr_t)
+__lzo_ptr_linear(const lzo_voidp ptr)
+{
+    lzo_ptr_t p;
+
+#if defined(__LZO_DOS16) || defined(__LZO_WIN16)
+    p = (((lzo_ptr_t)(_FP_SEG(ptr))) << (16 - __LZO_HShift)) + (_FP_OFF(ptr));
+#else
+    p = PTR_LINEAR(ptr);
+#endif
+
+    return p;
+}
+
+LZO_PUBLIC(unsigned)
+__lzo_align_gap(const lzo_voidp ptr, lzo_uint size)
+{
+    lzo_ptr_t p, s, n;
+
+    assert(size > 0);
+
+    p = __lzo_ptr_linear(ptr);
+    s = (lzo_ptr_t) (size - 1);
+#if 0
+    assert((size & (size - 1)) == 0);
+    n = ((p + s) & ~s) - p;
+#else
+    n = (((p + s) / size) * size) - p;
+#endif
+
+    assert((long)n >= 0);
+    assert(n <= s);
+
+    return (unsigned)n;
+}
+
+#ifndef __LZO_UTIL_H
+#define __LZO_UTIL_H
+
+#ifndef __LZO_CONF_H
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if 1 && defined(HAVE_MEMCPY)
+#if !defined(__LZO_DOS16) && !defined(__LZO_WIN16)
+
+#define MEMCPY8_DS(dest,src,len) \
+    memcpy(dest,src,len); \
+    dest += len; \
+    src += len
+
+#endif
+#endif
+
+#if 0 && !defined(MEMCPY8_DS)
+
+#define MEMCPY8_DS(dest,src,len) \
+    { do { \
+	*dest++ = *src++; \
+	*dest++ = *src++; \
+	*dest++ = *src++; \
+	*dest++ = *src++; \
+	*dest++ = *src++; \
+	*dest++ = *src++; \
+	*dest++ = *src++; \
+	*dest++ = *src++; \
+	len -= 8; \
+    } while (len > 0); }
+
+#endif
+
+#if !defined(MEMCPY8_DS)
+
+#define MEMCPY8_DS(dest,src,len) \
+    { register lzo_uint __l = (len) / 8; \
+    do { \
+	*dest++ = *src++; \
+	*dest++ = *src++; \
+	*dest++ = *src++; \
+	*dest++ = *src++; \
+	*dest++ = *src++; \
+	*dest++ = *src++; \
+	*dest++ = *src++; \
+	*dest++ = *src++; \
+    } while (--__l > 0); }
+
+#endif
+
+#define MEMCPY_DS(dest,src,len) \
+    do *dest++ = *src++; \
+    while (--len > 0)
+
+#define MEMMOVE_DS(dest,src,len) \
+    do *dest++ = *src++; \
+    while (--len > 0)
+
+#if 0 && defined(LZO_OPTIMIZE_GNUC_i386)
+
+#define BZERO8_PTR(s,l,n) \
+__asm__ __volatile__( \
+    "movl  %0,%%eax \n"             \
+    "movl  %1,%%edi \n"             \
+    "movl  %2,%%ecx \n"             \
+    "cld \n"                        \
+    "rep \n"                        \
+    "stosl %%eax,(%%edi) \n"        \
+    :               \
+    :"g" (0),"g" (s),"g" (n)        \
+    :"eax","edi","ecx", "memory", "cc" \
+)
+
+#elif (LZO_UINT_MAX <= SIZE_T_MAX) && defined(HAVE_MEMSET)
+
+#if 1
+#define BZERO8_PTR(s,l,n)   memset((s),0,(lzo_uint)(l)*(n))
+#else
+#define BZERO8_PTR(s,l,n)   memset((lzo_voidp)(s),0,(lzo_uint)(l)*(n))
+#endif
+
+#else
+
+#define BZERO8_PTR(s,l,n) \
+    lzo_memset((lzo_voidp)(s),0,(lzo_uint)(l)*(n))
+
+#endif
+
+#if 0
+#if defined(__GNUC__) && defined(__i386__)
+
+unsigned char lzo_rotr8(unsigned char value, int shift);
+extern __inline__ unsigned char lzo_rotr8(unsigned char value, int shift)
+{
+    unsigned char result;
+
+    __asm__ __volatile__ ("movb %b1, %b0; rorb %b2, %b0"
+			: "=a"(result) : "g"(value), "c"(shift));
+    return result;
+}
+
+unsigned short lzo_rotr16(unsigned short value, int shift);
+extern __inline__ unsigned short lzo_rotr16(unsigned short value, int shift)
+{
+    unsigned short result;
+
+    __asm__ __volatile__ ("movw %b1, %b0; rorw %b2, %b0"
+			: "=a"(result) : "g"(value), "c"(shift));
+    return result;
+}
+
+#endif
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
+LZO_PUBLIC(lzo_bool)
+lzo_assert(int expr)
+{
+    return (expr) ? 1 : 0;
+}
+
+/* If you use the LZO library in a product, you *must* keep this
+ * copyright string in the executable of your product.
+ */
+
+const lzo_byte __lzo_copyright[] =
+#if !defined(__LZO_IN_MINLZO)
+    LZO_VERSION_STRING;
+#else
+    "\n\n\n"
+    "LZO real-time data compression library.\n"
+    "Copyright (C) 1996, 1997, 1998, 1999, 2000 Markus Franz Xaver Johannes Oberhumer\n"
+    "<markus.oberhumer@jk.uni-linz.ac.at>\n"
+    "http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html\n"
+    "\n"
+    "LZO version: v" LZO_VERSION_STRING ", " LZO_VERSION_DATE "\n"
+    "LZO build date: " __DATE__ " " __TIME__ "\n\n"
+    "LZO special compilation options:\n"
+#ifdef __cplusplus
+    " __cplusplus\n"
+#endif
+#if defined(__PIC__)
+    " __PIC__\n"
+#elif defined(__pic__)
+    " __pic__\n"
+#endif
+#if (UINT_MAX < LZO_0xffffffffL)
+    " 16BIT\n"
+#endif
+#if defined(__LZO_STRICT_16BIT)
+    " __LZO_STRICT_16BIT\n"
+#endif
+#if (UINT_MAX > LZO_0xffffffffL)
+    " UINT_MAX=" _LZO_MEXPAND(UINT_MAX) "\n"
+#endif
+#if (ULONG_MAX > LZO_0xffffffffL)
+    " ULONG_MAX=" _LZO_MEXPAND(ULONG_MAX) "\n"
+#endif
+#if defined(LZO_BYTE_ORDER)
+    " LZO_BYTE_ORDER=" _LZO_MEXPAND(LZO_BYTE_ORDER) "\n"
+#endif
+#if defined(LZO_UNALIGNED_OK_2)
+    " LZO_UNALIGNED_OK_2\n"
+#endif
+#if defined(LZO_UNALIGNED_OK_4)
+    " LZO_UNALIGNED_OK_4\n"
+#endif
+#if defined(LZO_ALIGNED_OK_4)
+    " LZO_ALIGNED_OK_4\n"
+#endif
+#if defined(LZO_DICT_USE_PTR)
+    " LZO_DICT_USE_PTR\n"
+#endif
+#if defined(__LZO_QUERY_COMPRESS)
+    " __LZO_QUERY_COMPRESS\n"
+#endif
+#if defined(__LZO_QUERY_DECOMPRESS)
+    " __LZO_QUERY_DECOMPRESS\n"
+#endif
+#if defined(__LZO_IN_MINILZO)
+    " __LZO_IN_MINILZO\n"
+#endif
+    "\n\n"
+    "$Id: LZO " LZO_VERSION_STRING " built " __DATE__ " " __TIME__
+#if defined(__GNUC__) && defined(__VERSION__)
+    " by gcc " __VERSION__
+#elif defined(__BORLANDC__)
+    " by Borland C " _LZO_MEXPAND(__BORLANDC__)
+#elif defined(_MSC_VER)
+    " by Microsoft C " _LZO_MEXPAND(_MSC_VER)
+#elif defined(__PUREC__)
+    " by Pure C " _LZO_MEXPAND(__PUREC__)
+#elif defined(__SC__)
+    " by Symantec C " _LZO_MEXPAND(__SC__)
+#elif defined(__TURBOC__)
+    " by Turbo C " _LZO_MEXPAND(__TURBOC__)
+#elif defined(__WATCOMC__)
+    " by Watcom C " _LZO_MEXPAND(__WATCOMC__)
+#endif
+    " $\n"
+    "$Copyright: LZO (C) 1996, 1997, 1998, 1999, 2000 Markus Franz Xaver Johannes Oberhumer $\n";
+#endif
+
+LZO_PUBLIC(const lzo_byte *)
+lzo_copyright(void)
+{
+    return __lzo_copyright;
+}
+
+LZO_PUBLIC(unsigned)
+lzo_version(void)
+{
+    return LZO_VERSION;
+}
+
+LZO_PUBLIC(const char *)
+lzo_version_string(void)
+{
+    return LZO_VERSION_STRING;
+}
+
+LZO_PUBLIC(const char *)
+lzo_version_date(void)
+{
+    return LZO_VERSION_DATE;
+}
+
+LZO_PUBLIC(const lzo_charp)
+_lzo_version_string(void)
+{
+    return LZO_VERSION_STRING;
+}
+
+LZO_PUBLIC(const lzo_charp)
+_lzo_version_date(void)
+{
+    return LZO_VERSION_DATE;
+}
+
+#define LZO_BASE 65521u
+#define LZO_NMAX 5552
+
+#define LZO_DO1(buf,i)  {s1 += buf[i]; s2 += s1;}
+#define LZO_DO2(buf,i)  LZO_DO1(buf,i); LZO_DO1(buf,i+1);
+#define LZO_DO4(buf,i)  LZO_DO2(buf,i); LZO_DO2(buf,i+2);
+#define LZO_DO8(buf,i)  LZO_DO4(buf,i); LZO_DO4(buf,i+4);
+#define LZO_DO16(buf,i) LZO_DO8(buf,i); LZO_DO8(buf,i+8);
+
+LZO_PUBLIC(lzo_uint32)
+lzo_adler32(lzo_uint32 adler, const lzo_byte *buf, lzo_uint len)
+{
+    lzo_uint32 s1 = adler & 0xffff;
+    lzo_uint32 s2 = (adler >> 16) & 0xffff;
+    int k;
+
+    if (buf == NULL)
+	return 1;
+
+    while (len > 0)
+    {
+	k = len < LZO_NMAX ? (int) len : LZO_NMAX;
+	len -= k;
+	if (k >= 16) do
+	{
+	    LZO_DO16(buf,0);
+	    buf += 16;
+	    k -= 16;
+	} while (k >= 16);
+	if (k != 0) do
+	{
+	    s1 += *buf++;
+	    s2 += s1;
+	} while (--k > 0);
+	s1 %= LZO_BASE;
+	s2 %= LZO_BASE;
+    }
+    return (s2 << 16) | s1;
+}
+
+LZO_PUBLIC(int)
+lzo_memcmp(const lzo_voidp s1, const lzo_voidp s2, lzo_uint len)
+{
+#if (LZO_UINT_MAX <= SIZE_T_MAX) && defined(HAVE_MEMCMP)
+    return memcmp(s1,s2,len);
+#else
+    const lzo_byte *p1 = (const lzo_byte *) s1;
+    const lzo_byte *p2 = (const lzo_byte *) s2;
+    int d;
+
+    if (len > 0) do
+    {
+	d = *p1 - *p2;
+	if (d != 0)
+	    return d;
+	p1++;
+	p2++;
+    }
+    while (--len > 0);
+    return 0;
+#endif
+}
+
+LZO_PUBLIC(lzo_voidp)
+lzo_memcpy(lzo_voidp dest, const lzo_voidp src, lzo_uint len)
+{
+#if (LZO_UINT_MAX <= SIZE_T_MAX) && defined(HAVE_MEMCPY)
+    return memcpy(dest,src,len);
+#else
+    lzo_byte *p1 = (lzo_byte *) dest;
+    const lzo_byte *p2 = (const lzo_byte *) src;
+
+    if (len <= 0 || p1 == p2)
+	return dest;
+    do
+	*p1++ = *p2++;
+    while (--len > 0);
+    return dest;
+#endif
+}
+
+LZO_PUBLIC(lzo_voidp)
+lzo_memmove(lzo_voidp dest, const lzo_voidp src, lzo_uint len)
+{
+#if (LZO_UINT_MAX <= SIZE_T_MAX) && defined(HAVE_MEMMOVE)
+    return memmove(dest,src,len);
+#else
+    lzo_byte *p1 = (lzo_byte *) dest;
+    const lzo_byte *p2 = (const lzo_byte *) src;
+
+    if (len <= 0 || p1 == p2)
+	return dest;
+
+    if (p1 < p2)
+    {
+	do
+	    *p1++ = *p2++;
+	while (--len > 0);
+    }
+    else
+    {
+	p1 += len;
+	p2 += len;
+	do
+	    *--p1 = *--p2;
+	while (--len > 0);
+    }
+    return dest;
+#endif
+}
+
+LZO_PUBLIC(lzo_voidp)
+lzo_memset(lzo_voidp s, int c, lzo_uint len)
+{
+#if (LZO_UINT_MAX <= SIZE_T_MAX) && defined(HAVE_MEMSET)
+    return memset(s,c,len);
+#else
+    lzo_byte *p = (lzo_byte *) s;
+
+    if (len > 0) do
+	*p++ = LZO_BYTE(c);
+    while (--len > 0);
+    return s;
+#endif
+}
+
+#include <stdio.h>
+
+#if 0
+#  define IS_SIGNED(type)       (((type) (1ul << (8 * sizeof(type) - 1))) < 0)
+#  define IS_UNSIGNED(type)     (((type) (1ul << (8 * sizeof(type) - 1))) > 0)
+#else
+#  define IS_SIGNED(type)       (((type) (-1)) < ((type) 0))
+#  define IS_UNSIGNED(type)     (((type) (-1)) > ((type) 0))
+#endif
+
+static lzo_bool schedule_insns_bug(void);
+static lzo_bool strength_reduce_bug(int *);
+
+#if 0 || defined(LZO_DEBUG)
+static lzo_bool __lzo_assert_fail(const char *s, unsigned line)
+{
+#if defined(__palmos__)
+    printf("LZO assertion failed in line %u: '%s'\n",line,s);
+#else
+    fprintf(stderr,"LZO assertion failed in line %u: '%s'\n",line,s);
+#endif
+    return 0;
+}
+#  define __lzo_assert(x)   ((x) ? 1 : __lzo_assert_fail(#x,__LINE__))
+#else
+#  define __lzo_assert(x)   ((x) ? 1 : 0)
+#endif
+
+static lzo_bool basic_integral_check(void)
+{
+    lzo_bool r = 1;
+    lzo_bool sanity;
+
+    r &= __lzo_assert(CHAR_BIT == 8);
+    r &= __lzo_assert(sizeof(char) == 1);
+    r &= __lzo_assert(sizeof(short) >= 2);
+    r &= __lzo_assert(sizeof(long) >= 4);
+    r &= __lzo_assert(sizeof(int) >= sizeof(short));
+    r &= __lzo_assert(sizeof(long) >= sizeof(int));
+
+    r &= __lzo_assert(sizeof(lzo_uint32) >= 4);
+    r &= __lzo_assert(sizeof(lzo_uint32) >= sizeof(unsigned));
+#if defined(__LZO_STRICT_16BIT)
+    r &= __lzo_assert(sizeof(lzo_uint) == 2);
+#else
+    r &= __lzo_assert(sizeof(lzo_uint) >= 4);
+    r &= __lzo_assert(sizeof(lzo_uint) >= sizeof(unsigned));
+#endif
+
+#if defined(SIZEOF_UNSIGNED)
+    r &= __lzo_assert(SIZEOF_UNSIGNED == sizeof(unsigned));
+#endif
+#if defined(SIZEOF_UNSIGNED_LONG)
+    r &= __lzo_assert(SIZEOF_UNSIGNED_LONG == sizeof(unsigned long));
+#endif
+#if defined(SIZEOF_UNSIGNED_SHORT)
+    r &= __lzo_assert(SIZEOF_UNSIGNED_SHORT == sizeof(unsigned short));
+#endif
+#if !defined(__LZO_IN_MINILZO)
+#if defined(SIZEOF_SIZE_T)
+    r &= __lzo_assert(SIZEOF_SIZE_T == sizeof(size_t));
+#endif
+#endif
+
+    sanity = IS_UNSIGNED(unsigned short) && IS_UNSIGNED(unsigned) &&
+	     IS_UNSIGNED(unsigned long) &&
+	     IS_SIGNED(short) && IS_SIGNED(int) && IS_SIGNED(long);
+    if (sanity)
+    {
+	r &= __lzo_assert(IS_UNSIGNED(lzo_uint32));
+	r &= __lzo_assert(IS_UNSIGNED(lzo_uint));
+	r &= __lzo_assert(IS_SIGNED(lzo_int32));
+	r &= __lzo_assert(IS_SIGNED(lzo_int));
+
+	r &= __lzo_assert(INT_MAX    == LZO_STYPE_MAX(sizeof(int)));
+	r &= __lzo_assert(UINT_MAX   == LZO_UTYPE_MAX(sizeof(unsigned)));
+	r &= __lzo_assert(LONG_MAX   == LZO_STYPE_MAX(sizeof(long)));
+	r &= __lzo_assert(ULONG_MAX  == LZO_UTYPE_MAX(sizeof(unsigned long)));
+	r &= __lzo_assert(SHRT_MAX   == LZO_STYPE_MAX(sizeof(short)));
+	r &= __lzo_assert(USHRT_MAX  == LZO_UTYPE_MAX(sizeof(unsigned short)));
+	r &= __lzo_assert(LZO_UINT32_MAX == LZO_UTYPE_MAX(sizeof(lzo_uint32)));
+	r &= __lzo_assert(LZO_UINT_MAX   == LZO_UTYPE_MAX(sizeof(lzo_uint)));
+#if !defined(__LZO_IN_MINILZO)
+	r &= __lzo_assert(SIZE_T_MAX     == LZO_UTYPE_MAX(sizeof(size_t)));
+#endif
+    }
+
+#if 0
+    r &= __lzo_assert(LZO_BYTE(257) == 1);
+    r &= __lzo_assert(LZO_USHORT(65537L) == 1);
+#endif
+
+    return r;
+}
+
+static lzo_bool basic_ptr_check(void)
+{
+    lzo_bool r = 1;
+    lzo_bool sanity;
+
+    r &= __lzo_assert(sizeof(char *) >= sizeof(int));
+    r &= __lzo_assert(sizeof(lzo_byte *) >= sizeof(char *));
+
+    r &= __lzo_assert(sizeof(lzo_voidp) == sizeof(lzo_byte *));
+    r &= __lzo_assert(sizeof(lzo_voidp) == sizeof(lzo_voidpp));
+    r &= __lzo_assert(sizeof(lzo_voidp) == sizeof(lzo_bytepp));
+    r &= __lzo_assert(sizeof(lzo_voidp) >= sizeof(lzo_uint));
+
+    r &= __lzo_assert(sizeof(lzo_ptr_t) == sizeof(lzo_voidp));
+    r &= __lzo_assert(sizeof(lzo_ptr_t) >= sizeof(lzo_uint));
+
+    r &= __lzo_assert(sizeof(lzo_ptrdiff_t) >= 4);
+    r &= __lzo_assert(sizeof(lzo_ptrdiff_t) >= sizeof(ptrdiff_t));
+
+#if defined(SIZEOF_CHAR_P)
+    r &= __lzo_assert(SIZEOF_CHAR_P == sizeof(char *));
+#endif
+#if defined(SIZEOF_PTRDIFF_T)
+    r &= __lzo_assert(SIZEOF_PTRDIFF_T == sizeof(ptrdiff_t));
+#endif
+
+    sanity = IS_UNSIGNED(unsigned short) && IS_UNSIGNED(unsigned) &&
+	     IS_UNSIGNED(unsigned long) &&
+	     IS_SIGNED(short) && IS_SIGNED(int) && IS_SIGNED(long);
+    if (sanity)
+    {
+	r &= __lzo_assert(IS_UNSIGNED(lzo_ptr_t));
+	r &= __lzo_assert(IS_UNSIGNED(lzo_moff_t));
+	r &= __lzo_assert(IS_SIGNED(lzo_ptrdiff_t));
+	r &= __lzo_assert(IS_SIGNED(lzo_sptr_t));
+    }
+
+    return r;
+}
+
+static lzo_bool ptr_check(void)
+{
+    lzo_bool r = 1;
+    int i;
+    char _wrkmem[10 * sizeof(lzo_byte *) + sizeof(lzo_align_t)];
+    lzo_byte *wrkmem;
+    lzo_bytepp dict;
+    unsigned char x[4 * sizeof(lzo_align_t)];
+    long d;
+    lzo_align_t a;
+
+    for (i = 0; i < (int) sizeof(x); i++)
+	x[i] = LZO_BYTE(i);
+
+    wrkmem = LZO_PTR_ALIGN_UP((lzo_byte *)_wrkmem,sizeof(lzo_align_t));
+    dict = (lzo_bytepp) wrkmem;
+
+    d = (long) ((const lzo_bytep) dict - (const lzo_bytep) _wrkmem);
+    r &= __lzo_assert(d >= 0);
+    r &= __lzo_assert(d < (long) sizeof(lzo_align_t));
+
+    memset(&a,0xff,sizeof(a));
+    r &= __lzo_assert(a.a_ushort == USHRT_MAX);
+    r &= __lzo_assert(a.a_uint == UINT_MAX);
+    r &= __lzo_assert(a.a_ulong == ULONG_MAX);
+    r &= __lzo_assert(a.a_lzo_uint == LZO_UINT_MAX);
+
+    if (r == 1)
+    {
+	for (i = 0; i < 8; i++)
+	    r &= __lzo_assert((const lzo_voidp) (&dict[i]) == (const lzo_voidp) (&wrkmem[i * sizeof(lzo_byte *)]));
+    }
+
+    memset(&a,0,sizeof(a));
+    r &= __lzo_assert(a.a_charp == NULL);
+    r &= __lzo_assert(a.a_lzo_bytep == NULL);
+    //r &= __lzo_assert(NULL == 0);
+    if (r == 1)
+    {
+	for (i = 0; i < 10; i++)
+	    dict[i] = wrkmem;
+	BZERO8_PTR(dict+1,sizeof(dict[0]),8);
+	r &= __lzo_assert(dict[0] == wrkmem);
+	for (i = 1; i < 9; i++)
+	    r &= __lzo_assert(dict[i] == NULL);
+	r &= __lzo_assert(dict[9] == wrkmem);
+    }
+
+    if (r == 1)
+    {
+	unsigned k = 1;
+	const unsigned n = (unsigned) sizeof(lzo_uint32);
+	lzo_byte *p0;
+	lzo_byte *p1;
+
+	k += __lzo_align_gap(&x[k],n);
+	p0 = (lzo_bytep) &x[k];
+#if defined(PTR_LINEAR)
+	r &= __lzo_assert((PTR_LINEAR(p0) & (n-1)) == 0);
+#else
+	r &= __lzo_assert(n == 4);
+	r &= __lzo_assert(PTR_ALIGNED_4(p0));
+#endif
+
+	r &= __lzo_assert(k >= 1);
+	p1 = (lzo_bytep) &x[1];
+	r &= __lzo_assert(PTR_GE(p0,p1));
+
+	r &= __lzo_assert(k < 1+n);
+	p1 = (lzo_bytep) &x[1+n];
+	r &= __lzo_assert(PTR_LT(p0,p1));
+
+	if (r == 1)
+	{
+	    lzo_uint32 v0 = * (lzo_uint32 *) &x[k];
+	    lzo_uint32 v1 = * (lzo_uint32 *) &x[k+n];
+
+	    r &= __lzo_assert(v0 > 0);
+	    r &= __lzo_assert(v1 > 0);
+	}
+    }
+
+    return r;
+}
+
+LZO_PUBLIC(int)
+_lzo_config_check(void)
+{
+    lzo_bool r = 1;
+    int i;
+    union {
+	lzo_uint32 a;
+	unsigned short b;
+	lzo_uint32 aa[4];
+	unsigned char x[4*sizeof(lzo_align_t)];
+    } u;
+
+#if 0
+    r &= __lzo_assert((const void *)&u == (const void *)&u.a);
+    r &= __lzo_assert((const void *)&u == (const void *)&u.b);
+    r &= __lzo_assert((const void *)&u == (const void *)&u.x[0]);
+    r &= __lzo_assert((const void *)&u == (const void *)&u.aa[0]);
+#endif
+
+    r &= basic_integral_check();
+    r &= basic_ptr_check();
+    if (r != 1)
+	return LZO_E_ERROR;
+
+    u.a = 0; u.b = 0;
+    for (i = 0; i < (int) sizeof(u.x); i++)
+	u.x[i] = LZO_BYTE(i);
+
+#if 0
+    r &= __lzo_assert( (int) (unsigned char) ((char) -1) == 255);
+#endif
+
+#if defined(LZO_BYTE_ORDER)
+    if (r == 1)
+    {
+#  if (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN)
+	lzo_uint32 a = (lzo_uint32) (u.a & LZO_0xffffffffL);
+	unsigned short b = (unsigned short) (u.b & 0xffff);
+	r &= __lzo_assert(a == 0x03020100L);
+	r &= __lzo_assert(b == 0x0100);
+#  elif (LZO_BYTE_ORDER == LZO_BIG_ENDIAN)
+	lzo_uint32 a = u.a >> (8 * sizeof(u.a) - 32);
+	unsigned short b = u.b >> (8 * sizeof(u.b) - 16);
+	r &= __lzo_assert(a == 0x00010203L);
+	r &= __lzo_assert(b == 0x0001);
+#  else
+#    error invalid LZO_BYTE_ORDER
+#  endif
+    }
+#endif
+
+#if defined(LZO_UNALIGNED_OK_2)
+    r &= __lzo_assert(sizeof(short) == 2);
+    if (r == 1)
+    {
+	unsigned short b[4];
+
+	for (i = 0; i < 4; i++)
+	    b[i] = * (const unsigned short *) &u.x[i];
+
+#  if (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN)
+	r &= __lzo_assert(b[0] == 0x0100);
+	r &= __lzo_assert(b[1] == 0x0201);
+	r &= __lzo_assert(b[2] == 0x0302);
+	r &= __lzo_assert(b[3] == 0x0403);
+#  elif (LZO_BYTE_ORDER == LZO_BIG_ENDIAN)
+	r &= __lzo_assert(b[0] == 0x0001);
+	r &= __lzo_assert(b[1] == 0x0102);
+	r &= __lzo_assert(b[2] == 0x0203);
+	r &= __lzo_assert(b[3] == 0x0304);
+#  endif
+    }
+#endif
+
+#if defined(LZO_UNALIGNED_OK_4)
+    r &= __lzo_assert(sizeof(lzo_uint32) == 4);
+    if (r == 1)
+    {
+	lzo_uint32 a[4];
+
+	for (i = 0; i < 4; i++)
+	    a[i] = * (const lzo_uint32 *) &u.x[i];
+
+#  if (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN)
+	r &= __lzo_assert(a[0] == 0x03020100L);
+	r &= __lzo_assert(a[1] == 0x04030201L);
+	r &= __lzo_assert(a[2] == 0x05040302L);
+	r &= __lzo_assert(a[3] == 0x06050403L);
+#  elif (LZO_BYTE_ORDER == LZO_BIG_ENDIAN)
+	r &= __lzo_assert(a[0] == 0x00010203L);
+	r &= __lzo_assert(a[1] == 0x01020304L);
+	r &= __lzo_assert(a[2] == 0x02030405L);
+	r &= __lzo_assert(a[3] == 0x03040506L);
+#  endif
+    }
+#endif
+
+#if defined(LZO_ALIGNED_OK_4)
+    r &= __lzo_assert(sizeof(lzo_uint32) == 4);
+#endif
+
+    r &= __lzo_assert(lzo_sizeof_dict_t == sizeof(lzo_dict_t));
+
+#if defined(__LZO_IN_MINLZO)
+    if (r == 1)
+    {
+	lzo_uint32 adler;
+	adler = lzo_adler32(0, NULL, 0);
+	adler = lzo_adler32(adler, lzo_copyright(), 200);
+	r &= __lzo_assert(adler == 0x7ea34377L);
+    }
+#endif
+
+    if (r == 1)
+    {
+	r &= __lzo_assert(!schedule_insns_bug());
+    }
+
+    if (r == 1)
+    {
+	static int x[3];
+	static unsigned xn = 3;
+	register unsigned j;
+
+	for (j = 0; j < xn; j++)
+	    x[j] = (int)j - 3;
+	r &= __lzo_assert(!strength_reduce_bug(x));
+    }
+
+    if (r == 1)
+    {
+	r &= ptr_check();
+    }
+
+    return r == 1 ? LZO_E_OK : LZO_E_ERROR;
+}
+
+static lzo_bool schedule_insns_bug(void)
+{
+#if defined(__LZO_CHECKER)
+    return 0;
+#else
+    const int clone[] = {1, 2, 0};
+    const int *q;
+    q = clone;
+    return (*q) ? 0 : 1;
+#endif
+}
+
+static lzo_bool strength_reduce_bug(int *x)
+{
+    return x[0] != -3 || x[1] != -2 || x[2] != -1;
+}
+
+int __lzo_init_done = 0;
+
+LZO_PUBLIC(int)
+__lzo_init2(unsigned v, int s1, int s2, int s3, int s4, int s5,
+			int s6, int s7, int s8, int s9)
+{
+    int r;
+
+    __lzo_init_done = 1;
+
+    if (v == 0)
+	return LZO_E_ERROR;
+
+    r = (s1 == -1 || s1 == (int) sizeof(short)) &&
+	(s2 == -1 || s2 == (int) sizeof(int)) &&
+	(s3 == -1 || s3 == (int) sizeof(long)) &&
+	(s4 == -1 || s4 == (int) sizeof(lzo_uint32)) &&
+	(s5 == -1 || s5 == (int) sizeof(lzo_uint)) &&
+	(s6 == -1 || s6 == (int) lzo_sizeof_dict_t) &&
+	(s7 == -1 || s7 == (int) sizeof(char *)) &&
+	(s8 == -1 || s8 == (int) sizeof(lzo_voidp)) &&
+	(s9 == -1 || s9 == (int) sizeof(lzo_compress_t));
+    if (!r)
+	return LZO_E_ERROR;
+
+    r = _lzo_config_check();
+    if (r != LZO_E_OK)
+	return r;
+
+    return r;
+}
+
+#if !defined(__LZO_IN_MINILZO)
+
+LZO_EXTERN(int)
+__lzo_init(unsigned v,int s1,int s2,int s3,int s4,int s5,int s6,int s7);
+
+LZO_PUBLIC(int)
+__lzo_init(unsigned v,int s1,int s2,int s3,int s4,int s5,int s6,int s7)
+{
+    if (v == 0 || v > 0x1010)
+	return LZO_E_ERROR;
+    return __lzo_init2(v,s1,s2,s3,s4,s5,-1,-1,s6,s7);
+}
+
+#endif
+
+#define do_compress         _lzo1x_1_do_compress
+
+#define LZO_NEED_DICT_H
+#define D_BITS          14
+#define D_INDEX1(d,p)       d = DM((0x21*DX3(p,5,5,6)) >> 5)
+#define D_INDEX2(d,p)       d = (d & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f)
+
+#ifndef __LZO_CONFIG1X_H
+#define __LZO_CONFIG1X_H
+
+#if !defined(LZO1X) && !defined(LZO1Y) && !defined(LZO1Z)
+#  define LZO1X
+#endif
+
+#if !defined(__LZO_IN_MINILZO)
+#include <lzo1x.h>
+#endif
+
+#define LZO_EOF_CODE
+#undef LZO_DETERMINISTIC
+
+#define M1_MAX_OFFSET   0x0400
+#ifndef M2_MAX_OFFSET
+#define M2_MAX_OFFSET   0x0800
+#endif
+#define M3_MAX_OFFSET   0x4000
+#define M4_MAX_OFFSET   0xbfff
+
+#define MX_MAX_OFFSET   (M1_MAX_OFFSET + M2_MAX_OFFSET)
+
+#define M1_MIN_LEN      2
+#define M1_MAX_LEN      2
+#define M2_MIN_LEN      3
+#ifndef M2_MAX_LEN
+#define M2_MAX_LEN      8
+#endif
+#define M3_MIN_LEN      3
+#define M3_MAX_LEN      33
+#define M4_MIN_LEN      3
+#define M4_MAX_LEN      9
+
+#define M1_MARKER       0
+#define M2_MARKER       64
+#define M3_MARKER       32
+#define M4_MARKER       16
+
+#ifndef MIN_LOOKAHEAD
+#define MIN_LOOKAHEAD       (M2_MAX_LEN + 1)
+#endif
+
+#if defined(LZO_NEED_DICT_H)
+
+#ifndef LZO_HASH
+#define LZO_HASH            LZO_HASH_LZO_INCREMENTAL_B
+#endif
+#define DL_MIN_LEN          M2_MIN_LEN
+
+#ifndef __LZO_DICT_H
+#define __LZO_DICT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if !defined(D_BITS) && defined(DBITS)
+#  define D_BITS        DBITS
+#endif
+#if !defined(D_BITS)
+#  error D_BITS is not defined
+#endif
+#if (D_BITS < 16)
+#  define D_SIZE        LZO_SIZE(D_BITS)
+#  define D_MASK        LZO_MASK(D_BITS)
+#else
+#  define D_SIZE        LZO_USIZE(D_BITS)
+#  define D_MASK        LZO_UMASK(D_BITS)
+#endif
+#define D_HIGH          ((D_MASK >> 1) + 1)
+
+#if !defined(DD_BITS)
+#  define DD_BITS       0
+#endif
+#define DD_SIZE         LZO_SIZE(DD_BITS)
+#define DD_MASK         LZO_MASK(DD_BITS)
+
+#if !defined(DL_BITS)
+#  define DL_BITS       (D_BITS - DD_BITS)
+#endif
+#if (DL_BITS < 16)
+#  define DL_SIZE       LZO_SIZE(DL_BITS)
+#  define DL_MASK       LZO_MASK(DL_BITS)
+#else
+#  define DL_SIZE       LZO_USIZE(DL_BITS)
+#  define DL_MASK       LZO_UMASK(DL_BITS)
+#endif
+
+#if (D_BITS != DL_BITS + DD_BITS)
+#  error D_BITS does not match
+#endif
+#if (D_BITS < 8 || D_BITS > 18)
+#  error invalid D_BITS
+#endif
+#if (DL_BITS < 8 || DL_BITS > 20)
+#  error invalid DL_BITS
+#endif
+#if (DD_BITS < 0 || DD_BITS > 6)
+#  error invalid DD_BITS
+#endif
+
+#if !defined(DL_MIN_LEN)
+#  define DL_MIN_LEN    3
+#endif
+#if !defined(DL_SHIFT)
+#  define DL_SHIFT      ((DL_BITS + (DL_MIN_LEN - 1)) / DL_MIN_LEN)
+#endif
+
+#define LZO_HASH_GZIP                   1
+#define LZO_HASH_GZIP_INCREMENTAL       2
+#define LZO_HASH_LZO_INCREMENTAL_A      3
+#define LZO_HASH_LZO_INCREMENTAL_B      4
+
+#if !defined(LZO_HASH)
+#  error choose a hashing strategy
+#endif
+
+#if (DL_MIN_LEN == 3)
+#  define _DV2_A(p,shift1,shift2) \
+	(((( (lzo_uint32)((p)[0]) << shift1) ^ (p)[1]) << shift2) ^ (p)[2])
+#  define _DV2_B(p,shift1,shift2) \
+	(((( (lzo_uint32)((p)[2]) << shift1) ^ (p)[1]) << shift2) ^ (p)[0])
+#  define _DV3_B(p,shift1,shift2,shift3) \
+	((_DV2_B((p)+1,shift1,shift2) << (shift3)) ^ (p)[0])
+#elif (DL_MIN_LEN == 2)
+#  define _DV2_A(p,shift1,shift2) \
+	(( (lzo_uint32)(p[0]) << shift1) ^ p[1])
+#  define _DV2_B(p,shift1,shift2) \
+	(( (lzo_uint32)(p[1]) << shift1) ^ p[2])
+#else
+#  error invalid DL_MIN_LEN
+#endif
+#define _DV_A(p,shift)      _DV2_A(p,shift,shift)
+#define _DV_B(p,shift)      _DV2_B(p,shift,shift)
+#define DA2(p,s1,s2) \
+	(((((lzo_uint32)((p)[2]) << (s2)) + (p)[1]) << (s1)) + (p)[0])
+#define DS2(p,s1,s2) \
+	(((((lzo_uint32)((p)[2]) << (s2)) - (p)[1]) << (s1)) - (p)[0])
+#define DX2(p,s1,s2) \
+	(((((lzo_uint32)((p)[2]) << (s2)) ^ (p)[1]) << (s1)) ^ (p)[0])
+#define DA3(p,s1,s2,s3) ((DA2((p)+1,s2,s3) << (s1)) + (p)[0])
+#define DS3(p,s1,s2,s3) ((DS2((p)+1,s2,s3) << (s1)) - (p)[0])
+#define DX3(p,s1,s2,s3) ((DX2((p)+1,s2,s3) << (s1)) ^ (p)[0])
+#define DMS(v,s)        ((lzo_uint) (((v) & (D_MASK >> (s))) << (s)))
+#define DM(v)           DMS(v,0)
+
+#if (LZO_HASH == LZO_HASH_GZIP)
+#  define _DINDEX(dv,p)     (_DV_A((p),DL_SHIFT))
+
+#elif (LZO_HASH == LZO_HASH_GZIP_INCREMENTAL)
+#  define __LZO_HASH_INCREMENTAL
+#  define DVAL_FIRST(dv,p)  dv = _DV_A((p),DL_SHIFT)
+#  define DVAL_NEXT(dv,p)   dv = (((dv) << DL_SHIFT) ^ p[2])
+#  define _DINDEX(dv,p)     (dv)
+#  define DVAL_LOOKAHEAD    DL_MIN_LEN
+
+#elif (LZO_HASH == LZO_HASH_LZO_INCREMENTAL_A)
+#  define __LZO_HASH_INCREMENTAL
+#  define DVAL_FIRST(dv,p)  dv = _DV_A((p),5)
+#  define DVAL_NEXT(dv,p) \
+		dv ^= (lzo_uint32)(p[-1]) << (2*5); dv = (((dv) << 5) ^ p[2])
+#  define _DINDEX(dv,p)     ((0x9f5f * (dv)) >> 5)
+#  define DVAL_LOOKAHEAD    DL_MIN_LEN
+
+#elif (LZO_HASH == LZO_HASH_LZO_INCREMENTAL_B)
+#  define __LZO_HASH_INCREMENTAL
+#  define DVAL_FIRST(dv,p)  dv = _DV_B((p),5)
+#  define DVAL_NEXT(dv,p) \
+		dv ^= p[-1]; dv = (((dv) >> 5) ^ ((lzo_uint32)(p[2]) << (2*5)))
+#  define _DINDEX(dv,p)     ((0x9f5f * (dv)) >> 5)
+#  define DVAL_LOOKAHEAD    DL_MIN_LEN
+
+#else
+#  error choose a hashing strategy
+#endif
+
+#ifndef DINDEX
+#define DINDEX(dv,p)        ((lzo_uint)((_DINDEX(dv,p)) & DL_MASK) << DD_BITS)
+#endif
+#if !defined(DINDEX1) && defined(D_INDEX1)
+#define DINDEX1             D_INDEX1
+#endif
+#if !defined(DINDEX2) && defined(D_INDEX2)
+#define DINDEX2             D_INDEX2
+#endif
+
+#if !defined(__LZO_HASH_INCREMENTAL)
+#  define DVAL_FIRST(dv,p)  ((void) 0)
+#  define DVAL_NEXT(dv,p)   ((void) 0)
+#  define DVAL_LOOKAHEAD    0
+#endif
+
+#if !defined(DVAL_ASSERT)
+#if defined(__LZO_HASH_INCREMENTAL) && !defined(NDEBUG)
+static void DVAL_ASSERT(lzo_uint32 dv, const lzo_byte *p)
+{
+    lzo_uint32 df;
+    DVAL_FIRST(df,(p));
+    assert(DINDEX(dv,p) == DINDEX(df,p));
+}
+#else
+#  define DVAL_ASSERT(dv,p) ((void) 0)
+#endif
+#endif
+
+#if defined(LZO_DICT_USE_PTR)
+#  define DENTRY(p,in)                          (p)
+#  define GINDEX(m_pos,m_off,dict,dindex,in)    m_pos = dict[dindex]
+#else
+#  define DENTRY(p,in)                          ((lzo_uint) ((p)-(in)))
+#  define GINDEX(m_pos,m_off,dict,dindex,in)    m_off = dict[dindex]
+#endif
+
+#if (DD_BITS == 0)
+
+#  define UPDATE_D(dict,drun,dv,p,in)       dict[ DINDEX(dv,p) ] = DENTRY(p,in)
+#  define UPDATE_I(dict,drun,index,p,in)    dict[index] = DENTRY(p,in)
+#  define UPDATE_P(ptr,drun,p,in)           (ptr)[0] = DENTRY(p,in)
+
+#else
+
+#  define UPDATE_D(dict,drun,dv,p,in)   \
+	dict[ DINDEX(dv,p) + drun++ ] = DENTRY(p,in); drun &= DD_MASK
+#  define UPDATE_I(dict,drun,index,p,in)    \
+	dict[ (index) + drun++ ] = DENTRY(p,in); drun &= DD_MASK
+#  define UPDATE_P(ptr,drun,p,in)   \
+	(ptr) [ drun++ ] = DENTRY(p,in); drun &= DD_MASK
+
+#endif
+
+#if defined(LZO_DICT_USE_PTR)
+
+#define LZO_CHECK_MPOS_DET(m_pos,m_off,in,ip,max_offset) \
+	(m_pos == NULL || (m_off = (lzo_moff_t) (ip - m_pos)) > max_offset)
+
+#define LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,max_offset) \
+    (BOUNDS_CHECKING_OFF_IN_EXPR( \
+	(PTR_LT(m_pos,in) || \
+	 (m_off = (lzo_moff_t) PTR_DIFF(ip,m_pos)) <= 0 || \
+	  m_off > max_offset) ))
+
+#else
+
+#define LZO_CHECK_MPOS_DET(m_pos,m_off,in,ip,max_offset) \
+	(m_off == 0 || \
+	 ((m_off = (lzo_moff_t) ((ip)-(in)) - m_off) > max_offset) || \
+	 (m_pos = (ip) - (m_off), 0) )
+
+#define LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,max_offset) \
+	((lzo_moff_t) ((ip)-(in)) <= m_off || \
+	 ((m_off = (lzo_moff_t) ((ip)-(in)) - m_off) > max_offset) || \
+	 (m_pos = (ip) - (m_off), 0) )
+
+#endif
+
+#if defined(LZO_DETERMINISTIC)
+#  define LZO_CHECK_MPOS    LZO_CHECK_MPOS_DET
+#else
+#  define LZO_CHECK_MPOS    LZO_CHECK_MPOS_NON_DET
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
+#endif
+
+#endif
+
+#define DO_COMPRESS     lzo1x_1_compress
+
+static
+lzo_uint do_compress     ( const lzo_byte *in , lzo_uint  in_len,
+				 lzo_byte *out, lzo_uint *out_len,
+				 lzo_voidp wrkmem )
+{
+#if 0 && defined(__GNUC__) && defined(__i386__)
+    register const lzo_byte *ip __asm__("%esi");
+#else
+    register const lzo_byte *ip;
+#endif
+    lzo_byte *op;
+    const lzo_byte * const in_end = in + in_len;
+    const lzo_byte * const ip_end = in + in_len - M2_MAX_LEN - 5;
+    const lzo_byte *ii;
+    lzo_dict_p const dict = (lzo_dict_p) wrkmem;
+
+    op = out;
+    ip = in;
+    ii = ip;
+
+    ip += 4;
+    for (;;)
+    {
+#if 0 && defined(__GNUC__) && defined(__i386__)
+	register const lzo_byte *m_pos __asm__("%edi");
+#else
+	register const lzo_byte *m_pos;
+#endif
+	lzo_moff_t m_off;
+	lzo_uint m_len;
+	lzo_uint dindex;
+
+	DINDEX1(dindex,ip);
+	GINDEX(m_pos,m_off,dict,dindex,in);
+	if (LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,M4_MAX_OFFSET))
+	    goto literal;
+#if 1
+	if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3])
+	    goto try_match;
+	DINDEX2(dindex,ip);
+#endif
+	GINDEX(m_pos,m_off,dict,dindex,in);
+	if (LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,M4_MAX_OFFSET))
+	    goto literal;
+	if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3])
+	    goto try_match;
+	goto literal;
+
+try_match:
+#if 1 && defined(LZO_UNALIGNED_OK_2)
+	if (* (const lzo_ushortp) m_pos != * (const lzo_ushortp) ip)
+#else
+	if (m_pos[0] != ip[0] || m_pos[1] != ip[1])
+#endif
+	{
+	}
+	else
+	{
+	    if (m_pos[2] == ip[2])
+	    {
+#if 0
+		if (m_off <= M2_MAX_OFFSET)
+		    goto match;
+		if (lit <= 3)
+		    goto match;
+		if (lit == 3)
+		{
+		    assert(op - 2 > out); op[-2] |= LZO_BYTE(3);
+		    *op++ = *ii++; *op++ = *ii++; *op++ = *ii++;
+		    goto code_match;
+		}
+		if (m_pos[3] == ip[3])
+#endif
+		    goto match;
+	    }
+	    else
+	    {
+#if 0
+#if 0
+		if (m_off <= M1_MAX_OFFSET && lit > 0 && lit <= 3)
+#else
+		if (m_off <= M1_MAX_OFFSET && lit == 3)
+#endif
+		{
+		    register lzo_uint t;
+
+		    t = lit;
+		    assert(op - 2 > out); op[-2] |= LZO_BYTE(t);
+		    do *op++ = *ii++; while (--t > 0);
+		    assert(ii == ip);
+		    m_off -= 1;
+		    *op++ = LZO_BYTE(M1_MARKER | ((m_off & 3) << 2));
+		    *op++ = LZO_BYTE(m_off >> 2);
+		    ip += 2;
+		    goto match_done;
+		}
+#endif
+	    }
+	}
+
+literal:
+	UPDATE_I(dict,0,dindex,ip,in);
+	++ip;
+	if (ip >= ip_end)
+	    break;
+	continue;
+
+match:
+	UPDATE_I(dict,0,dindex,ip,in);
+	if (ip - ii > 0)
+	{
+	    register lzo_uint t = ip - ii;
+
+	    if (t <= 3)
+	    {
+		assert(op - 2 > out);
+		op[-2] |= LZO_BYTE(t);
+	    }
+	    else if (t <= 18)
+		*op++ = LZO_BYTE(t - 3);
+	    else
+	    {
+		register lzo_uint tt = t - 18;
+
+		*op++ = 0;
+		while (tt > 255)
+		{
+		    tt -= 255;
+		    *op++ = 0;
+		}
+		assert(tt > 0);
+		*op++ = LZO_BYTE(tt);
+	    }
+	    do *op++ = *ii++; while (--t > 0);
+	}
+
+	assert(ii == ip);
+	ip += 3;
+	if (m_pos[3] != *ip++ || m_pos[4] != *ip++ || m_pos[5] != *ip++ ||
+	    m_pos[6] != *ip++ || m_pos[7] != *ip++ || m_pos[8] != *ip++
+#ifdef LZO1Y
+	    || m_pos[ 9] != *ip++ || m_pos[10] != *ip++ || m_pos[11] != *ip++
+	    || m_pos[12] != *ip++ || m_pos[13] != *ip++ || m_pos[14] != *ip++
+#endif
+	   )
+	{
+	    --ip;
+	    m_len = ip - ii;
+	    assert(m_len >= 3); assert(m_len <= M2_MAX_LEN);
+
+	    if (m_off <= M2_MAX_OFFSET)
+	    {
+		m_off -= 1;
+#if defined(LZO1X)
+		*op++ = LZO_BYTE(((m_len - 1) << 5) | ((m_off & 7) << 2));
+		*op++ = LZO_BYTE(m_off >> 3);
+#elif defined(LZO1Y)
+		*op++ = LZO_BYTE(((m_len + 1) << 4) | ((m_off & 3) << 2));
+		*op++ = LZO_BYTE(m_off >> 2);
+#endif
+	    }
+	    else if (m_off <= M3_MAX_OFFSET)
+	    {
+		m_off -= 1;
+		*op++ = LZO_BYTE(M3_MARKER | (m_len - 2));
+		goto m3_m4_offset;
+	    }
+	    else
+#if defined(LZO1X)
+	    {
+		m_off -= 0x4000;
+		assert(m_off > 0); assert(m_off <= 0x7fff);
+		*op++ = LZO_BYTE(M4_MARKER |
+				 ((m_off & 0x4000) >> 11) | (m_len - 2));
+		goto m3_m4_offset;
+	    }
+#elif defined(LZO1Y)
+		goto m4_match;
+#endif
+	}
+	else
+	{
+	    {
+		const lzo_byte *end = in_end;
+		const lzo_byte *m = m_pos + M2_MAX_LEN + 1;
+		while (ip < end && *m == *ip)
+		    m++, ip++;
+		m_len = (ip - ii);
+	    }
+	    assert(m_len > M2_MAX_LEN);
+
+	    if (m_off <= M3_MAX_OFFSET)
+	    {
+		m_off -= 1;
+		if (m_len <= 33)
+		    *op++ = LZO_BYTE(M3_MARKER | (m_len - 2));
+		else
+		{
+		    m_len -= 33;
+		    *op++ = M3_MARKER | 0;
+		    goto m3_m4_len;
+		}
+	    }
+	    else
+	    {
+#if defined(LZO1Y)
+m4_match:
+#endif
+		m_off -= 0x4000;
+		assert(m_off > 0); assert(m_off <= 0x7fff);
+		if (m_len <= M4_MAX_LEN)
+		    *op++ = LZO_BYTE(M4_MARKER |
+				     ((m_off & 0x4000) >> 11) | (m_len - 2));
+		else
+		{
+		    m_len -= M4_MAX_LEN;
+		    *op++ = LZO_BYTE(M4_MARKER | ((m_off & 0x4000) >> 11));
+m3_m4_len:
+		    while (m_len > 255)
+		    {
+			m_len -= 255;
+			*op++ = 0;
+		    }
+		    assert(m_len > 0);
+		    *op++ = LZO_BYTE(m_len);
+		}
+	    }
+
+m3_m4_offset:
+	    *op++ = LZO_BYTE((m_off & 63) << 2);
+	    *op++ = LZO_BYTE(m_off >> 6);
+	}
+
+#if 0
+match_done:
+#endif
+	ii = ip;
+	if (ip >= ip_end)
+	    break;
+    }
+
+    *out_len = op - out;
+    return (lzo_uint) (in_end - ii);
+}
+
+LZO_PUBLIC(int)
+DO_COMPRESS      ( const lzo_byte *in , lzo_uint  in_len,
+			 lzo_byte *out, lzo_uint *out_len,
+			 lzo_voidp wrkmem )
+{
+    lzo_byte *op = out;
+    lzo_uint t;
+
+#if defined(__LZO_QUERY_COMPRESS)
+    if (__LZO_IS_COMPRESS_QUERY(in,in_len,out,out_len,wrkmem))
+	return __LZO_QUERY_COMPRESS(in,in_len,out,out_len,wrkmem,D_SIZE,lzo_sizeof(lzo_dict_t));
+#endif
+
+    if (in_len <= M2_MAX_LEN + 5)
+	t = in_len;
+    else
+    {
+	t = do_compress(in,in_len,op,out_len,wrkmem);
+	op += *out_len;
+    }
+
+    if (t > 0)
+    {
+	const lzo_byte *ii = in + in_len - t;
+
+	if (op == out && t <= 238)
+	    *op++ = LZO_BYTE(17 + t);
+	else if (t <= 3)
+	    op[-2] |= LZO_BYTE(t);
+	else if (t <= 18)
+	    *op++ = LZO_BYTE(t - 3);
+	else
+	{
+	    lzo_uint tt = t - 18;
+
+	    *op++ = 0;
+	    while (tt > 255)
+	    {
+		tt -= 255;
+		*op++ = 0;
+	    }
+	    assert(tt > 0);
+	    *op++ = LZO_BYTE(tt);
+	}
+	do *op++ = *ii++; while (--t > 0);
+    }
+
+    *op++ = M4_MARKER | 1;
+    *op++ = 0;
+    *op++ = 0;
+
+    *out_len = op - out;
+    return LZO_E_OK;
+}
+
+#undef do_compress
+#undef DO_COMPRESS
+#undef LZO_HASH
+
+#undef LZO_TEST_DECOMPRESS_OVERRUN
+#undef LZO_TEST_DECOMPRESS_OVERRUN_INPUT
+#undef LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT
+#undef LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND
+#undef DO_DECOMPRESS
+#define DO_DECOMPRESS       lzo1x_decompress
+
+#if defined(LZO_TEST_DECOMPRESS_OVERRUN)
+#  if !defined(LZO_TEST_DECOMPRESS_OVERRUN_INPUT)
+#    define LZO_TEST_DECOMPRESS_OVERRUN_INPUT       2
+#  endif
+#  if !defined(LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT)
+#    define LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT      2
+#  endif
+#  if !defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND)
+#    define LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND
+#  endif
+#endif
+
+#undef TEST_IP
+#undef TEST_OP
+#undef TEST_LOOKBEHIND
+#undef NEED_IP
+#undef NEED_OP
+#undef HAVE_TEST_IP
+#undef HAVE_TEST_OP
+#undef HAVE_NEED_IP
+#undef HAVE_NEED_OP
+#undef HAVE_ANY_IP
+#undef HAVE_ANY_OP
+
+#if defined(LZO_TEST_DECOMPRESS_OVERRUN_INPUT)
+#  if (LZO_TEST_DECOMPRESS_OVERRUN_INPUT >= 1)
+#    define TEST_IP             (ip < ip_end)
+#  endif
+#  if (LZO_TEST_DECOMPRESS_OVERRUN_INPUT >= 2)
+#    define NEED_IP(x) \
+	    if ((lzo_uint)(ip_end - ip) < (lzo_uint)(x))  goto input_overrun
+#  endif
+#endif
+
+#if defined(LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT)
+#  if (LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT >= 1)
+#    define TEST_OP             (op <= op_end)
+#  endif
+#  if (LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT >= 2)
+#    undef TEST_OP
+#    define NEED_OP(x) \
+	    if ((lzo_uint)(op_end - op) < (lzo_uint)(x))  goto output_overrun
+#  endif
+#endif
+
+#if defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND)
+#  define TEST_LOOKBEHIND(m_pos,out)    if (m_pos < out) goto lookbehind_overrun
+#else
+#  define TEST_LOOKBEHIND(m_pos,op)     ((void) 0)
+#endif
+
+#if !defined(LZO_EOF_CODE) && !defined(TEST_IP)
+#  define TEST_IP               (ip < ip_end)
+#endif
+
+#if defined(TEST_IP)
+#  define HAVE_TEST_IP
+#else
+#  define TEST_IP               1
+#endif
+#if defined(TEST_OP)
+#  define HAVE_TEST_OP
+#else
+#  define TEST_OP               1
+#endif
+
+#if defined(NEED_IP)
+#  define HAVE_NEED_IP
+#else
+#  define NEED_IP(x)            ((void) 0)
+#endif
+#if defined(NEED_OP)
+#  define HAVE_NEED_OP
+#else
+#  define NEED_OP(x)            ((void) 0)
+#endif
+
+#if defined(HAVE_TEST_IP) || defined(HAVE_NEED_IP)
+#  define HAVE_ANY_IP
+#endif
+#if defined(HAVE_TEST_OP) || defined(HAVE_NEED_OP)
+#  define HAVE_ANY_OP
+#endif
+
+#if defined(DO_DECOMPRESS)
+LZO_PUBLIC(int)
+DO_DECOMPRESS  ( const lzo_byte *in , lzo_uint  in_len,
+		       lzo_byte *out, lzo_uint *out_len,
+		       lzo_voidp wrkmem )
+#endif
+{
+    register lzo_byte *op;
+    register const lzo_byte *ip;
+    register lzo_uint t;
+#if defined(COPY_DICT)
+    lzo_uint m_off;
+    const lzo_byte *dict_end;
+#else
+    register const lzo_byte *m_pos;
+#endif
+
+    const lzo_byte * const ip_end = in + in_len;
+#if defined(HAVE_ANY_OP)
+    lzo_byte * const op_end = out + *out_len;
+#endif
+#if defined(LZO1Z)
+    lzo_uint last_m_off = 0;
+#endif
+
+    LZO_UNUSED(wrkmem);
+
+#if defined(__LZO_QUERY_DECOMPRESS)
+    if (__LZO_IS_DECOMPRESS_QUERY(in,in_len,out,out_len,wrkmem))
+	return __LZO_QUERY_DECOMPRESS(in,in_len,out,out_len,wrkmem,0,0);
+#endif
+
+#if defined(COPY_DICT)
+    if (dict)
+    {
+	if (dict_len > M4_MAX_OFFSET)
+	{
+	    dict += dict_len - M4_MAX_OFFSET;
+	    dict_len = M4_MAX_OFFSET;
+	}
+	dict_end = dict + dict_len;
+    }
+    else
+    {
+	dict_len = 0;
+	dict_end = NULL;
+    }
+#endif
+
+    *out_len = 0;
+
+    op = out;
+    ip = in;
+
+    if (*ip > 17)
+    {
+	t = *ip++ - 17;
+	if (t < 4)
+	    goto match_next;
+	assert(t > 0); NEED_OP(t); NEED_IP(t+1);
+	do *op++ = *ip++; while (--t > 0);
+	goto first_literal_run;
+    }
+
+    while (TEST_IP && TEST_OP)
+    {
+	t = *ip++;
+	if (t >= 16)
+	    goto match;
+	if (t == 0)
+	{
+	    NEED_IP(1);
+	    while (*ip == 0)
+	    {
+		t += 255;
+		ip++;
+		NEED_IP(1);
+	    }
+	    t += 15 + *ip++;
+	}
+	assert(t > 0); NEED_OP(t+3); NEED_IP(t+4);
+#if defined(LZO_UNALIGNED_OK_4) || defined(LZO_ALIGNED_OK_4)
+#if !defined(LZO_UNALIGNED_OK_4)
+	if (PTR_ALIGNED2_4(op,ip))
+	{
+#endif
+	* (lzo_uint32p) op = * (const lzo_uint32p) ip;
+	op += 4; ip += 4;
+	if (--t > 0)
+	{
+	    if (t >= 4)
+	    {
+		do {
+		    * (lzo_uint32p) op = * (const lzo_uint32p) ip;
+		    op += 4; ip += 4; t -= 4;
+		} while (t >= 4);
+		if (t > 0) do *op++ = *ip++; while (--t > 0);
+	    }
+	    else
+		do *op++ = *ip++; while (--t > 0);
+	}
+#if !defined(LZO_UNALIGNED_OK_4)
+	}
+	else
+#endif
+#endif
+#if !defined(LZO_UNALIGNED_OK_4)
+	{
+	    *op++ = *ip++; *op++ = *ip++; *op++ = *ip++;
+	    do *op++ = *ip++; while (--t > 0);
+	}
+#endif
+
+first_literal_run:
+
+	t = *ip++;
+	if (t >= 16)
+	    goto match;
+#if defined(COPY_DICT)
+#if defined(LZO1Z)
+	m_off = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2);
+	last_m_off = m_off;
+#else
+	m_off = (1 + M2_MAX_OFFSET) + (t >> 2) + (*ip++ << 2);
+#endif
+	NEED_OP(3);
+	t = 3; COPY_DICT(t,m_off)
+#else
+#if defined(LZO1Z)
+	t = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2);
+	m_pos = op - t;
+	last_m_off = t;
+#else
+	m_pos = op - (1 + M2_MAX_OFFSET);
+	m_pos -= t >> 2;
+	m_pos -= *ip++ << 2;
+#endif
+	TEST_LOOKBEHIND(m_pos,out); NEED_OP(3);
+	*op++ = *m_pos++; *op++ = *m_pos++; *op++ = *m_pos;
+#endif
+	goto match_done;
+
+	while (TEST_IP && TEST_OP)
+	{
+match:
+	    if (t >= 64)
+	    {
+#if defined(COPY_DICT)
+#if defined(LZO1X)
+		m_off = 1 + ((t >> 2) & 7) + (*ip++ << 3);
+		t = (t >> 5) - 1;
+#elif defined(LZO1Y)
+		m_off = 1 + ((t >> 2) & 3) + (*ip++ << 2);
+		t = (t >> 4) - 3;
+#elif defined(LZO1Z)
+		m_off = t & 0x1f;
+		if (m_off >= 0x1c)
+		    m_off = last_m_off;
+		else
+		{
+		    m_off = 1 + (m_off << 6) + (*ip++ >> 2);
+		    last_m_off = m_off;
+		}
+		t = (t >> 5) - 1;
+#endif
+#else
+#if defined(LZO1X)
+		m_pos = op - 1;
+		m_pos -= (t >> 2) & 7;
+		m_pos -= *ip++ << 3;
+		t = (t >> 5) - 1;
+#elif defined(LZO1Y)
+		m_pos = op - 1;
+		m_pos -= (t >> 2) & 3;
+		m_pos -= *ip++ << 2;
+		t = (t >> 4) - 3;
+#elif defined(LZO1Z)
+		{
+		    lzo_uint off = t & 0x1f;
+		    m_pos = op;
+		    if (off >= 0x1c)
+		    {
+			assert(last_m_off > 0);
+			m_pos -= last_m_off;
+		    }
+		    else
+		    {
+			off = 1 + (off << 6) + (*ip++ >> 2);
+			m_pos -= off;
+			last_m_off = off;
+		    }
+		}
+		t = (t >> 5) - 1;
+#endif
+		TEST_LOOKBEHIND(m_pos,out); assert(t > 0); NEED_OP(t+3-1);
+		goto copy_match;
+#endif
+	    }
+	    else if (t >= 32)
+	    {
+		t &= 31;
+		if (t == 0)
+		{
+		    NEED_IP(1);
+		    while (*ip == 0)
+		    {
+			t += 255;
+			ip++;
+			NEED_IP(1);
+		    }
+		    t += 31 + *ip++;
+		}
+#if defined(COPY_DICT)
+#if defined(LZO1Z)
+		m_off = 1 + (ip[0] << 6) + (ip[1] >> 2);
+		last_m_off = m_off;
+#else
+		m_off = 1 + (ip[0] >> 2) + (ip[1] << 6);
+#endif
+#else
+#if defined(LZO1Z)
+		{
+		    lzo_uint off = 1 + (ip[0] << 6) + (ip[1] >> 2);
+		    m_pos = op - off;
+		    last_m_off = off;
+		}
+#elif defined(LZO_UNALIGNED_OK_2) && (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN)
+		m_pos = op - 1;
+		m_pos -= (* (const lzo_ushortp) ip) >> 2;
+#else
+		m_pos = op - 1;
+		m_pos -= (ip[0] >> 2) + (ip[1] << 6);
+#endif
+#endif
+		ip += 2;
+	    }
+	    else if (t >= 16)
+	    {
+#if defined(COPY_DICT)
+		m_off = (t & 8) << 11;
+#else
+		m_pos = op;
+		m_pos -= (t & 8) << 11;
+#endif
+		t &= 7;
+		if (t == 0)
+		{
+		    NEED_IP(1);
+		    while (*ip == 0)
+		    {
+			t += 255;
+			ip++;
+			NEED_IP(1);
+		    }
+		    t += 7 + *ip++;
+		}
+#if defined(COPY_DICT)
+#if defined(LZO1Z)
+		m_off += (ip[0] << 6) + (ip[1] >> 2);
+#else
+		m_off += (ip[0] >> 2) + (ip[1] << 6);
+#endif
+		ip += 2;
+		if (m_off == 0)
+		    goto eof_found;
+		m_off += 0x4000;
+#if defined(LZO1Z)
+		last_m_off = m_off;
+#endif
+#else
+#if defined(LZO1Z)
+		m_pos -= (ip[0] << 6) + (ip[1] >> 2);
+#elif defined(LZO_UNALIGNED_OK_2) && (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN)
+		m_pos -= (* (const lzo_ushortp) ip) >> 2;
+#else
+		m_pos -= (ip[0] >> 2) + (ip[1] << 6);
+#endif
+		ip += 2;
+		if (m_pos == op)
+		    goto eof_found;
+		m_pos -= 0x4000;
+#if defined(LZO1Z)
+		last_m_off = op - m_pos;
+#endif
+#endif
+	    }
+	    else
+	    {
+#if defined(COPY_DICT)
+#if defined(LZO1Z)
+		m_off = 1 + (t << 6) + (*ip++ >> 2);
+		last_m_off = m_off;
+#else
+		m_off = 1 + (t >> 2) + (*ip++ << 2);
+#endif
+		NEED_OP(2);
+		t = 2; COPY_DICT(t,m_off)
+#else
+#if defined(LZO1Z)
+		t = 1 + (t << 6) + (*ip++ >> 2);
+		m_pos = op - t;
+		last_m_off = t;
+#else
+		m_pos = op - 1;
+		m_pos -= t >> 2;
+		m_pos -= *ip++ << 2;
+#endif
+		TEST_LOOKBEHIND(m_pos,out); NEED_OP(2);
+		*op++ = *m_pos++; *op++ = *m_pos;
+#endif
+		goto match_done;
+	    }
+
+#if defined(COPY_DICT)
+
+	    NEED_OP(t+3-1);
+	    t += 3-1; COPY_DICT(t,m_off)
+
+#else
+
+	    TEST_LOOKBEHIND(m_pos,out); assert(t > 0); NEED_OP(t+3-1);
+#if defined(LZO_UNALIGNED_OK_4) || defined(LZO_ALIGNED_OK_4)
+#if !defined(LZO_UNALIGNED_OK_4)
+	    if (t >= 2 * 4 - (3 - 1) && PTR_ALIGNED2_4(op,m_pos))
+	    {
+		assert((op - m_pos) >= 4);
+#else
+	    if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4)
+	    {
+#endif
+		* (lzo_uint32p) op = * (const lzo_uint32p) m_pos;
+		op += 4; m_pos += 4; t -= 4 - (3 - 1);
+		do {
+		    * (lzo_uint32p) op = * (const lzo_uint32p) m_pos;
+		    op += 4; m_pos += 4; t -= 4;
+		} while (t >= 4);
+		if (t > 0) do *op++ = *m_pos++; while (--t > 0);
+	    }
+	    else
+#endif
+	    {
+copy_match:
+		*op++ = *m_pos++; *op++ = *m_pos++;
+		do *op++ = *m_pos++; while (--t > 0);
+	    }
+
+#endif
+
+match_done:
+#if defined(LZO1Z)
+	    t = ip[-1] & 3;
+#else
+	    t = ip[-2] & 3;
+#endif
+	    if (t == 0)
+		break;
+
+match_next:
+	    assert(t > 0); NEED_OP(t); NEED_IP(t+1);
+	    do *op++ = *ip++; while (--t > 0);
+	    t = *ip++;
+	}
+    }
+
+#if defined(HAVE_TEST_IP) || defined(HAVE_TEST_OP)
+    *out_len = op - out;
+    return LZO_E_EOF_NOT_FOUND;
+#endif
+
+eof_found:
+    assert(t == 1);
+    *out_len = op - out;
+    return (ip == ip_end ? LZO_E_OK :
+	   (ip < ip_end  ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN));
+
+#if defined(HAVE_NEED_IP)
+input_overrun:
+    *out_len = op - out;
+    return LZO_E_INPUT_OVERRUN;
+#endif
+
+#if defined(HAVE_NEED_OP)
+output_overrun:
+    *out_len = op - out;
+    return LZO_E_OUTPUT_OVERRUN;
+#endif
+
+#if defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND)
+lookbehind_overrun:
+    *out_len = op - out;
+    return LZO_E_LOOKBEHIND_OVERRUN;
+#endif
+}
+
+#define LZO_TEST_DECOMPRESS_OVERRUN
+#undef DO_DECOMPRESS
+#define DO_DECOMPRESS       lzo1x_decompress_safe
+
+#if defined(LZO_TEST_DECOMPRESS_OVERRUN)
+#  if !defined(LZO_TEST_DECOMPRESS_OVERRUN_INPUT)
+#    define LZO_TEST_DECOMPRESS_OVERRUN_INPUT       2
+#  endif
+#  if !defined(LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT)
+#    define LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT      2
+#  endif
+#  if !defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND)
+#    define LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND
+#  endif
+#endif
+
+#undef TEST_IP
+#undef TEST_OP
+#undef TEST_LOOKBEHIND
+#undef NEED_IP
+#undef NEED_OP
+#undef HAVE_TEST_IP
+#undef HAVE_TEST_OP
+#undef HAVE_NEED_IP
+#undef HAVE_NEED_OP
+#undef HAVE_ANY_IP
+#undef HAVE_ANY_OP
+
+#if defined(LZO_TEST_DECOMPRESS_OVERRUN_INPUT)
+#  if (LZO_TEST_DECOMPRESS_OVERRUN_INPUT >= 1)
+#    define TEST_IP             (ip < ip_end)
+#  endif
+#  if (LZO_TEST_DECOMPRESS_OVERRUN_INPUT >= 2)
+#    define NEED_IP(x) \
+	    if ((lzo_uint)(ip_end - ip) < (lzo_uint)(x))  goto input_overrun
+#  endif
+#endif
+
+#if defined(LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT)
+#  if (LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT >= 1)
+#    define TEST_OP             (op <= op_end)
+#  endif
+#  if (LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT >= 2)
+#    undef TEST_OP
+#    define NEED_OP(x) \
+	    if ((lzo_uint)(op_end - op) < (lzo_uint)(x))  goto output_overrun
+#  endif
+#endif
+
+#if defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND)
+#  define TEST_LOOKBEHIND(m_pos,out)    if (m_pos < out) goto lookbehind_overrun
+#else
+#  define TEST_LOOKBEHIND(m_pos,op)     ((void) 0)
+#endif
+
+#if !defined(LZO_EOF_CODE) && !defined(TEST_IP)
+#  define TEST_IP               (ip < ip_end)
+#endif
+
+#if defined(TEST_IP)
+#  define HAVE_TEST_IP
+#else
+#  define TEST_IP               1
+#endif
+#if defined(TEST_OP)
+#  define HAVE_TEST_OP
+#else
+#  define TEST_OP               1
+#endif
+
+#if defined(NEED_IP)
+#  define HAVE_NEED_IP
+#else
+#  define NEED_IP(x)            ((void) 0)
+#endif
+#if defined(NEED_OP)
+#  define HAVE_NEED_OP
+#else
+#  define NEED_OP(x)            ((void) 0)
+#endif
+
+#if defined(HAVE_TEST_IP) || defined(HAVE_NEED_IP)
+#  define HAVE_ANY_IP
+#endif
+#if defined(HAVE_TEST_OP) || defined(HAVE_NEED_OP)
+#  define HAVE_ANY_OP
+#endif
+
+#if defined(DO_DECOMPRESS)
+LZO_PUBLIC(int)
+DO_DECOMPRESS  ( const lzo_byte *in , lzo_uint  in_len,
+		       lzo_byte *out, lzo_uint *out_len,
+		       lzo_voidp wrkmem )
+#endif
+{
+    register lzo_byte *op;
+    register const lzo_byte *ip;
+    register lzo_uint t;
+#if defined(COPY_DICT)
+    lzo_uint m_off;
+    const lzo_byte *dict_end;
+#else
+    register const lzo_byte *m_pos;
+#endif
+
+    const lzo_byte * const ip_end = in + in_len;
+#if defined(HAVE_ANY_OP)
+    lzo_byte * const op_end = out + *out_len;
+#endif
+#if defined(LZO1Z)
+    lzo_uint last_m_off = 0;
+#endif
+
+    LZO_UNUSED(wrkmem);
+
+#if defined(__LZO_QUERY_DECOMPRESS)
+    if (__LZO_IS_DECOMPRESS_QUERY(in,in_len,out,out_len,wrkmem))
+	return __LZO_QUERY_DECOMPRESS(in,in_len,out,out_len,wrkmem,0,0);
+#endif
+
+#if defined(COPY_DICT)
+    if (dict)
+    {
+	if (dict_len > M4_MAX_OFFSET)
+	{
+	    dict += dict_len - M4_MAX_OFFSET;
+	    dict_len = M4_MAX_OFFSET;
+	}
+	dict_end = dict + dict_len;
+    }
+    else
+    {
+	dict_len = 0;
+	dict_end = NULL;
+    }
+#endif
+
+    *out_len = 0;
+
+    op = out;
+    ip = in;
+
+    if (*ip > 17)
+    {
+	t = *ip++ - 17;
+	if (t < 4)
+	    goto match_next;
+	assert(t > 0); NEED_OP(t); NEED_IP(t+1);
+	do *op++ = *ip++; while (--t > 0);
+	goto first_literal_run;
+    }
+
+    while (TEST_IP && TEST_OP)
+    {
+	t = *ip++;
+	if (t >= 16)
+	    goto match;
+	if (t == 0)
+	{
+	    NEED_IP(1);
+	    while (*ip == 0)
+	    {
+		t += 255;
+		ip++;
+		NEED_IP(1);
+	    }
+	    t += 15 + *ip++;
+	}
+	assert(t > 0); NEED_OP(t+3); NEED_IP(t+4);
+#if defined(LZO_UNALIGNED_OK_4) || defined(LZO_ALIGNED_OK_4)
+#if !defined(LZO_UNALIGNED_OK_4)
+	if (PTR_ALIGNED2_4(op,ip))
+	{
+#endif
+	* (lzo_uint32p) op = * (const lzo_uint32p) ip;
+	op += 4; ip += 4;
+	if (--t > 0)
+	{
+	    if (t >= 4)
+	    {
+		do {
+		    * (lzo_uint32p) op = * (const lzo_uint32p) ip;
+		    op += 4; ip += 4; t -= 4;
+		} while (t >= 4);
+		if (t > 0) do *op++ = *ip++; while (--t > 0);
+	    }
+	    else
+		do *op++ = *ip++; while (--t > 0);
+	}
+#if !defined(LZO_UNALIGNED_OK_4)
+	}
+	else
+#endif
+#endif
+#if !defined(LZO_UNALIGNED_OK_4)
+	{
+	    *op++ = *ip++; *op++ = *ip++; *op++ = *ip++;
+	    do *op++ = *ip++; while (--t > 0);
+	}
+#endif
+
+first_literal_run:
+
+	t = *ip++;
+	if (t >= 16)
+	    goto match;
+#if defined(COPY_DICT)
+#if defined(LZO1Z)
+	m_off = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2);
+	last_m_off = m_off;
+#else
+	m_off = (1 + M2_MAX_OFFSET) + (t >> 2) + (*ip++ << 2);
+#endif
+	NEED_OP(3);
+	t = 3; COPY_DICT(t,m_off)
+#else
+#if defined(LZO1Z)
+	t = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2);
+	m_pos = op - t;
+	last_m_off = t;
+#else
+	m_pos = op - (1 + M2_MAX_OFFSET);
+	m_pos -= t >> 2;
+	m_pos -= *ip++ << 2;
+#endif
+	TEST_LOOKBEHIND(m_pos,out); NEED_OP(3);
+	*op++ = *m_pos++; *op++ = *m_pos++; *op++ = *m_pos;
+#endif
+	goto match_done;
+
+	while (TEST_IP && TEST_OP)
+	{
+match:
+	    if (t >= 64)
+	    {
+#if defined(COPY_DICT)
+#if defined(LZO1X)
+		m_off = 1 + ((t >> 2) & 7) + (*ip++ << 3);
+		t = (t >> 5) - 1;
+#elif defined(LZO1Y)
+		m_off = 1 + ((t >> 2) & 3) + (*ip++ << 2);
+		t = (t >> 4) - 3;
+#elif defined(LZO1Z)
+		m_off = t & 0x1f;
+		if (m_off >= 0x1c)
+		    m_off = last_m_off;
+		else
+		{
+		    m_off = 1 + (m_off << 6) + (*ip++ >> 2);
+		    last_m_off = m_off;
+		}
+		t = (t >> 5) - 1;
+#endif
+#else
+#if defined(LZO1X)
+		m_pos = op - 1;
+		m_pos -= (t >> 2) & 7;
+		m_pos -= *ip++ << 3;
+		t = (t >> 5) - 1;
+#elif defined(LZO1Y)
+		m_pos = op - 1;
+		m_pos -= (t >> 2) & 3;
+		m_pos -= *ip++ << 2;
+		t = (t >> 4) - 3;
+#elif defined(LZO1Z)
+		{
+		    lzo_uint off = t & 0x1f;
+		    m_pos = op;
+		    if (off >= 0x1c)
+		    {
+			assert(last_m_off > 0);
+			m_pos -= last_m_off;
+		    }
+		    else
+		    {
+			off = 1 + (off << 6) + (*ip++ >> 2);
+			m_pos -= off;
+			last_m_off = off;
+		    }
+		}
+		t = (t >> 5) - 1;
+#endif
+		TEST_LOOKBEHIND(m_pos,out); assert(t > 0); NEED_OP(t+3-1);
+		goto copy_match;
+#endif
+	    }
+	    else if (t >= 32)
+	    {
+		t &= 31;
+		if (t == 0)
+		{
+		    NEED_IP(1);
+		    while (*ip == 0)
+		    {
+			t += 255;
+			ip++;
+			NEED_IP(1);
+		    }
+		    t += 31 + *ip++;
+		}
+#if defined(COPY_DICT)
+#if defined(LZO1Z)
+		m_off = 1 + (ip[0] << 6) + (ip[1] >> 2);
+		last_m_off = m_off;
+#else
+		m_off = 1 + (ip[0] >> 2) + (ip[1] << 6);
+#endif
+#else
+#if defined(LZO1Z)
+		{
+		    lzo_uint off = 1 + (ip[0] << 6) + (ip[1] >> 2);
+		    m_pos = op - off;
+		    last_m_off = off;
+		}
+#elif defined(LZO_UNALIGNED_OK_2) && (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN)
+		m_pos = op - 1;
+		m_pos -= (* (const lzo_ushortp) ip) >> 2;
+#else
+		m_pos = op - 1;
+		m_pos -= (ip[0] >> 2) + (ip[1] << 6);
+#endif
+#endif
+		ip += 2;
+	    }
+	    else if (t >= 16)
+	    {
+#if defined(COPY_DICT)
+		m_off = (t & 8) << 11;
+#else
+		m_pos = op;
+		m_pos -= (t & 8) << 11;
+#endif
+		t &= 7;
+		if (t == 0)
+		{
+		    NEED_IP(1);
+		    while (*ip == 0)
+		    {
+			t += 255;
+			ip++;
+			NEED_IP(1);
+		    }
+		    t += 7 + *ip++;
+		}
+#if defined(COPY_DICT)
+#if defined(LZO1Z)
+		m_off += (ip[0] << 6) + (ip[1] >> 2);
+#else
+		m_off += (ip[0] >> 2) + (ip[1] << 6);
+#endif
+		ip += 2;
+		if (m_off == 0)
+		    goto eof_found;
+		m_off += 0x4000;
+#if defined(LZO1Z)
+		last_m_off = m_off;
+#endif
+#else
+#if defined(LZO1Z)
+		m_pos -= (ip[0] << 6) + (ip[1] >> 2);
+#elif defined(LZO_UNALIGNED_OK_2) && (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN)
+		m_pos -= (* (const lzo_ushortp) ip) >> 2;
+#else
+		m_pos -= (ip[0] >> 2) + (ip[1] << 6);
+#endif
+		ip += 2;
+		if (m_pos == op)
+		    goto eof_found;
+		m_pos -= 0x4000;
+#if defined(LZO1Z)
+		last_m_off = op - m_pos;
+#endif
+#endif
+	    }
+	    else
+	    {
+#if defined(COPY_DICT)
+#if defined(LZO1Z)
+		m_off = 1 + (t << 6) + (*ip++ >> 2);
+		last_m_off = m_off;
+#else
+		m_off = 1 + (t >> 2) + (*ip++ << 2);
+#endif
+		NEED_OP(2);
+		t = 2; COPY_DICT(t,m_off)
+#else
+#if defined(LZO1Z)
+		t = 1 + (t << 6) + (*ip++ >> 2);
+		m_pos = op - t;
+		last_m_off = t;
+#else
+		m_pos = op - 1;
+		m_pos -= t >> 2;
+		m_pos -= *ip++ << 2;
+#endif
+		TEST_LOOKBEHIND(m_pos,out); NEED_OP(2);
+		*op++ = *m_pos++; *op++ = *m_pos;
+#endif
+		goto match_done;
+	    }
+
+#if defined(COPY_DICT)
+
+	    NEED_OP(t+3-1);
+	    t += 3-1; COPY_DICT(t,m_off)
+
+#else
+
+	    TEST_LOOKBEHIND(m_pos,out); assert(t > 0); NEED_OP(t+3-1);
+#if defined(LZO_UNALIGNED_OK_4) || defined(LZO_ALIGNED_OK_4)
+#if !defined(LZO_UNALIGNED_OK_4)
+	    if (t >= 2 * 4 - (3 - 1) && PTR_ALIGNED2_4(op,m_pos))
+	    {
+		assert((op - m_pos) >= 4);
+#else
+	    if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4)
+	    {
+#endif
+		* (lzo_uint32p) op = * (const lzo_uint32p) m_pos;
+		op += 4; m_pos += 4; t -= 4 - (3 - 1);
+		do {
+		    * (lzo_uint32p) op = * (const lzo_uint32p) m_pos;
+		    op += 4; m_pos += 4; t -= 4;
+		} while (t >= 4);
+		if (t > 0) do *op++ = *m_pos++; while (--t > 0);
+	    }
+	    else
+#endif
+	    {
+copy_match:
+		*op++ = *m_pos++; *op++ = *m_pos++;
+		do *op++ = *m_pos++; while (--t > 0);
+	    }
+
+#endif
+
+match_done:
+#if defined(LZO1Z)
+	    t = ip[-1] & 3;
+#else
+	    t = ip[-2] & 3;
+#endif
+	    if (t == 0)
+		break;
+
+match_next:
+	    assert(t > 0); NEED_OP(t); NEED_IP(t+1);
+	    do *op++ = *ip++; while (--t > 0);
+	    t = *ip++;
+	}
+    }
+
+#if defined(HAVE_TEST_IP) || defined(HAVE_TEST_OP)
+    *out_len = op - out;
+    return LZO_E_EOF_NOT_FOUND;
+#endif
+
+eof_found:
+    assert(t == 1);
+    *out_len = op - out;
+    return (ip == ip_end ? LZO_E_OK :
+	   (ip < ip_end  ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN));
+
+#if defined(HAVE_NEED_IP)
+input_overrun:
+    *out_len = op - out;
+    return LZO_E_INPUT_OVERRUN;
+#endif
+
+#if defined(HAVE_NEED_OP)
+output_overrun:
+    *out_len = op - out;
+    return LZO_E_OUTPUT_OVERRUN;
+#endif
+
+#if defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND)
+lookbehind_overrun:
+    *out_len = op - out;
+    return LZO_E_LOOKBEHIND_OVERRUN;
+#endif
+}
+
+/***** End of minilzo.c *****/
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libmpcodecs/native/minilzo.h	Sat Apr 13 18:03:02 2002 +0000
@@ -0,0 +1,97 @@
+/* minilzo.h -- mini subset of the LZO real-time data compression library
+
+   This file is part of the LZO real-time data compression library.
+
+   Copyright (C) 2000 Markus Franz Xaver Johannes Oberhumer
+   Copyright (C) 1999 Markus Franz Xaver Johannes Oberhumer
+   Copyright (C) 1998 Markus Franz Xaver Johannes Oberhumer
+   Copyright (C) 1997 Markus Franz Xaver Johannes Oberhumer
+   Copyright (C) 1996 Markus Franz Xaver Johannes Oberhumer
+
+   The LZO library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of
+   the License, or (at your option) any later version.
+
+   The LZO library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with the LZO library; see the file COPYING.
+   If not, write to the Free Software Foundation, Inc.,
+   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+   Markus F.X.J. Oberhumer
+   <markus.oberhumer@jk.uni-linz.ac.at>
+   http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html
+ */
+
+/*
+ * NOTE:
+ *   the full LZO package can be found at
+ *   http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html
+ */
+
+
+#ifndef __MINILZO_H
+#define __MINILZO_H
+
+#define MINILZO_VERSION         0x1070
+
+#ifdef __LZOCONF_H
+#  error "you cannot use both LZO and miniLZO"
+#endif
+
+#undef LZO_HAVE_CONFIG_H
+#include "lzoconf.h"
+
+#if !defined(LZO_VERSION) || (LZO_VERSION != MINILZO_VERSION)
+#  error "version mismatch in header files"
+#endif
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/***********************************************************************
+//
+************************************************************************/
+
+/* Memory required for the wrkmem parameter.
+ * When the required size is 0, you can also pass a NULL pointer.
+ */
+
+#define LZO1X_MEM_COMPRESS      LZO1X_1_MEM_COMPRESS
+#define LZO1X_1_MEM_COMPRESS    ((lzo_uint32) (16384L * lzo_sizeof_dict_t))
+#define LZO1X_MEM_DECOMPRESS    (0)
+
+
+/* compression */
+LZO_EXTERN(int)
+lzo1x_1_compress        ( const lzo_byte *src, lzo_uint  src_len,
+                                lzo_byte *dst, lzo_uint *dst_len,
+                                lzo_voidp wrkmem );
+
+/* decompression */
+LZO_EXTERN(int)
+lzo1x_decompress        ( const lzo_byte *src, lzo_uint  src_len,
+                                lzo_byte *dst, lzo_uint *dst_len,
+                                lzo_voidp wrkmem /* NOT USED */ );
+
+/* safe decompression with overrun testing */
+LZO_EXTERN(int)
+lzo1x_decompress_safe   ( const lzo_byte *src, lzo_uint  src_len,
+                                lzo_byte *dst, lzo_uint *dst_len,
+                                lzo_voidp wrkmem /* NOT USED */ );
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* already included */
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libmpcodecs/native/msvidc.c	Sat Apr 13 18:03:02 2002 +0000
@@ -0,0 +1,393 @@
+/*
+    Microsoft Video 1 Decoder
+    
+    (C) 2001 Mike Melanson
+    
+    The description of the algorithm you can read here:
+      http://www.pcisys.net/~melanson/codecs/
+
+    32bpp support (c) alex
+*/
+
+#include "config.h"
+#include "bswap.h"
+
+#define LE_16(x) (le2me_16(*(unsigned short *)(x)))
+
+#define DECODE_BGR555_TO_BGR888(x) \
+        x.c1_b = (x.c1 >> 7) & 0xF8; \
+        x.c1_g = (x.c1 >> 2) & 0xF8; \
+        x.c1_r = (x.c1 << 3) & 0xF8; \
+        x.c2_b = (x.c2 >> 7) & 0xF8; \
+        x.c2_g = (x.c2 >> 2) & 0xF8; \
+        x.c2_r = (x.c2 << 3) & 0xF8;
+
+#define DECODE_PALETTE_TO_BGR888(x) \
+        x.c1_b = palette_map[x.c1 * 4 + 2]; \
+        x.c1_g = palette_map[x.c1 * 4 + 1]; \
+        x.c1_r = palette_map[x.c1 * 4 + 0]; \
+        x.c2_b = palette_map[x.c2 * 4 + 2]; \
+        x.c2_g = palette_map[x.c2 * 4 + 1]; \
+        x.c2_r = palette_map[x.c2 * 4 + 0];
+
+struct
+{
+  unsigned short c1, c2;
+  unsigned char c1_r, c1_g, c1_b;
+  unsigned char c2_r, c2_g, c2_b;
+} quad[2][2];
+
+void AVI_Decode_Video1_16(
+  char *encoded,
+  int encoded_size,
+  char *decoded,
+  int width,
+  int height,
+  int bytes_per_pixel)
+{
+  int block_ptr, pixel_ptr;
+  int total_blocks;
+  int pixel_x, pixel_y;  // pixel width and height iterators
+  int block_x, block_y;  // block width and height iterators
+  int blocks_wide, blocks_high;  // width and height in 4x4 blocks
+  int block_inc;
+  int row_dec;
+
+  // decoding parameters
+  int stream_ptr;
+  unsigned char byte_a, byte_b;
+  unsigned short flags;
+  int skip_blocks;
+
+  stream_ptr = 0;
+  skip_blocks = 0;
+  blocks_wide = width / 4;
+  blocks_high = height / 4;
+  total_blocks = blocks_wide * blocks_high;
+  block_inc = 4 * bytes_per_pixel;
+  row_dec = (width + 4) * bytes_per_pixel;
+
+  for (block_y = blocks_high; block_y > 0; block_y--)
+  {
+    block_ptr = ((block_y * 4) - 1) * (width * bytes_per_pixel);
+    for (block_x = blocks_wide; block_x > 0; block_x--)
+    {
+      // check if this block should be skipped
+      if (skip_blocks)
+      {
+        block_ptr += block_inc;
+        skip_blocks--;
+        total_blocks--;
+        continue;
+      }
+
+      pixel_ptr = block_ptr;
+
+      // get the next two bytes in the encoded data stream
+      byte_a = encoded[stream_ptr++];
+      byte_b = encoded[stream_ptr++];
+
+      // check if the decode is finished
+      if ((byte_a == 0) && (byte_b == 0) && (total_blocks == 0))
+        return;
+
+      // check if this is a skip code
+      else if ((byte_b & 0xFC) == 0x84)
+      {
+        // but don't count the current block
+        skip_blocks = ((byte_b - 0x84) << 8) + byte_a - 1;
+      }
+
+      // check if this is in the 2- or 8-color classes
+      else if (byte_b < 0x80)
+      {
+        flags = (byte_b << 8) | byte_a;
+
+        quad[0][0].c1 = LE_16(&encoded[stream_ptr]);
+        stream_ptr += 2;
+        quad[0][0].c2 = LE_16(&encoded[stream_ptr]);
+        stream_ptr += 2;
+
+        DECODE_BGR555_TO_BGR888(quad[0][0]);
+
+        if (quad[0][0].c1 & 0x8000)
+        {
+          // 8-color encoding
+          quad[1][0].c1 = LE_16(&encoded[stream_ptr]);
+          stream_ptr += 2;
+          quad[1][0].c2 = LE_16(&encoded[stream_ptr]);
+          stream_ptr += 2;
+          quad[0][1].c1 = LE_16(&encoded[stream_ptr]);
+          stream_ptr += 2;
+          quad[0][1].c2 = LE_16(&encoded[stream_ptr]);
+          stream_ptr += 2;
+          quad[1][1].c1 = LE_16(&encoded[stream_ptr]);
+          stream_ptr += 2;
+          quad[1][1].c2 = LE_16(&encoded[stream_ptr]);
+          stream_ptr += 2;
+
+          DECODE_BGR555_TO_BGR888(quad[0][1]);
+          DECODE_BGR555_TO_BGR888(quad[1][0]);
+          DECODE_BGR555_TO_BGR888(quad[1][1]);
+
+          for (pixel_y = 0; pixel_y < 4; pixel_y++)
+          {
+            for (pixel_x = 0; pixel_x < 4; pixel_x++)
+            {
+              if (flags & 1)
+              {
+                decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c1_r;
+                decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c1_g;
+                decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c1_b;
+		if (bytes_per_pixel == 4) /* 32bpp */
+		    pixel_ptr++;
+              }
+              else
+              {
+                decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c2_r;
+                decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c2_g;
+                decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c2_b;
+		if (bytes_per_pixel == 4) /* 32bpp */
+		    pixel_ptr++;
+              }
+
+              // get the next flag ready to go
+              flags >>= 1;
+            }
+            pixel_ptr -= row_dec;
+          }
+        }
+        else
+        {
+          // 2-color encoding
+          for (pixel_y = 0; pixel_y < 4; pixel_y++)
+          {
+            for (pixel_x = 0; pixel_x < 4; pixel_x++)
+            {
+              if (flags & 1)
+              {
+                decoded[pixel_ptr++] = quad[0][0].c1_r;
+                decoded[pixel_ptr++] = quad[0][0].c1_g;
+                decoded[pixel_ptr++] = quad[0][0].c1_b;
+		if (bytes_per_pixel == 4) /* 32bpp */
+		    pixel_ptr++;
+              }
+              else
+              {
+                decoded[pixel_ptr++] = quad[0][0].c2_r;
+                decoded[pixel_ptr++] = quad[0][0].c2_g;
+                decoded[pixel_ptr++] = quad[0][0].c2_b;
+		if (bytes_per_pixel == 4) /* 32bpp */
+		    pixel_ptr++;
+              }
+
+              // get the next flag ready to go
+              flags >>= 1;
+            }
+            pixel_ptr -= row_dec;
+          }
+        }
+      }
+
+      // otherwise, it's a 1-color block
+      else
+      {
+        quad[0][0].c1 = (byte_b << 8) | byte_a;
+        DECODE_BGR555_TO_BGR888(quad[0][0]);
+
+        for (pixel_y = 0; pixel_y < 4; pixel_y++)
+        {
+          for (pixel_x = 0; pixel_x < 4; pixel_x++)
+          {
+            decoded[pixel_ptr++] = quad[0][0].c1_r;
+            decoded[pixel_ptr++] = quad[0][0].c1_g;
+            decoded[pixel_ptr++] = quad[0][0].c1_b;
+	    if (bytes_per_pixel == 4) /* 32bpp */
+		pixel_ptr++;
+          }
+          pixel_ptr -= row_dec;
+        }
+      }
+
+      block_ptr += block_inc;
+      total_blocks--;
+    }
+  }
+}
+
+void AVI_Decode_Video1_8(
+  char *encoded,
+  int encoded_size,
+  char *decoded,
+  int width,
+  int height,
+  unsigned char *palette_map,
+  int bytes_per_pixel)
+{
+  int block_ptr, pixel_ptr;
+  int total_blocks;
+  int pixel_x, pixel_y;  // pixel width and height iterators
+  int block_x, block_y;  // block width and height iterators
+  int blocks_wide, blocks_high;  // width and height in 4x4 blocks
+  int block_inc;
+  int row_dec;
+
+  // decoding parameters
+  int stream_ptr;
+  unsigned char byte_a, byte_b;
+  unsigned short flags;
+  int skip_blocks;
+
+  stream_ptr = 0;
+  skip_blocks = 0;
+  blocks_wide = width / 4;
+  blocks_high = height / 4;
+  total_blocks = blocks_wide * blocks_high;
+  block_inc = 4 * bytes_per_pixel;
+  row_dec = (width + 4) * bytes_per_pixel;
+
+  for (block_y = blocks_high; block_y > 0; block_y--)
+  {
+    block_ptr = ((block_y * 4) - 1) * (width * bytes_per_pixel);
+    for (block_x = blocks_wide; block_x > 0; block_x--)
+    {
+      // check if this block should be skipped
+      if (skip_blocks)
+      {
+        block_ptr += block_inc;
+        skip_blocks--;
+        total_blocks--;
+        continue;
+      }
+
+      pixel_ptr = block_ptr;
+
+      // get the next two bytes in the encoded data stream
+      byte_a = encoded[stream_ptr++];
+      byte_b = encoded[stream_ptr++];
+
+      // check if the decode is finished
+      if ((byte_a == 0) && (byte_b == 0) && (total_blocks == 0))
+        return;
+
+      // check if this is a skip code
+      else if ((byte_b & 0xFC) == 0x84)
+      {
+        // but don't count the current block
+        skip_blocks = ((byte_b - 0x84) << 8) + byte_a - 1;
+      }
+
+      // check if this is a 2-color block
+      else if (byte_b < 0x80)
+      {
+        flags = (byte_b << 8) | byte_a;
+
+        quad[0][0].c1 = (unsigned char)encoded[stream_ptr++];
+        quad[0][0].c2 = (unsigned char)encoded[stream_ptr++];
+
+        DECODE_PALETTE_TO_BGR888(quad[0][0]);
+
+        // 2-color encoding
+        for (pixel_y = 0; pixel_y < 4; pixel_y++)
+        {
+          for (pixel_x = 0; pixel_x < 4; pixel_x++)
+          {
+            if (flags & 1)
+            {
+              decoded[pixel_ptr++] = quad[0][0].c1_r;
+              decoded[pixel_ptr++] = quad[0][0].c1_g;
+              decoded[pixel_ptr++] = quad[0][0].c1_b;
+	      if (bytes_per_pixel == 4) /* 32bpp */
+		pixel_ptr++;
+            }
+            else
+            {
+              decoded[pixel_ptr++] = quad[0][0].c2_r;
+              decoded[pixel_ptr++] = quad[0][0].c2_g;
+              decoded[pixel_ptr++] = quad[0][0].c2_b;
+	      if (bytes_per_pixel == 4) /* 32bpp */
+		pixel_ptr++;
+            }
+
+            // get the next flag ready to go
+            flags >>= 1;
+          }
+          pixel_ptr -= row_dec;
+        }
+      }
+
+      // check if it's an 8-color block
+      else if (byte_b >= 0x90)
+      {
+        flags = (byte_b << 8) | byte_a;
+
+        quad[0][0].c1 = (unsigned char)encoded[stream_ptr++];
+        quad[0][0].c2 = (unsigned char)encoded[stream_ptr++];
+        quad[1][0].c1 = (unsigned char)encoded[stream_ptr++];
+        quad[1][0].c2 = (unsigned char)encoded[stream_ptr++];
+
+        quad[0][1].c1 = (unsigned char)encoded[stream_ptr++];
+        quad[0][1].c2 = (unsigned char)encoded[stream_ptr++];
+        quad[1][1].c1 = (unsigned char)encoded[stream_ptr++];
+        quad[1][1].c2 = (unsigned char)encoded[stream_ptr++];
+
+        DECODE_PALETTE_TO_BGR888(quad[0][0]);
+        DECODE_PALETTE_TO_BGR888(quad[0][1]);
+        DECODE_PALETTE_TO_BGR888(quad[1][0]);
+        DECODE_PALETTE_TO_BGR888(quad[1][1]);
+
+        for (pixel_y = 0; pixel_y < 4; pixel_y++)
+        {
+          for (pixel_x = 0; pixel_x < 4; pixel_x++)
+          {
+            if (flags & 1)
+            {
+              decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c1_r;
+              decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c1_g;
+              decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c1_b;
+	      if (bytes_per_pixel == 4) /* 32bpp */
+		pixel_ptr++;
+            }
+            else
+            {
+              decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c2_r;
+              decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c2_g;
+              decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c2_b;
+	      if (bytes_per_pixel == 4) /* 32bpp */
+		pixel_ptr++;
+            }
+
+            // get the next flag ready to go
+            flags >>= 1;
+          }
+          pixel_ptr -= row_dec;
+        }
+      }
+
+      // otherwise, it's a 1-color block
+      else
+      {
+        // init c2 along with c1 just so c2 is a known value for macro
+        quad[0][0].c1 = quad[0][0].c2 = byte_a;
+        DECODE_PALETTE_TO_BGR888(quad[0][0]);
+
+        for (pixel_y = 0; pixel_y < 4; pixel_y++)
+        {
+          for (pixel_x = 0; pixel_x < 4; pixel_x++)
+          {
+            decoded[pixel_ptr++] = quad[0][0].c1_r;
+            decoded[pixel_ptr++] = quad[0][0].c1_g;
+            decoded[pixel_ptr++] = quad[0][0].c1_b;
+	    if (bytes_per_pixel == 4) /* 32bpp */
+		pixel_ptr++;
+          }
+          pixel_ptr -= row_dec;
+        }
+      }
+
+      block_ptr += block_inc;
+      total_blocks--;
+    }
+  }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libmpcodecs/native/nuppelvideo.c	Sat Apr 13 18:03:02 2002 +0000
@@ -0,0 +1,123 @@
+/*
+ * NuppelVideo 0.05 file parser
+ * for MPlayer
+ * by Panagiotis Issaris <takis@lumumba.luc.ac.be>
+ *
+ * Reworked by alex
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "config.h"
+#include "mp_msg.h"
+
+#include "fastmemcpy.h"
+
+#include "libmpdemux/nuppelvideo.h" 
+#include "RTjpegN.h"
+#include "minilzo.h"
+
+#define KEEP_BUFFER
+
+void decode_nuv( unsigned char *encoded, int encoded_size,
+		unsigned char *decoded, int width, int height)
+{
+	int r;
+	unsigned int out_len;
+	struct rtframeheader *encodedh = ( struct rtframeheader* ) encoded;
+	static unsigned char *buffer = 0; /* for RTJpeg with LZO decompress */
+#ifdef KEEP_BUFFER
+	static unsigned char *previous_buffer = 0; /* to support Last-frame-copy */
+#endif
+	static int is_lzo_inited = 0;
+
+//	printf("frametype: %c, comtype: %c, encoded_size: %d, width: %d, height: %d\n",
+//	    encodedh->frametype, encodedh->comptype, encoded_size, width, height);
+
+	switch(encodedh->frametype)
+	{
+	    case 'D':	/* additional data for compressors */
+	    {
+		/* tables are in encoded */
+		if (encodedh->comptype == 'R')
+		{
+		    RTjpeg_init_decompress ( encoded+12, width, height );
+		    mp_msg(MSGT_DECVIDEO, MSGL_V, "Found RTjpeg tables (size: %d, width: %d, height: %d)\n",
+			encoded_size-12, width, height);
+		}
+		break;
+	    }
+	    case 'V':
+	    {
+#ifdef KEEP_BUFFER		
+		if (!previous_buffer) 
+			previous_buffer = ( unsigned char * ) malloc ( width * height + ( width * height ) / 2 );
+#endif
+
+		if (((encodedh->comptype == '2') ||
+		    (encodedh->comptype == '3')) && !is_lzo_inited)
+		{
+		    /* frame using lzo, init lzo first if not inited */
+		    if ( lzo_init() != LZO_E_OK ) 
+		    {
+			mp_msg(MSGT_DECVIDEO, MSGL_ERR, "LZO init failed\n");
+			return;
+		    }
+		    is_lzo_inited = 1;
+		}
+
+		switch(encodedh->comptype)
+		{
+		    case '0': /* raw YUV420 */
+			memcpy(decoded, encoded + 12, width*height*3/2);
+			break;
+		    case '1': /* RTJpeg */
+			RTjpeg_decompressYUV420 ( ( __s8 * ) encoded + 12, decoded );
+			break;
+		    case '2': /* RTJpeg with LZO */
+			if (!buffer) 
+			    buffer = ( unsigned char * ) malloc ( width * height + ( width * height ) / 2 );
+			if (!buffer)
+			{
+			    mp_msg(MSGT_DECVIDEO, MSGL_ERR, "Nuppelvideo: error decompressing\n");
+			    break;
+			}
+			r = lzo1x_decompress ( encoded + 12, encodedh->packetlength, buffer, &out_len, NULL );
+			if ( r != LZO_E_OK ) 
+			{
+			    mp_msg(MSGT_DECVIDEO, MSGL_ERR, "Nuppelvideo: error decompressing\n");
+			    break;
+			}
+			RTjpeg_decompressYUV420 ( ( __s8 * ) buffer, decoded );
+			break;
+		    case '3': /* raw YUV420 with LZO */
+			r = lzo1x_decompress ( encoded + 12, encodedh->packetlength, decoded, &out_len, NULL );
+			if ( r != LZO_E_OK ) 
+			{
+			    mp_msg(MSGT_DECVIDEO, MSGL_ERR, "Nuppelvideo: error decompressing\n");
+			    break;
+			}
+			break;
+		    case 'N': /* black frame */
+			memset ( decoded, 0,  width * height );
+			memset ( decoded + width * height, 127, width * height / 2);
+			break;
+		    case 'L': /* copy last frame */
+#ifdef KEEP_BUFFER
+			memcpy ( decoded, previous_buffer, width*height*3/2);
+#endif
+			break;
+		}
+
+#ifdef KEEP_BUFFER
+		memcpy(previous_buffer, decoded, width*height*3/2);
+#endif
+		break;
+	    }
+	    default:
+		mp_msg(MSGT_DECVIDEO, MSGL_V, "Nuppelvideo: unknwon frametype: %c\n",
+		    encodedh->frametype);
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libmpcodecs/native/qtrle.c	Sat Apr 13 18:03:02 2002 +0000
@@ -0,0 +1,125 @@
+/*
+    Quicktime Animation (RLE) Decoder for MPlayer
+
+    (C) 2001 Mike Melanson
+*/
+
+#include "config.h"
+#include "bswap.h"
+
+#define BE_16(x) (be2me_16(*(unsigned short *)(x)))
+#define BE_32(x) (be2me_32(*(unsigned int *)(x)))
+
+// 256 RGB entries; 25% of these bytes will be unused, but it's faster
+// to index 4-byte entries
+static unsigned char palette[256 * 4];
+
+void qt_decode_rle24(
+  unsigned char *encoded,
+  int encoded_size,
+  unsigned char *decoded,
+  int width,
+  int height,
+  int bytes_per_pixel)
+{
+  int stream_ptr;
+  int header;
+  int start_line;
+  int lines_to_change;
+  signed char rle_code;
+  int row_ptr, pixel_ptr;
+  int row_inc = bytes_per_pixel * width;
+  unsigned char r, g, b;
+
+  // check if this frame is even supposed to change
+  if (encoded_size < 8)
+    return;
+
+  // start after the chunk size
+  stream_ptr = 4;
+
+  // fetch the header
+  header = BE_16(&encoded[stream_ptr]);
+  stream_ptr += 2;
+
+  // if a header is present, fetch additional decoding parameters
+  if (header & 0x0008)
+  {
+    start_line = BE_16(&encoded[stream_ptr]);
+    stream_ptr += 4;
+    lines_to_change = BE_16(&encoded[stream_ptr]);
+    stream_ptr += 4;
+  }
+  else
+  {
+    start_line = 0;
+    lines_to_change = height;
+  }
+
+  row_ptr = row_inc * start_line;
+  while (lines_to_change--)
+  {
+    pixel_ptr = row_ptr + ((encoded[stream_ptr++] - 1) * bytes_per_pixel);
+
+    while ((rle_code = (signed char)encoded[stream_ptr++]) != -1)
+    {
+      if (rle_code == 0)
+        // there's another skip code in the stream
+        pixel_ptr += ((encoded[stream_ptr++] - 1) * bytes_per_pixel);
+      else if (rle_code < 0)
+      {
+        // decode the run length code
+        rle_code = -rle_code;
+        r = encoded[stream_ptr++];
+        g = encoded[stream_ptr++];
+        b = encoded[stream_ptr++];
+        while (rle_code--)
+        {
+          decoded[pixel_ptr++] = b;
+          decoded[pixel_ptr++] = g;
+          decoded[pixel_ptr++] = r;
+          if (bytes_per_pixel == 4)
+            pixel_ptr++;
+        }
+      }
+      else
+      {
+        // copy pixels directly to output
+        while (rle_code--)
+        {
+          decoded[pixel_ptr++] = encoded[stream_ptr + 2];
+          decoded[pixel_ptr++] = encoded[stream_ptr + 1];
+          decoded[pixel_ptr++] = encoded[stream_ptr + 0];
+          stream_ptr += 3;
+          if (bytes_per_pixel == 4)
+            pixel_ptr++;
+        }
+      }
+    }
+
+    row_ptr += row_inc;
+  }
+}
+
+void qt_decode_rle(
+  unsigned char *encoded,
+  int encoded_size,
+  unsigned char *decoded,
+  int width,
+  int height,
+  int encoded_bpp,
+  int bytes_per_pixel)
+{
+  switch (encoded_bpp)
+  {
+    case 24:
+      qt_decode_rle24(
+        encoded,
+        encoded_size,
+        decoded,
+        width,
+        height,
+        bytes_per_pixel);
+      break;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libmpcodecs/native/qtrpza.c	Sat Apr 13 18:03:02 2002 +0000
@@ -0,0 +1,264 @@
+/*
+ *
+ * Apple Video (rpza) QuickTime Decoder for Mplayer
+ * (c) 2002 Roberto Togni
+ *
+ * Fourcc: rpza, azpr
+ *
+ * Some code comes from qtsmc.c by Mike Melanson
+ *
+ * A description of the decoding algorithm can be found here:
+ *   http://www.pcisys.net/~melanson/codecs/
+ */
+
+#include "config.h"
+#include "bswap.h"
+#include "mp_msg.h"
+
+#define BE_16(x) (be2me_16(*(unsigned short *)(x)))
+#define BE_32(x) (be2me_32(*(unsigned int *)(x)))
+
+
+#define ADVANCE_BLOCK() \
+{ \
+	pixel_ptr += block_x_inc; \
+	if (pixel_ptr >= (width * bytes_per_pixel)) \
+	{ \
+		pixel_ptr = 0; \
+		row_ptr += block_y_inc * 4; \
+	} \
+	total_blocks--; \
+	if (total_blocks < 0) \
+	{ \
+		mp_msg(MSGT_DECVIDEO, MSGL_WARN, "block counter just went negative (this should not happen)\n"); \
+		return; \
+	} \
+}
+
+#define PAINT_CURRENT_PIXEL(r, g, b, color) \
+{ \
+	if (bytes_per_pixel == 2) { \
+		(*(unsigned short*)(&decoded[block_ptr])) = color & 0x7fff; \
+		block_ptr += 2; \
+	} else { \
+		decoded[block_ptr++] = (b); \
+		decoded[block_ptr++] = (g); \
+		decoded[block_ptr++] = (r); \
+		if (bytes_per_pixel == 4) /* 32bpp */ \
+			block_ptr++; \
+	} \
+}
+
+#define COLOR_FIX(col_out, col_in) (col_out) = ((col_in) << 3) | ((col_in) >> 2)
+
+#define COLOR_TO_RGB(r, g, b, color) \
+{ \
+	if (bytes_per_pixel != 2) { \
+		unsigned short tmp; \
+		tmp = (color >> 10) & 0x1f; \
+		COLOR_FIX (r, tmp); \
+		tmp = (color >> 5) & 0x1f; \
+		COLOR_FIX (g, tmp); \
+		tmp = color & 0x1f; \
+		COLOR_FIX (b, tmp); \
+	} \
+}
+
+#define COLORAB_TO_RGB4(rgb4, color4, colorA, colorB) \
+{ \
+	unsigned short ta, tb, tt; \
+	if (bytes_per_pixel != 2) { \
+		ta = (colorA >> 10) & 0x1f; \
+		tb = (colorB >> 10) & 0x1f; \
+		COLOR_FIX (rgb4[3][0], ta); \
+		COLOR_FIX (rgb4[0][0], tb); \
+		tt = (11 * ta + 21 * tb) >> 5; \
+		COLOR_FIX (rgb4[1][0], tt); \
+		tt = (21 * ta + 11 * tb) >> 5; \
+		COLOR_FIX (rgb4[2][0], tt); \
+		ta = (colorA >> 5) & 0x1f; \
+		tb = (colorB >> 5) & 0x1f; \
+		COLOR_FIX (rgb4[3][1], ta); \
+		COLOR_FIX (rgb4[0][1], tb); \
+		tt = (11 * ta + 21 * tb) >> 5; \
+		COLOR_FIX (rgb4[1][1], tt); \
+		tt = (21 * ta + 11 * tb) >> 5; \
+		COLOR_FIX (rgb4[2][1], tt); \
+		ta = colorA	& 0x1f; \
+		tb = colorB	& 0x1f; \
+		COLOR_FIX (rgb4[3][2], ta); \
+		COLOR_FIX (rgb4[0][2], tb); \
+		tt = (11 * ta + 21 * tb) >> 5; \
+		COLOR_FIX (rgb4[1][2], tt); \
+		tt = (21 * ta + 11 * tb) >> 5; \
+		COLOR_FIX (rgb4[2][2], tt); \
+	} else { \
+		color4[3] = colorA; \
+		color4[0] = colorB; \
+		ta = (colorA >> 10) & 0x1f; \
+		tb = (colorB >> 10) & 0x1f; \
+		color4[1] = ((11 * ta + 21 * tb) << 5) & 0x7c00; \
+		color4[2] = ((21 * ta + 11 * tb) << 5) & 0x7c00; \
+		ta = (colorA >> 5) & 0x1f; \
+		tb = (colorB >> 5) & 0x1f; \
+		color4[1] |= (11 * ta + 21 * tb) & 0x3e0; \
+		color4[2] |= (21 * ta + 11 * tb) & 0x3e0; \
+		ta = colorA	& 0x1f; \
+		tb = colorB	& 0x1f; \
+		color4[1] |= (11 * ta + 21 * tb) >> 5; \
+		color4[2] |= (21 * ta + 11 * tb) >> 5; \
+	} \
+}
+
+
+
+/*
+ * rpza frame decoder
+ *
+ * Input values:
+ *
+ *	*encoded: buffer of encoded data (chunk)
+ *	encoded_size: length of encoded buffer
+ *	*decoded: buffer where decoded data is written (image buffer)
+ *	width: width of decoded frame in pixels
+ *	height: height of decoded frame in pixels
+ *	bytes_per_pixel: bytes/pixel in output image (color depth)
+ *
+ */
+
+void qt_decode_rpza(char *encoded, int encoded_size, char *decoded, int width,
+										int height, int bytes_per_pixel)
+{
+
+	int i;
+	int stream_ptr = 0;
+	int chunk_size;
+	unsigned char opcode;
+	int n_blocks;
+	unsigned short colorA, colorB;
+	unsigned char r, g, b;
+	unsigned char rgb4[4][3];
+	unsigned short color4[4];
+	unsigned char index, idx;
+
+	int row_ptr = 0;
+	int pixel_ptr = 0;
+	int pixel_x, pixel_y;
+	int row_inc = bytes_per_pixel * (width - 4);
+	int max_height = row_inc * height;
+	int block_x_inc = bytes_per_pixel * 4;
+	int block_y_inc = bytes_per_pixel * width;
+	int block_ptr;
+	int total_blocks;
+
+	
+	/* First byte is always 0xe1. Warn if it's different */
+	if ((unsigned char)encoded[stream_ptr] != 0xe1)
+		mp_msg(MSGT_DECVIDEO, MSGL_WARN,
+					 "First chunk byte is 0x%02x instead of 0x1e\n",
+					 (unsigned char)encoded[stream_ptr]);
+
+	/* Get chunk size, ingnoring first byte */
+	chunk_size = BE_32(&encoded[stream_ptr]) & 0x00FFFFFF;
+	stream_ptr += 4;
+
+	/* If length mismatch use size from MOV file and try to decode anyway */
+	if (chunk_size != encoded_size)
+		mp_msg(MSGT_DECVIDEO, MSGL_WARN, "MOV chunk size != encoded chunk size; using MOV chunk size\n");
+
+	chunk_size = encoded_size;
+
+	/* Number of 4x4 blocks in frame. */
+	total_blocks = (width * height) / (4 * 4);
+
+	/* Process chunk data */
+	while (stream_ptr < chunk_size) {
+		opcode = encoded[stream_ptr++]; /* Get opcode */
+
+		n_blocks = (opcode & 0x1f) +1; /* Extract block counter from opcode */
+
+		/* If opcode MSbit is 0, we need more data to decide what to do */
+		if ((opcode & 0x80) == 0) {
+			colorA = (opcode << 8) | ((unsigned char)encoded[stream_ptr++]);
+			opcode = 0;
+			if ((encoded[stream_ptr] & 0x80) != 0) {
+				/* Must behave as opcode 110xxxxx, using colorA computed above.*/
+				/* Use fake opcode 0x20 to enter switch block at the right place */
+				opcode = 0x20;
+				n_blocks = 1;
+			}
+		}
+		switch (opcode & 0xe0) {
+			/* Skip blocks */
+			case 0x80:
+				while (n_blocks--)
+					ADVANCE_BLOCK();
+				break;
+
+			/* Fill blocks with one color */
+			case 0xa0:
+				colorA = BE_16 (&encoded[stream_ptr]);
+				stream_ptr += 2;
+				COLOR_TO_RGB (r, g, b, colorA);
+				while (n_blocks--) {
+					block_ptr = row_ptr + pixel_ptr;
+					for (pixel_y = 0; pixel_y < 4; pixel_y++) {
+						for (pixel_x = 0; pixel_x < 4; pixel_x++){
+							PAINT_CURRENT_PIXEL(r, g, b, colorA);
+						}
+						block_ptr += row_inc;
+					}
+					ADVANCE_BLOCK();
+				}
+				break;
+
+			/* Fill blocks with 4 colors */
+			case 0xc0:
+				colorA = BE_16 (&encoded[stream_ptr]);
+				stream_ptr += 2;
+			case 0x20:
+				colorB = BE_16 (&encoded[stream_ptr]);
+				stream_ptr += 2;
+				COLORAB_TO_RGB4 (rgb4, color4, colorA, colorB);
+				while (n_blocks--) {
+					block_ptr = row_ptr + pixel_ptr;
+					for (pixel_y = 0; pixel_y < 4; pixel_y++) {
+						index = encoded[stream_ptr++];
+						for (pixel_x = 0; pixel_x < 4; pixel_x++){
+							idx = (index >> (2 * (3 - pixel_x))) & 0x03;
+							PAINT_CURRENT_PIXEL(rgb4[idx][0], rgb4[idx][1], rgb4[idx][2], color4[idx]);
+						}
+						block_ptr += row_inc;
+					}
+					ADVANCE_BLOCK();
+				}
+				break;
+				
+			/* Fill block with 16 colors */
+			case 0x00:
+				block_ptr = row_ptr + pixel_ptr;
+				for (pixel_y = 0; pixel_y < 4; pixel_y++) {
+					for (pixel_x = 0; pixel_x < 4; pixel_x++){
+						/* We already have color of upper left pixel */
+						if ((pixel_y != 0) || (pixel_x !=0)) {
+							colorA = BE_16 (&encoded[stream_ptr]);
+							stream_ptr += 2;
+						}
+						COLOR_TO_RGB (r, g, b, colorA);
+						PAINT_CURRENT_PIXEL(r, g, b, colorA);
+					}
+					block_ptr += row_inc;
+				}
+				ADVANCE_BLOCK();
+				break;
+				
+			/* Unknown opcode */
+			default:
+				mp_msg(MSGT_DECVIDEO, MSGL_HINT, "Unknown opcode %d in rpza chunk."
+							 " Skip remaining %lu bytes of chunk data.\n", opcode,
+							 chunk_size - stream_ptr);
+				return;
+		} /* Opcode switch */
+
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libmpcodecs/native/qtsmc.c	Sat Apr 13 18:03:02 2002 +0000
@@ -0,0 +1,513 @@
+/*
+    Apple Graphics (SMC) Decoder for MPlayer
+    by Mike Melanson
+    Special thanks for Roberto Togni <rtogni@bresciaonline.it> for
+    tracking down the final, nagging bugs.
+
+    The description of the decoding algorithm can be found here:
+      http://www.pcisys.net/~melanson/codecs/
+*/
+
+#include <stdlib.h>
+#include "config.h"
+#include "bswap.h"
+#include "mp_msg.h"
+
+#define BE_16(x) (be2me_16(*(unsigned short *)(x)))
+#define BE_32(x) (be2me_32(*(unsigned int *)(x)))
+
+#define COLORS_PER_TABLE 256
+#define BYTES_PER_COLOR 4
+
+#define CPAIR 2
+#define CQUAD 4
+#define COCTET 8
+
+static unsigned char *color_pairs;
+static unsigned char *color_quads;
+static unsigned char *color_octets;
+
+static int color_pair_index;
+static int color_quad_index;
+static int color_octet_index;
+
+static int smc_initialized;
+
+// returns 0 if successfully initialized (enough memory was available),
+//  non-zero on failure
+int qt_init_decode_smc(void)
+{
+  // be pessimistic to start
+  smc_initialized = 0;
+
+  // allocate memory for the 3 palette tables
+  if ((color_pairs = (unsigned char *)malloc(
+    COLORS_PER_TABLE * BYTES_PER_COLOR * 2)) == 0)
+    return 1;  
+  if ((color_quads = (unsigned char *)malloc(
+    COLORS_PER_TABLE * BYTES_PER_COLOR * 4)) == 0)
+    return 1;  
+  if ((color_octets = (unsigned char *)malloc(
+    COLORS_PER_TABLE * BYTES_PER_COLOR * 8)) == 0)
+    return 1;
+
+  // if execution got this far, initialization succeeded
+  smc_initialized = 1;
+  return 0;
+}
+
+#define GET_BLOCK_COUNT \
+  (opcode & 0x10) ? (1 + encoded[stream_ptr++]) : 1 + (opcode & 0x0F);
+#define ADVANCE_BLOCK() \
+{ \
+  pixel_ptr += block_x_inc; \
+  if (pixel_ptr >= byte_width) \
+  { \
+    pixel_ptr = 0; \
+    row_ptr += block_y_inc * 4; \
+  } \
+  total_blocks--; \
+  if (total_blocks < 0) \
+  { \
+    mp_msg(MSGT_DECVIDEO, MSGL_WARN, "block counter just went negative (this should not happen)\n"); \
+    return; \
+  } \
+}
+
+void qt_decode_smc(
+  unsigned char *encoded,
+  int encoded_size,
+  unsigned char *decoded,
+  int pixel_width,
+  int pixel_height,
+  unsigned char *palette_map,
+  int bytes_per_pixel)
+{
+  int i;
+  int stream_ptr = 0;
+  int chunk_size;
+  unsigned char opcode;
+  int n_blocks;
+  unsigned int color_flags;
+  unsigned int color_flags_a;
+  unsigned int color_flags_b;
+  unsigned int flag_mask;
+
+  int byte_width = pixel_width * bytes_per_pixel;  // width of a row in bytes
+  int byte_height = pixel_height * byte_width;  // max image size, basically
+  int row_ptr = 0;
+  int pixel_ptr = 0;
+  int pixel_x, pixel_y;
+  int row_inc = bytes_per_pixel * (pixel_width - 4);
+  int block_x_inc = bytes_per_pixel * 4;
+  int block_y_inc = bytes_per_pixel * pixel_width;
+  int block_ptr;
+  int prev_block_ptr;
+  int prev_block_ptr1, prev_block_ptr2;
+  int prev_block_flag;
+  int total_blocks;
+  int color_table_index;  // indexes to color pair, quad, or octet tables
+  int color_index;  // indexes into palette map
+
+  if (!smc_initialized)
+    return;
+
+  // reset color tables
+  color_pair_index = 0;
+  color_quad_index = 0;
+  color_octet_index = 0;
+
+  chunk_size = BE_32(&encoded[stream_ptr]) & 0x00FFFFFF;
+  stream_ptr += 4;
+  if (chunk_size != encoded_size)
+    mp_msg(MSGT_DECVIDEO, MSGL_WARN, "MOV chunk size != encoded chunk size; using MOV chunk size\n");
+
+  chunk_size = encoded_size;
+  total_blocks = (pixel_width * pixel_height) / (4 * 4);
+
+  // traverse through the blocks
+  while (total_blocks)
+  {
+    // sanity checks
+    // make sure stream ptr hasn't gone out of bounds
+    if (stream_ptr > chunk_size)
+    {
+      mp_msg(MSGT_DECVIDEO, MSGL_ERR, 
+        "SMC decoder just went out of bounds (stream ptr = %d, chunk size = %d)\n",
+        stream_ptr, chunk_size);
+      return;
+    }
+    // make sure the row pointer hasn't gone wild
+    if (row_ptr >= byte_height)
+    {
+      mp_msg(MSGT_DECVIDEO, MSGL_ERR, 
+        "SMC decoder just went out of bounds (row ptr = %d, height = %d)\n",
+        row_ptr, byte_height);
+      return;
+    }
+
+    opcode = encoded[stream_ptr++];
+    switch (opcode & 0xF0)
+    {
+    // skip n blocks
+    case 0x00:
+    case 0x10:
+      n_blocks = GET_BLOCK_COUNT;
+      while (n_blocks--)
+        ADVANCE_BLOCK();
+      break;
+
+    // repeat last block n times
+    case 0x20:
+    case 0x30:
+      n_blocks = GET_BLOCK_COUNT;
+
+      // sanity check
+      if ((row_ptr == 0) && (pixel_ptr == 0))
+      {
+        mp_msg(MSGT_DECVIDEO, MSGL_WARN,
+          "encountered repeat block opcode (%02X) but no blocks rendered yet\n",
+          opcode & 0xF0);
+        break;
+      }
+
+      // figure out where the previous block started
+      if (pixel_ptr == 0)
+        prev_block_ptr1 = (row_ptr - block_y_inc * 4) + 
+          byte_width - block_x_inc;
+      else
+        prev_block_ptr1 = row_ptr + pixel_ptr - block_x_inc;
+
+      while (n_blocks--)
+      {
+        block_ptr = row_ptr + pixel_ptr;
+        prev_block_ptr = prev_block_ptr1;
+        for (pixel_y = 0; pixel_y < 4; pixel_y++)
+        {
+          for (pixel_x = 0; pixel_x < 4; pixel_x++)
+          {
+            decoded[block_ptr++] = decoded[prev_block_ptr++];
+            decoded[block_ptr++] = decoded[prev_block_ptr++];
+            decoded[block_ptr++] = decoded[prev_block_ptr++];
+            if (bytes_per_pixel == 4) /* 32bpp */
+            {
+              block_ptr++;
+              prev_block_ptr++;
+            }
+          }
+          block_ptr += row_inc;
+          prev_block_ptr += row_inc;
+        }
+        ADVANCE_BLOCK();
+      }
+      break;
+
+    // repeat previous pair of blocks n times
+    case 0x40:
+    case 0x50:
+      n_blocks = GET_BLOCK_COUNT;
+      n_blocks *= 2;
+
+      // sanity check
+      if ((row_ptr == 0) && (pixel_ptr < 2 * block_x_inc))
+      {
+        mp_msg(MSGT_DECVIDEO, MSGL_WARN,
+          "encountered repeat block opcode (%02X) but not enough blocks rendered yet\n",
+          opcode & 0xF0);
+        break;
+      }
+
+      // figure out where the previous 2 blocks started
+      if (pixel_ptr == 0)
+        prev_block_ptr1 = (row_ptr - block_y_inc * 4) + 
+          byte_width - block_x_inc * 2;
+      else if (pixel_ptr == block_x_inc)
+        prev_block_ptr1 = (row_ptr - block_y_inc * 4) + 
+          byte_width - block_x_inc;
+      else
+        prev_block_ptr1 = row_ptr + pixel_ptr - block_x_inc * 2;
+
+      if (pixel_ptr == 0)
+        prev_block_ptr2 = (row_ptr - block_y_inc * 4) + 
+          (byte_width - block_x_inc);
+      else
+        prev_block_ptr2 = row_ptr + pixel_ptr - block_x_inc;
+
+      prev_block_flag = 0;
+      while (n_blocks--)
+      {
+        block_ptr = row_ptr + pixel_ptr;
+        if (prev_block_flag)
+          prev_block_ptr = prev_block_ptr2;
+        else
+          prev_block_ptr = prev_block_ptr1;
+        prev_block_flag = !prev_block_flag;
+
+        for (pixel_y = 0; pixel_y < 4; pixel_y++)
+        {
+          for (pixel_x = 0; pixel_x < 4; pixel_x++)
+          {
+            decoded[block_ptr++] = decoded[prev_block_ptr++];
+            decoded[block_ptr++] = decoded[prev_block_ptr++];
+            decoded[block_ptr++] = decoded[prev_block_ptr++];
+            if (bytes_per_pixel == 4) /* 32bpp */
+            {
+              block_ptr++;
+              prev_block_ptr++;
+            }
+          }
+          block_ptr += row_inc;
+          prev_block_ptr += row_inc;
+        }
+        ADVANCE_BLOCK();
+      }
+      break;
+
+    // 1-color block encoding
+    case 0x60:
+    case 0x70:
+      n_blocks = GET_BLOCK_COUNT;
+      color_index = encoded[stream_ptr++] * 4;
+
+      while (n_blocks--)
+      {
+        block_ptr = row_ptr + pixel_ptr;
+        for (pixel_y = 0; pixel_y < 4; pixel_y++)
+        {
+          for (pixel_x = 0; pixel_x < 4; pixel_x++)
+          {
+            decoded[block_ptr++] = palette_map[color_index + 0];
+            decoded[block_ptr++] = palette_map[color_index + 1];
+            decoded[block_ptr++] = palette_map[color_index + 2];
+            if (bytes_per_pixel == 4) /* 32bpp */
+              block_ptr++;
+          }
+          block_ptr += row_inc;
+        }
+        ADVANCE_BLOCK();
+      }
+      break;
+
+    // 2-color block encoding
+    case 0x80:
+    case 0x90:
+      n_blocks = (opcode & 0x0F) + 1;
+
+      // figure out which color pair to use to paint the 2-color block
+      if ((opcode & 0xF0) == 0x80)
+      {
+        // fetch the next 2 colors from bytestream and store in next
+        // available entry in the color pair table
+        for (i = 0; i < CPAIR; i++)
+        {
+          color_index = encoded[stream_ptr++] * BYTES_PER_COLOR;
+          color_table_index = CPAIR * BYTES_PER_COLOR * color_pair_index + 
+            (i * BYTES_PER_COLOR);
+          color_pairs[color_table_index + 0] = palette_map[color_index + 0];
+          color_pairs[color_table_index + 1] = palette_map[color_index + 1];
+          color_pairs[color_table_index + 2] = palette_map[color_index + 2];
+        }
+        // this is the base index to use for this block
+        color_table_index = CPAIR * BYTES_PER_COLOR * color_pair_index;
+        color_pair_index++;
+        if (color_pair_index == COLORS_PER_TABLE)
+          color_pair_index = 0;
+      }
+      else
+        color_table_index = CPAIR * BYTES_PER_COLOR * encoded[stream_ptr++];
+
+      while (n_blocks--)
+      {
+        color_flags = BE_16(&encoded[stream_ptr]);
+        stream_ptr += 2;
+        flag_mask = 0x8000;
+        block_ptr = row_ptr + pixel_ptr;
+        for (pixel_y = 0; pixel_y < 4; pixel_y++)
+        {
+          for (pixel_x = 0; pixel_x < 4; pixel_x++)
+          {
+            if (color_flags & flag_mask)
+              color_index = color_table_index + BYTES_PER_COLOR;
+            else
+              color_index = color_table_index;
+            flag_mask >>= 1;
+
+            decoded[block_ptr++] = color_pairs[color_index + 0];
+            decoded[block_ptr++] = color_pairs[color_index + 1];
+            decoded[block_ptr++] = color_pairs[color_index + 2];
+            if (bytes_per_pixel == 4) /* 32bpp */
+              block_ptr++;
+          }
+          block_ptr += row_inc;
+        }
+        ADVANCE_BLOCK();
+      }
+      break;
+
+    // 4-color block encoding
+    case 0xA0:
+    case 0xB0:
+      n_blocks = (opcode & 0x0F) + 1;
+
+      // figure out which color quad to use to paint the 4-color block
+      if ((opcode & 0xF0) == 0xA0)
+      {
+        // fetch the next 4 colors from bytestream and store in next
+        // available entry in the color quad table
+        for (i = 0; i < CQUAD; i++)
+        {
+          color_index = encoded[stream_ptr++] * BYTES_PER_COLOR;
+          color_table_index = CQUAD * BYTES_PER_COLOR * color_quad_index + 
+            (i * BYTES_PER_COLOR);
+          color_quads[color_table_index + 0] = palette_map[color_index + 0];
+          color_quads[color_table_index + 1] = palette_map[color_index + 1];
+          color_quads[color_table_index + 2] = palette_map[color_index + 2];
+        }
+        // this is the base index to use for this block
+        color_table_index = CQUAD * BYTES_PER_COLOR * color_quad_index;
+        color_quad_index++;
+        if (color_quad_index == COLORS_PER_TABLE)
+          color_quad_index = 0;
+      }
+      else
+        color_table_index = CQUAD * BYTES_PER_COLOR * encoded[stream_ptr++];
+
+      while (n_blocks--)
+      {
+        color_flags = BE_32(&encoded[stream_ptr]);
+        stream_ptr += 4;
+        // flag mask actually acts as a bit shift count here
+        flag_mask = 30;
+        block_ptr = row_ptr + pixel_ptr;
+        for (pixel_y = 0; pixel_y < 4; pixel_y++)
+        {
+          for (pixel_x = 0; pixel_x < 4; pixel_x++)
+          {
+            color_index = color_table_index + (BYTES_PER_COLOR * 
+              ((color_flags >> flag_mask) & 0x03));
+            flag_mask -= 2;
+
+            decoded[block_ptr++] = color_quads[color_index + 0];
+            decoded[block_ptr++] = color_quads[color_index + 1];
+            decoded[block_ptr++] = color_quads[color_index + 2];
+            if (bytes_per_pixel == 4) /* 32bpp */
+              block_ptr++;
+          }
+          block_ptr += row_inc;
+        }
+        ADVANCE_BLOCK();
+      }
+      break;
+
+    // 8-color block encoding
+    case 0xC0:
+    case 0xD0:
+      n_blocks = (opcode & 0x0F) + 1;
+
+      // figure out which color octet to use to paint the 8-color block
+      if ((opcode & 0xF0) == 0xC0)
+      {
+        // fetch the next 8 colors from bytestream and store in next
+        // available entry in the color octet table
+        for (i = 0; i < COCTET; i++)
+        {
+          color_index = encoded[stream_ptr++] * BYTES_PER_COLOR;
+          color_table_index = COCTET * BYTES_PER_COLOR * color_octet_index + 
+            (i * BYTES_PER_COLOR);
+          color_octets[color_table_index + 0] = palette_map[color_index + 0];
+          color_octets[color_table_index + 1] = palette_map[color_index + 1];
+          color_octets[color_table_index + 2] = palette_map[color_index + 2];
+        }
+        // this is the base index to use for this block
+        color_table_index = COCTET * BYTES_PER_COLOR * color_octet_index;
+        color_octet_index++;
+        if (color_octet_index == COLORS_PER_TABLE)
+          color_octet_index = 0;
+      }
+      else
+        color_table_index = COCTET * BYTES_PER_COLOR * encoded[stream_ptr++];
+
+      while (n_blocks--)
+      {
+        /*
+          For this input:
+            01 23 45 67 89 AB
+          This is the output:
+            flags_a = xx012456, flags_b = xx89A37B
+        */
+        // build the color flags
+        color_flags_a = color_flags_b = 0;
+        color_flags_a =
+          (encoded[stream_ptr + 0] << 16) |
+          ((encoded[stream_ptr + 1] & 0xF0) << 8) |
+          ((encoded[stream_ptr + 2] & 0xF0) << 4) |
+          ((encoded[stream_ptr + 2] & 0x0F) << 4) |
+          ((encoded[stream_ptr + 3] & 0xF0) >> 4);
+        color_flags_b =
+          (encoded[stream_ptr + 4] << 16) |
+          ((encoded[stream_ptr + 5] & 0xF0) << 8) |
+          ((encoded[stream_ptr + 1] & 0x0F) << 8) |
+          ((encoded[stream_ptr + 3] & 0x0F) << 4) |
+          (encoded[stream_ptr + 5] & 0x0F);
+        stream_ptr += 6;
+
+        color_flags = color_flags_a;
+        // flag mask actually acts as a bit shift count here
+        flag_mask = 21;
+        block_ptr = row_ptr + pixel_ptr;
+        for (pixel_y = 0; pixel_y < 4; pixel_y++)
+        {
+          // reload flags at third row (iteration pixel_y == 2)
+          if (pixel_y == 2)
+          {
+            color_flags = color_flags_b;
+            flag_mask = 21;
+          }
+          for (pixel_x = 0; pixel_x < 4; pixel_x++)
+          {
+            color_index = color_table_index + (BYTES_PER_COLOR * 
+              ((color_flags >> flag_mask) & 0x07));
+            flag_mask -= 3;
+
+            decoded[block_ptr++] = color_octets[color_index + 0];
+            decoded[block_ptr++] = color_octets[color_index + 1];
+            decoded[block_ptr++] = color_octets[color_index + 2];
+            if (bytes_per_pixel == 4) /* 32bpp */
+              block_ptr++;
+          }
+          block_ptr += row_inc;
+        }
+        ADVANCE_BLOCK();
+      }
+      break;
+
+    // 16-color block encoding (every pixel is a different color)
+    case 0xE0:
+      n_blocks = (opcode & 0x0F) + 1;
+
+      while (n_blocks--)
+      {
+        block_ptr = row_ptr + pixel_ptr;
+        for (pixel_y = 0; pixel_y < 4; pixel_y++)
+        {
+          for (pixel_x = 0; pixel_x < 4; pixel_x++)
+          {
+            color_index = encoded[stream_ptr++] * BYTES_PER_COLOR;
+            decoded[block_ptr++] = palette_map[color_index + 0];
+            decoded[block_ptr++] = palette_map[color_index + 1];
+            decoded[block_ptr++] = palette_map[color_index + 2];
+            if (bytes_per_pixel == 4) /* 32bpp */
+              block_ptr++;
+          }
+          block_ptr += row_inc;
+        }
+        ADVANCE_BLOCK();
+      }
+      break;
+
+    case 0xF0:
+      mp_msg(MSGT_DECVIDEO, MSGL_HINT, "0xF0 opcode seen in SMC chunk (MPlayer developers would like to know)\n");
+      break;
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libmpcodecs/native/roqav.c	Sat Apr 13 18:03:02 2002 +0000
@@ -0,0 +1,670 @@
+/*
+    RoQ A/V decoder for the MPlayer program
+    by Mike Melanson
+    based on Dr. Tim Ferguson's RoQ document and accompanying source
+    code found at:
+      http://www.csse.monash.edu.au/~timf/videocodec.html
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "config.h"
+#include "bswap.h"
+#include "mp_msg.h"
+#include "mp_image.h"
+
+#define LE_16(x) (le2me_16(*(unsigned short *)(x)))
+#define LE_32(x) (le2me_32(*(unsigned int *)(x)))
+
+#define CLAMP_S16(x)  if (x < -32768) x = -32768; \
+  else if (x > 32767) x = 32767;
+#define SE_16BIT(x)  if (x & 0x8000) x -= 0x10000;
+
+// RoQ chunk types
+#define RoQ_INFO           0x1001
+#define RoQ_QUAD_CODEBOOK  0x1002
+#define RoQ_QUAD_VQ        0x1011
+#define RoQ_SOUND_MONO     0x1020
+#define RoQ_SOUND_STEREO   0x1021
+
+#define MAX_ROQ_CODEBOOK_SIZE 256
+
+// codebook entry for 2x2 vector
+typedef struct
+{
+  // upper and lower luminance value pairs of 2x2 vector: [y0 y1], [y2 y3]
+  unsigned short v2_y_u;
+  unsigned short v2_y_l;
+
+  // chrominance components
+  unsigned char u, v;
+
+  // these variables are for rendering a doublesized 8x8 block; e.g.:
+  // v2_y_rows12 = [y0 y0 y1 y1]
+  // v2_y_rows34 = [y2 y2 y3 y3]
+  unsigned long v2d_y_rows_12;
+  unsigned long v2d_y_rows_34;
+  // ex: v2_u_row1 = [u u]
+  //     v2_v_row2 = [v v]
+  unsigned short v2d_u_rows_12;
+  unsigned short v2d_v_rows_12;
+
+  // maintain separate bytes for the luminance values as well
+  unsigned char y0, y1, y2, y3;
+} roq_v2_codebook;
+
+// codebook entry for 4x4 vector
+typedef struct
+{
+  unsigned char v2_index[4];
+} roq_v4_codebook;
+
+typedef struct
+{
+  roq_v2_codebook v2[MAX_ROQ_CODEBOOK_SIZE];
+  roq_v4_codebook v4[MAX_ROQ_CODEBOOK_SIZE];
+  mp_image_t *prev_frame;
+} roqvideo_info;
+
+
+// This function fills in the missing information for a v2 vector after
+// loading the Y, U and V values.
+inline void prep_v2(roq_v2_codebook *v2)
+{
+  v2->v2_y_u = be2me_16((v2->y0 << 8) | v2->y1);
+  v2->v2_y_l = be2me_16((v2->y2 << 8) | v2->y3);
+
+  v2->v2d_y_rows_12 = be2me_32((v2->y0 << 24) | (v2->y0 << 16) |
+    (v2->y1 << 8) | v2->y1);
+  v2->v2d_y_rows_34 = be2me_32((v2->y2 << 24) | (v2->y2 << 16) |
+    (v2->y3 << 8) | v2->y3);
+
+  // no reason to swap these for endianness since they're the same bytes
+  v2->v2d_u_rows_12 = (v2->u << 8) | v2->u;
+  v2->v2d_v_rows_12 = (v2->v << 8) | v2->v;
+}
+
+inline void paint_v2double_block(
+  unsigned char *y_plane,
+  unsigned char *u_plane,
+  unsigned char *v_plane,
+  roq_v2_codebook *v2,
+  unsigned int y_stride,
+  unsigned int u_stride,
+  unsigned int v_stride)
+{
+  // render the luminance components
+  *(unsigned int *)y_plane = v2->v2d_y_rows_12;
+  y_plane += y_stride;
+  *(unsigned int *)y_plane = v2->v2d_y_rows_12;
+  y_plane += y_stride;
+  *(unsigned int *)y_plane = v2->v2d_y_rows_34;
+  y_plane += y_stride;
+  *(unsigned int *)y_plane = v2->v2d_y_rows_34;
+
+  // render the color planes
+  *(unsigned short *)u_plane = v2->v2d_u_rows_12;
+  u_plane += u_stride;
+  *(unsigned short *)u_plane = v2->v2d_u_rows_12;
+
+  *(unsigned short *)v_plane = v2->v2d_v_rows_12;
+  v_plane += v_stride;
+  *(unsigned short *)v_plane = v2->v2d_v_rows_12;
+}
+
+inline void paint_v4_block(
+  unsigned char *y_plane,
+  unsigned char *u_plane,
+  unsigned char *v_plane,
+  unsigned int y_stride,
+  unsigned int u_stride,
+  unsigned int v_stride,
+  roq_v2_codebook *v2_a,
+  roq_v2_codebook *v2_b,
+  roq_v2_codebook *v2_c,
+  roq_v2_codebook *v2_d)
+{
+  // render luminance components
+  ((unsigned short *)y_plane)[0] = v2_a->v2_y_u;
+  ((unsigned short *)y_plane)[1] = v2_b->v2_y_u;
+
+  y_plane += y_stride;
+  ((unsigned short *)y_plane)[0] = v2_a->v2_y_l;
+  ((unsigned short *)y_plane)[1] = v2_b->v2_y_l;
+
+  y_plane += y_stride;
+  ((unsigned short *)y_plane)[0] = v2_c->v2_y_u;
+  ((unsigned short *)y_plane)[1] = v2_d->v2_y_u;
+
+  y_plane += y_stride;
+  ((unsigned short *)y_plane)[0] = v2_c->v2_y_l;
+  ((unsigned short *)y_plane)[1] = v2_d->v2_y_l;
+
+  // render the color planes
+  u_plane[0] = v2_a->u;
+  u_plane[1] = v2_b->u;
+  u_plane += u_stride;
+  u_plane[0] = v2_c->u;
+  u_plane[1] = v2_d->u;
+
+  v_plane[0] = v2_a->v;
+  v_plane[1] = v2_b->v;
+  v_plane += v_stride;
+  v_plane[0] = v2_c->v;
+  v_plane[1] = v2_d->v;
+}
+
+// This function copies the 4x4 block from the prev_*_planes to the
+// current *_planes.
+inline void copy_4x4_block(
+  unsigned char *y_plane,
+  unsigned char *u_plane,
+  unsigned char *v_plane,
+  unsigned char *prev_y_plane,
+  unsigned char *prev_u_plane,
+  unsigned char *prev_v_plane,
+  unsigned int y_stride,
+  unsigned int u_stride,
+  unsigned int v_stride)
+{
+  int i;
+
+  // copy over the luminance components (4 rows, 1 uint each)
+  for (i = 0; i < 4; i++)
+  {
+    *(unsigned int *)y_plane = *(unsigned int *)prev_y_plane;
+    y_plane += y_stride;
+    prev_y_plane += y_stride;
+  }
+
+  // copy the chrominance values
+  for (i = 0; i < 2; i++)
+  {
+    *(unsigned short*)u_plane = *(unsigned short*)prev_u_plane;
+    u_plane += u_stride;
+    prev_u_plane += u_stride;
+    *(unsigned short*)v_plane = *(unsigned short*)prev_v_plane;
+    v_plane += v_stride;
+    prev_v_plane += v_stride;
+  }
+}
+
+// This function copies the 8x8 block from the prev_*_planes to the
+// current *_planes.
+inline void copy_8x8_block(
+  unsigned char *y_plane,
+  unsigned char *u_plane,
+  unsigned char *v_plane,
+  unsigned char *prev_y_plane,
+  unsigned char *prev_u_plane,
+  unsigned char *prev_v_plane,
+  unsigned int y_stride,
+  unsigned int u_stride,
+  unsigned int v_stride)
+{
+  int i;
+
+  // copy over the luminance components (8 rows, 2 uints each)
+  for (i = 0; i < 8; i++)
+  {
+    ((unsigned int *)y_plane)[0] = ((unsigned int *)prev_y_plane)[0];
+    ((unsigned int *)y_plane)[1] = ((unsigned int *)prev_y_plane)[1];
+    y_plane += y_stride;
+    prev_y_plane += y_stride;
+  }
+
+  // copy the chrominance values
+  for (i = 0; i < 4; i++)
+  {
+    *(unsigned int*)u_plane = *(unsigned int*)prev_u_plane;
+    u_plane += u_stride;
+    prev_u_plane += u_stride;
+    *(unsigned int*)v_plane = *(unsigned int*)prev_v_plane;
+    v_plane += v_stride;
+    prev_v_plane += v_stride;
+  }
+}
+
+// This function creates storage space for the vector codebooks.
+void *roq_decode_video_init(void)
+{
+  roqvideo_info *info =
+    (roqvideo_info *)calloc(sizeof(roqvideo_info), 1);
+
+  info->prev_frame = NULL;
+
+  return info;
+}
+
+#define EMPTY_ROQ_CODEWORD 0xFFFF0000
+
+#define FETCH_NEXT_CODE() \
+  if (current_roq_codeword == EMPTY_ROQ_CODEWORD) \
+  { \
+    if (stream_ptr + 2 > encoded_size) \
+    { \
+      mp_msg(MSGT_DECVIDEO, MSGL_WARN,  \
+        "RoQ video: stream pointer just went out of bounds (1)\n"); \
+      return; \
+    } \
+    current_roq_codeword = (0x0000FFFF) | \
+      (encoded[stream_ptr + 0] << 16) | \
+      (encoded[stream_ptr + 1] << 24); \
+    stream_ptr += 2; \
+  } \
+  roq_code = ((current_roq_codeword >> 30) & 0x03); \
+  current_roq_codeword <<= 2;
+
+#define FETCH_NEXT_ARGUMENT() \
+  if (stream_ptr + 1 > encoded_size) \
+  { \
+    mp_msg(MSGT_DECVIDEO, MSGL_WARN,  \
+      "RoQ video: stream pointer just went out of bounds (2)\n"); \
+    return; \
+  } \
+  argument = encoded[stream_ptr++];
+
+#define CHECK_PREV_FRAME() \
+  if (!info->prev_frame) \
+  { \
+    mp_msg(MSGT_DECVIDEO, MSGL_WARN, \
+      "RoQ video: can't handle motion vector when there's no previous frame\n"); \
+    return; \
+  }
+
+void roq_decode_video(void *context, unsigned char *encoded,
+  int encoded_size, mp_image_t *mpi)
+{
+  roqvideo_info *info = (roqvideo_info *)context;
+
+  int stream_ptr = 0;
+  int i, j;
+  int chunk_length;
+  int v2_count;
+  int v4_count;
+
+  int roq_code;
+  unsigned int current_roq_codeword = EMPTY_ROQ_CODEWORD;
+  unsigned char argument = 0;
+  char mean_motion_x;
+  char mean_motion_y;
+  int mx, my; // for calculating the motion vector
+
+  int mblock_x = 0;
+  int mblock_y = 0;
+  int quad8_x, quad8_y;  // for pointing to 8x8 blocks in a macroblock
+  int quad4_x, quad4_y;  // for pointing to 4x4 blocks in an 8x8 block
+
+  unsigned char *y_plane;
+  unsigned char *u_plane;
+  unsigned char *v_plane;
+  unsigned char *prev_y_plane;
+  unsigned char *prev_u_plane;
+  unsigned char *prev_v_plane;
+  unsigned int y_stride = mpi->stride[0];
+  unsigned int u_stride = mpi->stride[1];
+  unsigned int v_stride = mpi->stride[2];
+
+  roq_v4_codebook v4;
+
+  // make sure the encoded chunk is of minimal acceptable length
+  if (encoded_size < 8)
+  {
+    mp_msg(MSGT_DECVIDEO, MSGL_WARN, 
+      "RoQ video: chunk isn't even 8 bytes long (minimum acceptable length)\n");
+    return;
+  }
+
+  // make sure the resolution checks out
+  if ((mpi->width % 16 != 0) || (mpi->height % 16 != 0))
+  {
+    mp_msg(MSGT_DECVIDEO, MSGL_WARN, 
+      "RoQ video resolution: %d x %d; expected dimensions divisible by 16\n");
+    return;
+  }
+
+  if (LE_16(&encoded[stream_ptr]) == RoQ_QUAD_CODEBOOK)
+  {
+    stream_ptr += 2;
+    chunk_length = LE_32(&encoded[stream_ptr]);
+    stream_ptr += 4;
+    v4_count = encoded[stream_ptr++];
+    v2_count = encoded[stream_ptr++];
+    if (v2_count == 0)
+      v2_count = 256;
+    if ((v4_count == 0) && (v2_count * 6 < chunk_length))
+      v4_count = 256;
+
+    // make sure the lengths agree with each other
+    if (((v2_count * 6) + (v4_count * 4)) != chunk_length)
+    {
+      mp_msg(MSGT_DECVIDEO, MSGL_WARN,
+        "RoQ video: encountered quad codebook chunk with weird lengths (1)\n");
+      return;
+    }
+    if ((v2_count * 6) > (encoded_size - stream_ptr))
+    {
+      mp_msg(MSGT_DECVIDEO, MSGL_WARN,
+        "RoQ video: encountered quad codebook chunk with weird lengths (2)\n");
+      return;
+    }
+
+    // load the 2x2 vectors
+    for (i = 0; i < v2_count; i++)
+    {
+      info->v2[i].y0 = encoded[stream_ptr++];
+      info->v2[i].y1 = encoded[stream_ptr++];
+      info->v2[i].y2 = encoded[stream_ptr++];
+      info->v2[i].y3 = encoded[stream_ptr++];
+      info->v2[i].u = encoded[stream_ptr++];
+      info->v2[i].v = encoded[stream_ptr++];
+      prep_v2(&info->v2[i]);
+    }
+
+    if ((v4_count * 4) > (encoded_size - stream_ptr))
+    {
+      mp_msg(MSGT_DECVIDEO, MSGL_WARN,
+        "RoQ video: encountered quad codebook chunk with weird lengths (3)\n");
+      return;
+    }
+
+    // load the 4x4 vectors
+    for (i = 0; i < v4_count; i++)
+    {
+      info->v4[i].v2_index[0] = encoded[stream_ptr++];
+      info->v4[i].v2_index[1] = encoded[stream_ptr++];
+      info->v4[i].v2_index[2] = encoded[stream_ptr++];
+      info->v4[i].v2_index[3] = encoded[stream_ptr++];
+    }
+  }
+
+  if (LE_16(&encoded[stream_ptr]) == RoQ_QUAD_VQ)
+  {
+    stream_ptr += 2;
+    chunk_length = LE_32(&encoded[stream_ptr]);
+    stream_ptr += 4;
+    mean_motion_y = encoded[stream_ptr++];
+    mean_motion_x = encoded[stream_ptr++];
+
+    // start by copying entire previous frame
+    if (info->prev_frame)
+    {
+      memcpy(mpi->planes[0], info->prev_frame->planes[0],
+        mpi->width * mpi->height);
+      memcpy(mpi->planes[1], info->prev_frame->planes[1],
+        (mpi->width * mpi->height) / 4);
+      memcpy(mpi->planes[2], info->prev_frame->planes[2], 
+        (mpi->width * mpi->height) / 4);
+    }
+
+    // iterate through the 16x16 macroblocks
+    for (mblock_y = 0; mblock_y < mpi->height; mblock_y += 16)
+    {
+      for (mblock_x = 0; mblock_x < mpi->width; mblock_x += 16)
+      {
+        // iterate through the 4 quadrants of the macroblock
+        for (i = 0; i < 4; i++)
+        {
+          quad8_x = mblock_x;
+          quad8_y = mblock_y;
+          if (i & 0x01) quad8_x += 8;
+          if (i & 0x02) quad8_y += 8;
+
+          // set up the planes
+          y_plane = mpi->planes[0] + quad8_y * y_stride + quad8_x;
+          u_plane = mpi->planes[1] + (quad8_y / 2) * u_stride + (quad8_x / 2);
+          v_plane = mpi->planes[2] + (quad8_y / 2) * v_stride + (quad8_x / 2);
+
+          // decide how to handle this 8x8 quad
+          FETCH_NEXT_CODE();
+          switch(roq_code)
+          {
+            // 8x8 block is the same as in the previous frame;
+            // skip it
+            case 0:
+              break;
+
+            // 8x8 block is painted with an 8x8 block from the last frame
+            // (i.e., motion compensation)
+            case 1:
+              CHECK_PREV_FRAME();
+
+              // prepare the pointers to the planes in the previous frame
+              FETCH_NEXT_ARGUMENT();  // argument contains motion vectors
+
+              // figure out the motion vectors
+              mx = quad8_x + 8 - (argument >> 4) - mean_motion_x;
+              my = quad8_y + 8 - (argument & 0x0F) - mean_motion_y;
+
+              prev_y_plane = info->prev_frame->planes[0] +
+                my * y_stride + mx;
+              prev_u_plane = info->prev_frame->planes[1] +
+                (my / 2) * u_stride + (mx + 1) / 2;
+              prev_v_plane = info->prev_frame->planes[2] +
+                (my / 2) * v_stride + (mx + 1) / 2;
+
+// sanity check before rendering
+              copy_8x8_block(
+                y_plane,
+                u_plane,
+                v_plane,
+                prev_y_plane,
+                prev_u_plane,
+                prev_v_plane,
+                y_stride,
+                u_stride,
+                v_stride
+              );
+              break;
+
+            // 8x8 block is painted with a doublesized 4x4 vector
+            case 2:
+              FETCH_NEXT_ARGUMENT();
+              v4 = info->v4[argument];
+
+// sanity check before rendering
+              // iterate through 4 4x4 blocks
+              for (j = 0; j < 4; j++)
+              {
+                quad4_x = quad8_x;
+                quad4_y = quad8_y;
+                if (j & 0x01) quad4_x += 4;
+                if (j & 0x02) quad4_y += 4;
+
+                // set up the planes
+                y_plane = mpi->planes[0] + quad4_y * y_stride + quad4_x;
+                u_plane = mpi->planes[1] + 
+                  (quad4_y / 2) * u_stride + (quad4_x / 2);
+                v_plane = mpi->planes[2] + 
+                  (quad4_y / 2) * v_stride + (quad4_x / 2);
+
+                paint_v2double_block(
+                  y_plane,
+                  u_plane,
+                  v_plane,
+                  &info->v2[v4.v2_index[j]],
+                  y_stride,
+                  u_stride,
+                  v_stride
+                );
+              }
+              break;
+
+            // 8x8 block is broken down into 4 4x4 blocks and painted using
+            // 4 different codes.
+            case 3:
+              // iterate through 4 4x4 blocks
+              for (j = 0; j < 4; j++)
+              {
+                quad4_x = quad8_x;
+                quad4_y = quad8_y;
+                if (j & 0x01) quad4_x += 4;
+                if (j & 0x02) quad4_y += 4;
+
+                // set up the planes
+                y_plane = mpi->planes[0] + quad4_y * y_stride + quad4_x;
+                u_plane = mpi->planes[1] + 
+                  (quad4_y / 2) * u_stride + (quad4_x / 2);
+                v_plane = mpi->planes[2] + 
+                  (quad4_y / 2) * v_stride + (quad4_x / 2);
+
+                // decide how to handle this 4x4 quad
+                FETCH_NEXT_CODE();
+                switch(roq_code)
+                {
+                  // 4x4 block is the same as in the previous frame;
+                  // skip it
+                  case 0:
+                    break;
+
+                  // 4x4 block is motion compensated from the previous frame
+                  case 1:
+                    CHECK_PREV_FRAME();
+                    // prepare the pointers to the planes in the previous frame
+                    FETCH_NEXT_ARGUMENT();  // argument contains motion vectors
+
+                    // figure out the motion vectors
+                    mx = quad4_x + 8 - (argument >> 4) - mean_motion_x;
+                    my = quad4_y + 8 - (argument & 0x0F) - mean_motion_y;
+
+                    prev_y_plane = info->prev_frame->planes[0] +
+                      my * y_stride + mx;
+                    prev_u_plane = info->prev_frame->planes[1] +
+                     (my / 2) * u_stride + (mx + 1) / 2;
+                    prev_v_plane = info->prev_frame->planes[2] +
+                     (my / 2) * u_stride + (mx + 1) / 2;
+
+// sanity check before rendering
+                    copy_4x4_block(
+                      y_plane,
+                      u_plane,
+                      v_plane,
+                      prev_y_plane,
+                      prev_u_plane,
+                      prev_v_plane,
+                      y_stride,
+                      u_stride,
+                      v_stride
+                    );
+                    break;
+
+                  // 4x4 block is copied directly from v4 vector table
+                  case 2:
+                    FETCH_NEXT_ARGUMENT();
+                    v4 = info->v4[argument];
+
+                    paint_v4_block(
+                      y_plane,
+                      u_plane,
+                      v_plane,
+                      y_stride,
+                      u_stride,
+                      v_stride,
+                      &info->v2[v4.v2_index[0]],
+                      &info->v2[v4.v2_index[1]],
+                      &info->v2[v4.v2_index[2]],
+                      &info->v2[v4.v2_index[3]]);
+                    break;
+
+                  // 4x4 block is built from 4 2x2 vectors
+                  case 3:
+                    if (stream_ptr + 4 > encoded_size)
+                    {
+                      mp_msg(MSGT_DECVIDEO, MSGL_WARN,
+                        "RoQ video: stream pointer just went out of bounds (2)\n");
+                      return;
+                    }
+                    paint_v4_block(
+                      y_plane,
+                      u_plane,
+                      v_plane,
+                      y_stride,
+                      u_stride,
+                      v_stride,
+                      &info->v2[encoded[stream_ptr + 0]],
+                      &info->v2[encoded[stream_ptr + 1]],
+                      &info->v2[encoded[stream_ptr + 2]],
+                      &info->v2[encoded[stream_ptr + 3]]);
+                    stream_ptr += 4;
+                    break;
+                }
+              }
+              break;
+          }
+        }
+      }
+    }
+  }
+
+  // one last sanity check on the way out
+  // (apparently, it's not unusual to have 2 bytes left over after decode)
+  if (stream_ptr < encoded_size - 2)
+  {
+      mp_msg(MSGT_DECVIDEO, MSGL_WARN,
+        "RoQ video: completed frame decode with bytes left over (%d < %d)\n",
+          stream_ptr, encoded_size);
+  }
+
+  // save the current frame as the previous frame for the next iteration
+  info->prev_frame = mpi;
+}
+
+// Initialize the RoQ audio decoder, which is to say, initialize the table
+// of squares.
+void *roq_decode_audio_init(void)
+{
+  short *square_array;
+  short square;
+  int i;
+
+  square_array = (short *)malloc(256 * sizeof(short));
+  if (!square_array)
+    return NULL;
+
+  for (i = 0; i < 128; i++)
+  {
+    square = i * i;
+    square_array[i] = square;
+    square_array[i + 128] = -square;
+  }
+
+  return square_array;
+}
+
+int roq_decode_audio(
+  unsigned short *output,
+  unsigned char *input,
+  int encoded_size,
+  int channels,
+  void *context)
+{
+  short *square_array = (short *)context;
+  int i;
+  int predictor[2];
+  int channel_number = 0;
+
+  // prepare the initial predictors
+  if (channels == 1)
+    predictor[0] = LE_16(&input[0]);
+  else
+  {
+    predictor[0] = input[1] << 8;
+    predictor[1] = input[0] << 8;
+  }
+  SE_16BIT(predictor[0]);
+  SE_16BIT(predictor[1]);
+
+  // decode the samples
+  for (i = 2; i < encoded_size; i++)
+  {
+    predictor[channel_number] += square_array[input[i]];
+    CLAMP_S16(predictor[channel_number]);
+    output[i - 2] = predictor[channel_number];
+
+    // toggle channel
+    channel_number ^= channels - 1;
+  }
+
+  // return the number of samples decoded
+  return (encoded_size - 2);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libmpcodecs/native/roqav.h	Sat Apr 13 18:03:02 2002 +0000
@@ -0,0 +1,12 @@
+#ifndef ROQAV_H
+#define ROQAV_H
+
+void *roq_decode_video_init(void);
+void roq_decode_video(void *context, unsigned char *encoded, 
+  int encoded_size, mp_image_t *mpi);
+
+void *roq_decode_audio_init(void);
+int roq_decode_audio(unsigned short *output, unsigned char *input,
+  int encoded_size, int channels, void *context);
+
+#endif // ROQAV_H
--- a/lzoconf.h	Sat Apr 13 17:44:14 2002 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,386 +0,0 @@
-/* lzoconf.h -- configuration for the LZO real-time data compression library
-
-   This file is part of the LZO real-time data compression library.
-
-   Copyright (C) 2000 Markus Franz Xaver Johannes Oberhumer
-   Copyright (C) 1999 Markus Franz Xaver Johannes Oberhumer
-   Copyright (C) 1998 Markus Franz Xaver Johannes Oberhumer
-   Copyright (C) 1997 Markus Franz Xaver Johannes Oberhumer
-   Copyright (C) 1996 Markus Franz Xaver Johannes Oberhumer
-
-   The LZO library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of
-   the License, or (at your option) any later version.
-
-   The LZO library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with the LZO library; see the file COPYING.
-   If not, write to the Free Software Foundation, Inc.,
-   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
-   Markus F.X.J. Oberhumer
-   <markus.oberhumer@jk.uni-linz.ac.at>
-   http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html
- */
-
-
-#ifndef __LZOCONF_H
-#define __LZOCONF_H
-
-#define LZO_VERSION             0x1070
-#define LZO_VERSION_STRING      "1.07"
-#define LZO_VERSION_DATE        "Oct 18 2000"
-
-/* internal Autoconf configuration file - only used when building LZO */
-#if defined(LZO_HAVE_CONFIG_H)
-#  include <config.h>
-#endif
-#include <limits.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/***********************************************************************
-// LZO requires a conforming <limits.h>
-************************************************************************/
-
-#if !defined(CHAR_BIT) || (CHAR_BIT != 8)
-#  error "invalid CHAR_BIT"
-#endif
-#if !defined(UCHAR_MAX) || !defined(UINT_MAX) || !defined(ULONG_MAX)
-#  error "check your compiler installation"
-#endif
-#if (USHRT_MAX < 1) || (UINT_MAX < 1) || (ULONG_MAX < 1)
-#  error "your limits.h macros are broken"
-#endif
-
-/* workaround a cpp bug under hpux 10.20 */
-#define LZO_0xffffffffL     4294967295ul
-
-
-/***********************************************************************
-// architecture defines
-************************************************************************/
-
-#if !defined(__LZO_WIN) && !defined(__LZO_DOS) && !defined(__LZO_OS2)
-#  if defined(__WINDOWS__) || defined(_WINDOWS) || defined(_Windows)
-#    define __LZO_WIN
-#  elif defined(__WIN32__) || defined(_WIN32) || defined(WIN32)
-#    define __LZO_WIN
-#  elif defined(__NT__) || defined(__NT_DLL__) || defined(__WINDOWS_386__)
-#    define __LZO_WIN
-#  elif defined(__DOS__) || defined(__MSDOS__) || defined(MSDOS)
-#    define __LZO_DOS
-#  elif defined(__OS2__) || defined(__OS2V2__) || defined(OS2)
-#    define __LZO_OS2
-#  elif defined(__palmos__)
-#    define __LZO_PALMOS
-#  elif defined(__TOS__) || defined(__atarist__)
-#    define __LZO_TOS
-#  endif
-#endif
-
-#if (UINT_MAX < LZO_0xffffffffL)
-#  if defined(__LZO_WIN)
-#    define __LZO_WIN16
-#  elif defined(__LZO_DOS)
-#    define __LZO_DOS16
-#  elif defined(__LZO_PALMOS)
-#    define __LZO_PALMOS16
-#  elif defined(__LZO_TOS)
-#    define __LZO_TOS16
-#  elif defined(__C166__)
-#  else
-#    error "16-bit target not supported - contact me for porting hints"
-#  endif
-#endif
-
-#if !defined(__LZO_i386)
-#  if defined(__LZO_DOS) || defined(__LZO_WIN16)
-#    define __LZO_i386
-#  elif defined(__i386__) || defined(__386__) || defined(_M_IX86)
-#    define __LZO_i386
-#  endif
-#endif
-
-#if defined(__LZO_STRICT_16BIT)
-#  if (UINT_MAX < LZO_0xffffffffL)
-#    include <lzo16bit.h>
-#  endif
-#endif
-
-/* memory checkers */
-#if !defined(__LZO_CHECKER)
-#  if defined(__BOUNDS_CHECKING_ON)
-#    define __LZO_CHECKER
-#  elif defined(__CHECKER__)
-#    define __LZO_CHECKER
-#  elif defined(__INSURE__)
-#    define __LZO_CHECKER
-#  elif defined(__PURIFY__)
-#    define __LZO_CHECKER
-#  endif
-#endif
-
-
-/***********************************************************************
-// integral and pointer types
-************************************************************************/
-
-/* Integral types with 32 bits or more */
-#if !defined(LZO_UINT32_MAX)
-#  if (UINT_MAX >= LZO_0xffffffffL)
-     typedef unsigned int       lzo_uint32;
-     typedef int                lzo_int32;
-#    define LZO_UINT32_MAX      UINT_MAX
-#    define LZO_INT32_MAX       INT_MAX
-#    define LZO_INT32_MIN       INT_MIN
-#  elif (ULONG_MAX >= LZO_0xffffffffL)
-     typedef unsigned long      lzo_uint32;
-     typedef long               lzo_int32;
-#    define LZO_UINT32_MAX      ULONG_MAX
-#    define LZO_INT32_MAX       LONG_MAX
-#    define LZO_INT32_MIN       LONG_MIN
-#  else
-#    error "lzo_uint32"
-#  endif
-#endif
-
-/* lzo_uint is used like size_t */
-#if !defined(LZO_UINT_MAX)
-#  if (UINT_MAX >= LZO_0xffffffffL)
-     typedef unsigned int       lzo_uint;
-     typedef int                lzo_int;
-#    define LZO_UINT_MAX        UINT_MAX
-#    define LZO_INT_MAX         INT_MAX
-#    define LZO_INT_MIN         INT_MIN
-#  elif (ULONG_MAX >= LZO_0xffffffffL)
-     typedef unsigned long      lzo_uint;
-     typedef long               lzo_int;
-#    define LZO_UINT_MAX        ULONG_MAX
-#    define LZO_INT_MAX         LONG_MAX
-#    define LZO_INT_MIN         LONG_MIN
-#  else
-#    error "lzo_uint"
-#  endif
-#endif
-
-
-/* Memory model that allows to access memory at offsets of lzo_uint. */
-#if !defined(__LZO_MMODEL)
-#  if (LZO_UINT_MAX <= UINT_MAX)
-#    define __LZO_MMODEL
-#  elif defined(__LZO_DOS16) || defined(__LZO_WIN16)
-#    define __LZO_MMODEL        __huge
-#    define LZO_999_UNSUPPORTED
-#  elif defined(__LZO_PALMOS16) || defined(__LZO_TOS16)
-#    define __LZO_MMODEL
-#  else
-#    error "__LZO_MMODEL"
-#  endif
-#endif
-
-/* no typedef here because of const-pointer issues */
-#define lzo_byte                unsigned char __LZO_MMODEL
-#define lzo_bytep               unsigned char __LZO_MMODEL *
-#define lzo_charp               char __LZO_MMODEL *
-#define lzo_voidp               void __LZO_MMODEL *
-#define lzo_shortp              short __LZO_MMODEL *
-#define lzo_ushortp             unsigned short __LZO_MMODEL *
-#define lzo_uint32p             lzo_uint32 __LZO_MMODEL *
-#define lzo_int32p              lzo_int32 __LZO_MMODEL *
-#define lzo_uintp               lzo_uint __LZO_MMODEL *
-#define lzo_intp                lzo_int __LZO_MMODEL *
-#define lzo_voidpp              lzo_voidp __LZO_MMODEL *
-#define lzo_bytepp              lzo_bytep __LZO_MMODEL *
-
-typedef int lzo_bool;
-
-#ifndef lzo_sizeof_dict_t
-#  define lzo_sizeof_dict_t     sizeof(lzo_bytep)
-#endif
-
-
-/***********************************************************************
-// function types
-************************************************************************/
-
-/* linkage */
-#if !defined(__LZO_EXTERN_C)
-#  ifdef __cplusplus
-#    define __LZO_EXTERN_C      extern "C"
-#  else
-#    define __LZO_EXTERN_C      extern
-#  endif
-#endif
-
-/* calling conventions */
-#if !defined(__LZO_CDECL)
-#  if defined(__LZO_DOS16) || defined(__LZO_WIN16)
-#    define __LZO_CDECL         __far __cdecl
-#  elif defined(__LZO_i386) && defined(_MSC_VER)
-#    define __LZO_CDECL         __cdecl
-#  elif defined(__LZO_i386) && defined(__WATCOMC__)
-#    define __LZO_CDECL         __near __cdecl
-#  else
-#    define __LZO_CDECL
-#  endif
-#endif
-#if !defined(__LZO_ENTRY)
-#  define __LZO_ENTRY           __LZO_CDECL
-#endif
-
-/* DLL export information */
-#if !defined(__LZO_EXPORT1)
-#  define __LZO_EXPORT1
-#endif
-#if !defined(__LZO_EXPORT2)
-#  define __LZO_EXPORT2
-#endif
-
-/* calling convention for C functions */
-#if !defined(LZO_PUBLIC)
-#  define LZO_PUBLIC(_rettype)  __LZO_EXPORT1 _rettype __LZO_EXPORT2 __LZO_ENTRY
-#endif
-#if !defined(LZO_EXTERN)
-#  define LZO_EXTERN(_rettype)  __LZO_EXTERN_C LZO_PUBLIC(_rettype)
-#endif
-#if !defined(LZO_PRIVATE)
-#  define LZO_PRIVATE(_rettype) static _rettype __LZO_ENTRY
-#endif
-
-/* cdecl calling convention for assembler functions */
-#if !defined(LZO_PUBLIC_CDECL)
-#  define LZO_PUBLIC_CDECL(_rettype) \
-                __LZO_EXPORT1 _rettype __LZO_EXPORT2 __LZO_CDECL
-#endif
-#if !defined(LZO_EXTERN_CDECL)
-#  define LZO_EXTERN_CDECL(_rettype)  __LZO_EXTERN_C LZO_PUBLIC_CDECL(_rettype)
-#endif
-
-
-typedef int
-(__LZO_ENTRY *lzo_compress_t)   ( const lzo_byte *src, lzo_uint  src_len,
-                                        lzo_byte *dst, lzo_uint *dst_len,
-                                        lzo_voidp wrkmem );
-
-typedef int
-(__LZO_ENTRY *lzo_decompress_t) ( const lzo_byte *src, lzo_uint  src_len,
-                                        lzo_byte *dst, lzo_uint *dst_len,
-                                        lzo_voidp wrkmem );
-
-typedef int
-(__LZO_ENTRY *lzo_optimize_t)   (       lzo_byte *src, lzo_uint  src_len,
-                                        lzo_byte *dst, lzo_uint *dst_len,
-                                        lzo_voidp wrkmem );
-
-typedef int
-(__LZO_ENTRY *lzo_compress_dict_t)(const lzo_byte *src, lzo_uint  src_len,
-                                        lzo_byte *dst, lzo_uint *dst_len,
-                                        lzo_voidp wrkmem,
-                                  const lzo_byte *dict, lzo_uint dict_len );
-
-typedef int
-(__LZO_ENTRY *lzo_decompress_dict_t)(const lzo_byte *src, lzo_uint  src_len,
-                                        lzo_byte *dst, lzo_uint *dst_len,
-                                        lzo_voidp wrkmem,
-                                  const lzo_byte *dict, lzo_uint dict_len );
-
-
-/* a progress indicator callback function */
-typedef void (__LZO_ENTRY *lzo_progress_callback_t) (lzo_uint, lzo_uint);
-
-
-/***********************************************************************
-// error codes and prototypes
-************************************************************************/
-
-/* Error codes for the compression/decompression functions. Negative
- * values are errors, positive values will be used for special but
- * normal events.
- */
-#define LZO_E_OK                    0
-#define LZO_E_ERROR                 (-1)
-#define LZO_E_OUT_OF_MEMORY         (-2)    /* not used right now */
-#define LZO_E_NOT_COMPRESSIBLE      (-3)    /* not used right now */
-#define LZO_E_INPUT_OVERRUN         (-4)
-#define LZO_E_OUTPUT_OVERRUN        (-5)
-#define LZO_E_LOOKBEHIND_OVERRUN    (-6)
-#define LZO_E_EOF_NOT_FOUND         (-7)
-#define LZO_E_INPUT_NOT_CONSUMED    (-8)
-
-
-/* lzo_init() should be the first function you call.
- * Check the return code !
- *
- * lzo_init() is a macro to allow checking that the library and the
- * compiler's view of various types are consistent.
- */
-#define lzo_init() __lzo_init2(LZO_VERSION,(int)sizeof(short),(int)sizeof(int),\
-    (int)sizeof(long),(int)sizeof(lzo_uint32),(int)sizeof(lzo_uint),\
-    (int)lzo_sizeof_dict_t,(int)sizeof(char *),(int)sizeof(lzo_voidp),\
-    (int)sizeof(lzo_compress_t))
-LZO_EXTERN(int) __lzo_init2(unsigned,int,int,int,int,int,int,int,int,int);
-
-/* version functions (useful for shared libraries) */
-LZO_EXTERN(unsigned) lzo_version(void);
-LZO_EXTERN(const char *) lzo_version_string(void);
-LZO_EXTERN(const char *) lzo_version_date(void);
-LZO_EXTERN(const lzo_charp) _lzo_version_string(void);
-LZO_EXTERN(const lzo_charp) _lzo_version_date(void);
-
-/* string functions */
-LZO_EXTERN(int)
-lzo_memcmp(const lzo_voidp _s1, const lzo_voidp _s2, lzo_uint _len);
-LZO_EXTERN(lzo_voidp)
-lzo_memcpy(lzo_voidp _dest, const lzo_voidp _src, lzo_uint _len);
-LZO_EXTERN(lzo_voidp)
-lzo_memmove(lzo_voidp _dest, const lzo_voidp _src, lzo_uint _len);
-LZO_EXTERN(lzo_voidp)
-lzo_memset(lzo_voidp _s, int _c, lzo_uint _len);
-
-/* checksum functions */
-LZO_EXTERN(lzo_uint32)
-lzo_adler32(lzo_uint32 _adler, const lzo_byte *_buf, lzo_uint _len);
-LZO_EXTERN(lzo_uint32)
-lzo_crc32(lzo_uint32 _c, const lzo_byte *_buf, lzo_uint _len);
-
-/* memory allocation functions */
-LZO_EXTERN(lzo_bytep) lzo_alloc(lzo_uint _nelems, lzo_uint _size);
-LZO_EXTERN(lzo_bytep) lzo_malloc(lzo_uint _size);
-LZO_EXTERN(void) lzo_free(lzo_voidp _ptr);
-
-typedef lzo_bytep (__LZO_ENTRY *lzo_alloc_hook_t) (lzo_uint, lzo_uint);
-typedef void (__LZO_ENTRY *lzo_free_hook_t) (lzo_voidp);
-
-extern lzo_alloc_hook_t lzo_alloc_hook;
-extern lzo_free_hook_t lzo_free_hook;
-
-/* misc. */
-LZO_EXTERN(lzo_bool) lzo_assert(int _expr);
-LZO_EXTERN(int) _lzo_config_check(void);
-typedef union { lzo_bytep p; lzo_uint u; } __lzo_pu_u;
-typedef union { lzo_bytep p; lzo_uint32 u32; } __lzo_pu32_u;
-
-/* align a char pointer on a boundary that is a multiple of `size' */
-LZO_EXTERN(unsigned) __lzo_align_gap(const lzo_voidp _ptr, lzo_uint _size);
-#define LZO_PTR_ALIGN_UP(_ptr,_size) \
-    ((_ptr) + (lzo_uint) __lzo_align_gap((const lzo_voidp)(_ptr),(lzo_uint)(_size)))
-
-/* deprecated - only for backward compatibility */
-#define LZO_ALIGN(_ptr,_size) LZO_PTR_ALIGN_UP(_ptr,_size)
-
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* already included */
-
--- a/minilzo.c	Sat Apr 13 17:44:14 2002 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2848 +0,0 @@
-/* minilzo.c -- mini subset of the LZO real-time data compression library
-
-   This file is part of the LZO real-time data compression library.
-
-   Copyright (C) 2000 Markus Franz Xaver Johannes Oberhumer
-   Copyright (C) 1999 Markus Franz Xaver Johannes Oberhumer
-   Copyright (C) 1998 Markus Franz Xaver Johannes Oberhumer
-   Copyright (C) 1997 Markus Franz Xaver Johannes Oberhumer
-   Copyright (C) 1996 Markus Franz Xaver Johannes Oberhumer
-
-   The LZO library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of
-   the License, or (at your option) any later version.
-
-   The LZO library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with the LZO library; see the file COPYING.
-   If not, write to the Free Software Foundation, Inc.,
-   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
-   Markus F.X.J. Oberhumer
-   <markus.oberhumer@jk.uni-linz.ac.at>
-   http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html
- */
-
-/*
- * NOTE:
- *   the full LZO package can be found at
- *   http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html
- */
-
-#define __LZO_IN_MINILZO
-
-#ifdef MINILZO_HAVE_CONFIG_H
-#  include <config.h>
-#endif
-
-#undef LZO_HAVE_CONFIG_H
-#include "minilzo.h"
-
-#if !defined(MINILZO_VERSION) || (MINILZO_VERSION != 0x1070)
-#  error "version mismatch in miniLZO source files"
-#endif
-
-#ifdef MINILZO_HAVE_CONFIG_H
-#  define LZO_HAVE_CONFIG_H
-#endif
-
-#if !defined(LZO_NO_SYS_TYPES_H)
-#  include <sys/types.h>
-#endif
-#include <stdio.h>
-
-#ifndef __LZO_CONF_H
-#define __LZO_CONF_H
-
-#if !defined(__LZO_IN_MINILZO)
-#  ifndef __LZOCONF_H
-#    include <lzoconf.h>
-#  endif
-#endif
-
-#if defined(__BOUNDS_CHECKING_ON)
-#  include <unchecked.h>
-#else
-#  define BOUNDS_CHECKING_OFF_DURING(stmt)      stmt
-#  define BOUNDS_CHECKING_OFF_IN_EXPR(expr)     (expr)
-#endif
-
-#if !defined(LZO_HAVE_CONFIG_H)
-#  include <stddef.h>
-#  include <string.h>
-#  if !defined(NO_STDLIB_H)
-#    include <stdlib.h>
-#  endif
-#  define HAVE_MEMCMP
-#  define HAVE_MEMCPY
-#  define HAVE_MEMMOVE
-#  define HAVE_MEMSET
-#else
-#  include <sys/types.h>
-#  if defined(STDC_HEADERS)
-#    include <string.h>
-#    include <stdlib.h>
-#  endif
-#  if defined(HAVE_STDDEF_H)
-#    include <stddef.h>
-#  endif
-#  if defined(HAVE_MEMORY_H)
-#    include <memory.h>
-#  endif
-#endif
-
-#if defined(__LZO_DOS16) || defined(__LZO_WIN16)
-#  define HAVE_MALLOC_H
-#  define HAVE_HALLOC
-#endif
-
-#undef NDEBUG
-#if !defined(LZO_DEBUG)
-#  define NDEBUG
-#endif
-#if defined(LZO_DEBUG) || !defined(NDEBUG)
-#  if !defined(NO_STDIO_H)
-#    include <stdio.h>
-#  endif
-#endif
-#include <assert.h>
-
-#if !defined(LZO_UNUSED)
-#  define LZO_UNUSED(parm)  (parm = parm)
-#endif
-
-#if !defined(__inline__) && !defined(__GNUC__)
-#  if defined(__cplusplus)
-#    define __inline__      inline
-#  else
-#    define __inline__
-#  endif
-#endif
-
-#if defined(NO_MEMCMP)
-#  undef HAVE_MEMCMP
-#endif
-
-#if !defined(HAVE_MEMCMP)
-#  undef memcmp
-#  define memcmp    lzo_memcmp
-#endif
-#if !defined(HAVE_MEMCPY)
-#  undef memcpy
-#  define memcpy    lzo_memcpy
-#endif
-#if !defined(HAVE_MEMMOVE)
-#  undef memmove
-#  define memmove   lzo_memmove
-#endif
-#if !defined(HAVE_MEMSET)
-#  undef memset
-#  define memset    lzo_memset
-#endif
-
-#if 1
-#  define LZO_BYTE(x)       ((unsigned char) (x))
-#else
-#  define LZO_BYTE(x)       ((unsigned char) ((x) & 0xff))
-#endif
-#if 0
-#  define LZO_USHORT(x)     ((unsigned short) (x))
-#else
-#  define LZO_USHORT(x)     ((unsigned short) ((x) & 0xffff))
-#endif
-
-#define LZO_MAX(a,b)        ((a) >= (b) ? (a) : (b))
-#define LZO_MIN(a,b)        ((a) <= (b) ? (a) : (b))
-#define LZO_MAX3(a,b,c)     ((a) >= (b) ? LZO_MAX(a,c) : LZO_MAX(b,c))
-#define LZO_MIN3(a,b,c)     ((a) <= (b) ? LZO_MIN(a,c) : LZO_MIN(b,c))
-
-#define lzo_sizeof(type)    ((lzo_uint) (sizeof(type)))
-
-#define LZO_HIGH(array)     ((lzo_uint) (sizeof(array)/sizeof(*(array))))
-
-#define LZO_SIZE(bits)      (1u << (bits))
-#define LZO_MASK(bits)      (LZO_SIZE(bits) - 1)
-
-#define LZO_LSIZE(bits)     (1ul << (bits))
-#define LZO_LMASK(bits)     (LZO_LSIZE(bits) - 1)
-
-#define LZO_USIZE(bits)     ((lzo_uint) 1 << (bits))
-#define LZO_UMASK(bits)     (LZO_USIZE(bits) - 1)
-
-#define LZO_STYPE_MAX(b)    (((1l  << (8*(b)-2)) - 1l)  + (1l  << (8*(b)-2)))
-#define LZO_UTYPE_MAX(b)    (((1ul << (8*(b)-1)) - 1ul) + (1ul << (8*(b)-1)))
-
-#if !defined(SIZEOF_UNSIGNED)
-#  if (UINT_MAX == 0xffff)
-#    define SIZEOF_UNSIGNED         2
-#  elif (UINT_MAX == LZO_0xffffffffL)
-#    define SIZEOF_UNSIGNED         4
-#  elif (UINT_MAX >= LZO_0xffffffffL)
-#    define SIZEOF_UNSIGNED         8
-#  else
-#    error SIZEOF_UNSIGNED
-#  endif
-#endif
-
-#if !defined(SIZEOF_UNSIGNED_LONG)
-#  if (ULONG_MAX == LZO_0xffffffffL)
-#    define SIZEOF_UNSIGNED_LONG    4
-#  elif (ULONG_MAX >= LZO_0xffffffffL)
-#    define SIZEOF_UNSIGNED_LONG    8
-#  else
-#    error SIZEOF_UNSIGNED_LONG
-#  endif
-#endif
-
-#if !defined(SIZEOF_SIZE_T)
-#  define SIZEOF_SIZE_T             SIZEOF_UNSIGNED
-#endif
-#if !defined(SIZE_T_MAX)
-#  define SIZE_T_MAX                LZO_UTYPE_MAX(SIZEOF_SIZE_T)
-#endif
-
-#if 1 && defined(__LZO_i386) && (UINT_MAX == LZO_0xffffffffL)
-#  if !defined(LZO_UNALIGNED_OK_2) && (USHRT_MAX == 0xffff)
-#    define LZO_UNALIGNED_OK_2
-#  endif
-#  if !defined(LZO_UNALIGNED_OK_4) && (LZO_UINT32_MAX == LZO_0xffffffffL)
-#    define LZO_UNALIGNED_OK_4
-#  endif
-#endif
-
-#if defined(LZO_UNALIGNED_OK_2) || defined(LZO_UNALIGNED_OK_4)
-#  if !defined(LZO_UNALIGNED_OK)
-#    define LZO_UNALIGNED_OK
-#  endif
-#endif
-
-#if defined(__LZO_NO_UNALIGNED)
-#  undef LZO_UNALIGNED_OK
-#  undef LZO_UNALIGNED_OK_2
-#  undef LZO_UNALIGNED_OK_4
-#endif
-
-#if defined(LZO_UNALIGNED_OK_2) && (USHRT_MAX != 0xffff)
-#  error "LZO_UNALIGNED_OK_2 must not be defined on this system"
-#endif
-#if defined(LZO_UNALIGNED_OK_4) && (LZO_UINT32_MAX != LZO_0xffffffffL)
-#  error "LZO_UNALIGNED_OK_4 must not be defined on this system"
-#endif
-
-#if defined(__LZO_NO_ALIGNED)
-#  undef LZO_ALIGNED_OK_4
-#endif
-
-#if defined(LZO_ALIGNED_OK_4) && (LZO_UINT32_MAX != LZO_0xffffffffL)
-#  error "LZO_ALIGNED_OK_4 must not be defined on this system"
-#endif
-
-#define LZO_LITTLE_ENDIAN       1234
-#define LZO_BIG_ENDIAN          4321
-#define LZO_PDP_ENDIAN          3412
-
-#if !defined(LZO_BYTE_ORDER)
-#  if defined(MFX_BYTE_ORDER)
-#    define LZO_BYTE_ORDER      MFX_BYTE_ORDER
-#  elif defined(__LZO_i386)
-#    define LZO_BYTE_ORDER      LZO_LITTLE_ENDIAN
-#  elif defined(BYTE_ORDER)
-#    define LZO_BYTE_ORDER      BYTE_ORDER
-#  elif defined(__BYTE_ORDER)
-#    define LZO_BYTE_ORDER      __BYTE_ORDER
-#  endif
-#endif
-
-#if defined(LZO_BYTE_ORDER)
-#  if (LZO_BYTE_ORDER != LZO_LITTLE_ENDIAN) && \
-      (LZO_BYTE_ORDER != LZO_BIG_ENDIAN)
-#    error "invalid LZO_BYTE_ORDER"
-#  endif
-#endif
-
-#if defined(LZO_UNALIGNED_OK) && !defined(LZO_BYTE_ORDER)
-#  error "LZO_BYTE_ORDER is not defined"
-#endif
-
-#define LZO_OPTIMIZE_GNUC_i386_IS_BUGGY
-
-#if defined(NDEBUG) && !defined(LZO_DEBUG) && !defined(__LZO_CHECKER)
-#  if defined(__GNUC__) && defined(__i386__)
-#    if !defined(LZO_OPTIMIZE_GNUC_i386_IS_BUGGY)
-#      define LZO_OPTIMIZE_GNUC_i386
-#    endif
-#  endif
-#endif
-
-__LZO_EXTERN_C int __lzo_init_done;
-__LZO_EXTERN_C const lzo_byte __lzo_copyright[];
-LZO_EXTERN(const lzo_byte *) lzo_copyright(void);
-__LZO_EXTERN_C const lzo_uint32 _lzo_crc32_table[256];
-
-#define _LZO_STRINGIZE(x)           #x
-#define _LZO_MEXPAND(x)             _LZO_STRINGIZE(x)
-
-#define _LZO_CONCAT2(a,b)           a ## b
-#define _LZO_CONCAT3(a,b,c)         a ## b ## c
-#define _LZO_CONCAT4(a,b,c,d)       a ## b ## c ## d
-#define _LZO_CONCAT5(a,b,c,d,e)     a ## b ## c ## d ## e
-
-#define _LZO_ECONCAT2(a,b)          _LZO_CONCAT2(a,b)
-#define _LZO_ECONCAT3(a,b,c)        _LZO_CONCAT3(a,b,c)
-#define _LZO_ECONCAT4(a,b,c,d)      _LZO_CONCAT4(a,b,c,d)
-#define _LZO_ECONCAT5(a,b,c,d,e)    _LZO_CONCAT5(a,b,c,d,e)
-
-#if 0
-
-#define __LZO_IS_COMPRESS_QUERY(i,il,o,ol,w)    ((lzo_voidp)(o) == (w))
-#define __LZO_QUERY_COMPRESS(i,il,o,ol,w,n,s) \
-		(*ol = (n)*(s), LZO_E_OK)
-
-#define __LZO_IS_DECOMPRESS_QUERY(i,il,o,ol,w)  ((lzo_voidp)(o) == (w))
-#define __LZO_QUERY_DECOMPRESS(i,il,o,ol,w,n,s) \
-		(*ol = (n)*(s), LZO_E_OK)
-
-#define __LZO_IS_OPTIMIZE_QUERY(i,il,o,ol,w)    ((lzo_voidp)(o) == (w))
-#define __LZO_QUERY_OPTIMIZE(i,il,o,ol,w,n,s) \
-		(*ol = (n)*(s), LZO_E_OK)
-
-#endif
-
-#ifndef __LZO_PTR_H
-#define __LZO_PTR_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if defined(__LZO_DOS16) || defined(__LZO_WIN16)
-#  include <dos.h>
-#  if 1 && defined(__WATCOMC__)
-#    include <i86.h>
-     __LZO_EXTERN_C unsigned char _HShift;
-#    define __LZO_HShift    _HShift
-#  elif 1 && defined(_MSC_VER)
-     __LZO_EXTERN_C unsigned short __near _AHSHIFT;
-#    define __LZO_HShift    ((unsigned) &_AHSHIFT)
-#  elif defined(__LZO_WIN16)
-#    define __LZO_HShift    3
-#  else
-#    define __LZO_HShift    12
-#  endif
-#  if !defined(_FP_SEG) && defined(FP_SEG)
-#    define _FP_SEG         FP_SEG
-#  endif
-#  if !defined(_FP_OFF) && defined(FP_OFF)
-#    define _FP_OFF         FP_OFF
-#  endif
-#endif
-
-#if (UINT_MAX >= LZO_0xffffffffL)
-   typedef ptrdiff_t            lzo_ptrdiff_t;
-#else
-   typedef long                 lzo_ptrdiff_t;
-#endif
-
-#if !defined(__LZO_HAVE_PTR_T)
-#  if defined(lzo_ptr_t)
-#    define __LZO_HAVE_PTR_T
-#  endif
-#endif
-#if !defined(__LZO_HAVE_PTR_T)
-#  if defined(SIZEOF_CHAR_P) && defined(SIZEOF_UNSIGNED_LONG)
-#    if (SIZEOF_CHAR_P == SIZEOF_UNSIGNED_LONG)
-       typedef unsigned long    lzo_ptr_t;
-       typedef long             lzo_sptr_t;
-#      define __LZO_HAVE_PTR_T
-#    endif
-#  endif
-#endif
-#if !defined(__LZO_HAVE_PTR_T)
-#  if defined(SIZEOF_CHAR_P) && defined(SIZEOF_UNSIGNED)
-#    if (SIZEOF_CHAR_P == SIZEOF_UNSIGNED)
-       typedef unsigned int     lzo_ptr_t;
-       typedef int              lzo_sptr_t;
-#      define __LZO_HAVE_PTR_T
-#    endif
-#  endif
-#endif
-#if !defined(__LZO_HAVE_PTR_T)
-#  if defined(SIZEOF_CHAR_P) && defined(SIZEOF_UNSIGNED_SHORT)
-#    if (SIZEOF_CHAR_P == SIZEOF_UNSIGNED_SHORT)
-       typedef unsigned short   lzo_ptr_t;
-       typedef short            lzo_sptr_t;
-#      define __LZO_HAVE_PTR_T
-#    endif
-#  endif
-#endif
-#if !defined(__LZO_HAVE_PTR_T)
-#  if defined(LZO_HAVE_CONFIG_H) || defined(SIZEOF_CHAR_P)
-#    error "no suitable type for lzo_ptr_t"
-#  else
-     typedef unsigned long      lzo_ptr_t;
-     typedef long               lzo_sptr_t;
-#    define __LZO_HAVE_PTR_T
-#  endif
-#endif
-
-#if defined(__LZO_DOS16) || defined(__LZO_WIN16)
-#define PTR(a)              ((lzo_bytep) (a))
-#define PTR_ALIGNED_4(a)    ((_FP_OFF(a) & 3) == 0)
-#define PTR_ALIGNED2_4(a,b) (((_FP_OFF(a) | _FP_OFF(b)) & 3) == 0)
-#else
-#define PTR(a)              ((lzo_ptr_t) (a))
-#define PTR_LINEAR(a)       PTR(a)
-#define PTR_ALIGNED_4(a)    ((PTR_LINEAR(a) & 3) == 0)
-#define PTR_ALIGNED_8(a)    ((PTR_LINEAR(a) & 7) == 0)
-#define PTR_ALIGNED2_4(a,b) (((PTR_LINEAR(a) | PTR_LINEAR(b)) & 3) == 0)
-#define PTR_ALIGNED2_8(a,b) (((PTR_LINEAR(a) | PTR_LINEAR(b)) & 7) == 0)
-#endif
-
-#define PTR_LT(a,b)         (PTR(a) < PTR(b))
-#define PTR_GE(a,b)         (PTR(a) >= PTR(b))
-#define PTR_DIFF(a,b)       ((lzo_ptrdiff_t) (PTR(a) - PTR(b)))
-
-LZO_EXTERN(lzo_ptr_t)
-__lzo_ptr_linear(const lzo_voidp ptr);
-
-typedef union
-{
-    char            a_char;
-    unsigned char   a_uchar;
-    short           a_short;
-    unsigned short  a_ushort;
-    int             a_int;
-    unsigned int    a_uint;
-    long            a_long;
-    unsigned long   a_ulong;
-    lzo_int         a_lzo_int;
-    lzo_uint        a_lzo_uint;
-    lzo_int32       a_lzo_int32;
-    lzo_uint32      a_lzo_uint32;
-    ptrdiff_t       a_ptrdiff_t;
-    lzo_ptrdiff_t   a_lzo_ptrdiff_t;
-    lzo_ptr_t       a_lzo_ptr_t;
-    char *          a_charp;
-    lzo_bytep       a_lzo_bytep;
-    lzo_bytepp      a_lzo_bytepp;
-}
-lzo_align_t;
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
-
-#define LZO_DETERMINISTIC
-
-#define LZO_DICT_USE_PTR
-#if defined(__LZO_DOS16) || defined(__LZO_WIN16) || defined(__LZO_STRICT_16BIT)
-#  undef LZO_DICT_USE_PTR
-#endif
-
-#if defined(LZO_DICT_USE_PTR)
-#  define lzo_dict_t    const lzo_bytep
-#  define lzo_dict_p    lzo_dict_t __LZO_MMODEL *
-#else
-#  define lzo_dict_t    lzo_uint
-#  define lzo_dict_p    lzo_dict_t __LZO_MMODEL *
-#endif
-
-#if !defined(lzo_moff_t)
-#define lzo_moff_t      lzo_uint
-#endif
-
-#endif
-
-LZO_PUBLIC(lzo_ptr_t)
-__lzo_ptr_linear(const lzo_voidp ptr)
-{
-    lzo_ptr_t p;
-
-#if defined(__LZO_DOS16) || defined(__LZO_WIN16)
-    p = (((lzo_ptr_t)(_FP_SEG(ptr))) << (16 - __LZO_HShift)) + (_FP_OFF(ptr));
-#else
-    p = PTR_LINEAR(ptr);
-#endif
-
-    return p;
-}
-
-LZO_PUBLIC(unsigned)
-__lzo_align_gap(const lzo_voidp ptr, lzo_uint size)
-{
-    lzo_ptr_t p, s, n;
-
-    assert(size > 0);
-
-    p = __lzo_ptr_linear(ptr);
-    s = (lzo_ptr_t) (size - 1);
-#if 0
-    assert((size & (size - 1)) == 0);
-    n = ((p + s) & ~s) - p;
-#else
-    n = (((p + s) / size) * size) - p;
-#endif
-
-    assert((long)n >= 0);
-    assert(n <= s);
-
-    return (unsigned)n;
-}
-
-#ifndef __LZO_UTIL_H
-#define __LZO_UTIL_H
-
-#ifndef __LZO_CONF_H
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if 1 && defined(HAVE_MEMCPY)
-#if !defined(__LZO_DOS16) && !defined(__LZO_WIN16)
-
-#define MEMCPY8_DS(dest,src,len) \
-    memcpy(dest,src,len); \
-    dest += len; \
-    src += len
-
-#endif
-#endif
-
-#if 0 && !defined(MEMCPY8_DS)
-
-#define MEMCPY8_DS(dest,src,len) \
-    { do { \
-	*dest++ = *src++; \
-	*dest++ = *src++; \
-	*dest++ = *src++; \
-	*dest++ = *src++; \
-	*dest++ = *src++; \
-	*dest++ = *src++; \
-	*dest++ = *src++; \
-	*dest++ = *src++; \
-	len -= 8; \
-    } while (len > 0); }
-
-#endif
-
-#if !defined(MEMCPY8_DS)
-
-#define MEMCPY8_DS(dest,src,len) \
-    { register lzo_uint __l = (len) / 8; \
-    do { \
-	*dest++ = *src++; \
-	*dest++ = *src++; \
-	*dest++ = *src++; \
-	*dest++ = *src++; \
-	*dest++ = *src++; \
-	*dest++ = *src++; \
-	*dest++ = *src++; \
-	*dest++ = *src++; \
-    } while (--__l > 0); }
-
-#endif
-
-#define MEMCPY_DS(dest,src,len) \
-    do *dest++ = *src++; \
-    while (--len > 0)
-
-#define MEMMOVE_DS(dest,src,len) \
-    do *dest++ = *src++; \
-    while (--len > 0)
-
-#if 0 && defined(LZO_OPTIMIZE_GNUC_i386)
-
-#define BZERO8_PTR(s,l,n) \
-__asm__ __volatile__( \
-    "movl  %0,%%eax \n"             \
-    "movl  %1,%%edi \n"             \
-    "movl  %2,%%ecx \n"             \
-    "cld \n"                        \
-    "rep \n"                        \
-    "stosl %%eax,(%%edi) \n"        \
-    :               \
-    :"g" (0),"g" (s),"g" (n)        \
-    :"eax","edi","ecx", "memory", "cc" \
-)
-
-#elif (LZO_UINT_MAX <= SIZE_T_MAX) && defined(HAVE_MEMSET)
-
-#if 1
-#define BZERO8_PTR(s,l,n)   memset((s),0,(lzo_uint)(l)*(n))
-#else
-#define BZERO8_PTR(s,l,n)   memset((lzo_voidp)(s),0,(lzo_uint)(l)*(n))
-#endif
-
-#else
-
-#define BZERO8_PTR(s,l,n) \
-    lzo_memset((lzo_voidp)(s),0,(lzo_uint)(l)*(n))
-
-#endif
-
-#if 0
-#if defined(__GNUC__) && defined(__i386__)
-
-unsigned char lzo_rotr8(unsigned char value, int shift);
-extern __inline__ unsigned char lzo_rotr8(unsigned char value, int shift)
-{
-    unsigned char result;
-
-    __asm__ __volatile__ ("movb %b1, %b0; rorb %b2, %b0"
-			: "=a"(result) : "g"(value), "c"(shift));
-    return result;
-}
-
-unsigned short lzo_rotr16(unsigned short value, int shift);
-extern __inline__ unsigned short lzo_rotr16(unsigned short value, int shift)
-{
-    unsigned short result;
-
-    __asm__ __volatile__ ("movw %b1, %b0; rorw %b2, %b0"
-			: "=a"(result) : "g"(value), "c"(shift));
-    return result;
-}
-
-#endif
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
-
-LZO_PUBLIC(lzo_bool)
-lzo_assert(int expr)
-{
-    return (expr) ? 1 : 0;
-}
-
-/* If you use the LZO library in a product, you *must* keep this
- * copyright string in the executable of your product.
- */
-
-const lzo_byte __lzo_copyright[] =
-#if !defined(__LZO_IN_MINLZO)
-    LZO_VERSION_STRING;
-#else
-    "\n\n\n"
-    "LZO real-time data compression library.\n"
-    "Copyright (C) 1996, 1997, 1998, 1999, 2000 Markus Franz Xaver Johannes Oberhumer\n"
-    "<markus.oberhumer@jk.uni-linz.ac.at>\n"
-    "http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html\n"
-    "\n"
-    "LZO version: v" LZO_VERSION_STRING ", " LZO_VERSION_DATE "\n"
-    "LZO build date: " __DATE__ " " __TIME__ "\n\n"
-    "LZO special compilation options:\n"
-#ifdef __cplusplus
-    " __cplusplus\n"
-#endif
-#if defined(__PIC__)
-    " __PIC__\n"
-#elif defined(__pic__)
-    " __pic__\n"
-#endif
-#if (UINT_MAX < LZO_0xffffffffL)
-    " 16BIT\n"
-#endif
-#if defined(__LZO_STRICT_16BIT)
-    " __LZO_STRICT_16BIT\n"
-#endif
-#if (UINT_MAX > LZO_0xffffffffL)
-    " UINT_MAX=" _LZO_MEXPAND(UINT_MAX) "\n"
-#endif
-#if (ULONG_MAX > LZO_0xffffffffL)
-    " ULONG_MAX=" _LZO_MEXPAND(ULONG_MAX) "\n"
-#endif
-#if defined(LZO_BYTE_ORDER)
-    " LZO_BYTE_ORDER=" _LZO_MEXPAND(LZO_BYTE_ORDER) "\n"
-#endif
-#if defined(LZO_UNALIGNED_OK_2)
-    " LZO_UNALIGNED_OK_2\n"
-#endif
-#if defined(LZO_UNALIGNED_OK_4)
-    " LZO_UNALIGNED_OK_4\n"
-#endif
-#if defined(LZO_ALIGNED_OK_4)
-    " LZO_ALIGNED_OK_4\n"
-#endif
-#if defined(LZO_DICT_USE_PTR)
-    " LZO_DICT_USE_PTR\n"
-#endif
-#if defined(__LZO_QUERY_COMPRESS)
-    " __LZO_QUERY_COMPRESS\n"
-#endif
-#if defined(__LZO_QUERY_DECOMPRESS)
-    " __LZO_QUERY_DECOMPRESS\n"
-#endif
-#if defined(__LZO_IN_MINILZO)
-    " __LZO_IN_MINILZO\n"
-#endif
-    "\n\n"
-    "$Id: LZO " LZO_VERSION_STRING " built " __DATE__ " " __TIME__
-#if defined(__GNUC__) && defined(__VERSION__)
-    " by gcc " __VERSION__
-#elif defined(__BORLANDC__)
-    " by Borland C " _LZO_MEXPAND(__BORLANDC__)
-#elif defined(_MSC_VER)
-    " by Microsoft C " _LZO_MEXPAND(_MSC_VER)
-#elif defined(__PUREC__)
-    " by Pure C " _LZO_MEXPAND(__PUREC__)
-#elif defined(__SC__)
-    " by Symantec C " _LZO_MEXPAND(__SC__)
-#elif defined(__TURBOC__)
-    " by Turbo C " _LZO_MEXPAND(__TURBOC__)
-#elif defined(__WATCOMC__)
-    " by Watcom C " _LZO_MEXPAND(__WATCOMC__)
-#endif
-    " $\n"
-    "$Copyright: LZO (C) 1996, 1997, 1998, 1999, 2000 Markus Franz Xaver Johannes Oberhumer $\n";
-#endif
-
-LZO_PUBLIC(const lzo_byte *)
-lzo_copyright(void)
-{
-    return __lzo_copyright;
-}
-
-LZO_PUBLIC(unsigned)
-lzo_version(void)
-{
-    return LZO_VERSION;
-}
-
-LZO_PUBLIC(const char *)
-lzo_version_string(void)
-{
-    return LZO_VERSION_STRING;
-}
-
-LZO_PUBLIC(const char *)
-lzo_version_date(void)
-{
-    return LZO_VERSION_DATE;
-}
-
-LZO_PUBLIC(const lzo_charp)
-_lzo_version_string(void)
-{
-    return LZO_VERSION_STRING;
-}
-
-LZO_PUBLIC(const lzo_charp)
-_lzo_version_date(void)
-{
-    return LZO_VERSION_DATE;
-}
-
-#define LZO_BASE 65521u
-#define LZO_NMAX 5552
-
-#define LZO_DO1(buf,i)  {s1 += buf[i]; s2 += s1;}
-#define LZO_DO2(buf,i)  LZO_DO1(buf,i); LZO_DO1(buf,i+1);
-#define LZO_DO4(buf,i)  LZO_DO2(buf,i); LZO_DO2(buf,i+2);
-#define LZO_DO8(buf,i)  LZO_DO4(buf,i); LZO_DO4(buf,i+4);
-#define LZO_DO16(buf,i) LZO_DO8(buf,i); LZO_DO8(buf,i+8);
-
-LZO_PUBLIC(lzo_uint32)
-lzo_adler32(lzo_uint32 adler, const lzo_byte *buf, lzo_uint len)
-{
-    lzo_uint32 s1 = adler & 0xffff;
-    lzo_uint32 s2 = (adler >> 16) & 0xffff;
-    int k;
-
-    if (buf == NULL)
-	return 1;
-
-    while (len > 0)
-    {
-	k = len < LZO_NMAX ? (int) len : LZO_NMAX;
-	len -= k;
-	if (k >= 16) do
-	{
-	    LZO_DO16(buf,0);
-	    buf += 16;
-	    k -= 16;
-	} while (k >= 16);
-	if (k != 0) do
-	{
-	    s1 += *buf++;
-	    s2 += s1;
-	} while (--k > 0);
-	s1 %= LZO_BASE;
-	s2 %= LZO_BASE;
-    }
-    return (s2 << 16) | s1;
-}
-
-LZO_PUBLIC(int)
-lzo_memcmp(const lzo_voidp s1, const lzo_voidp s2, lzo_uint len)
-{
-#if (LZO_UINT_MAX <= SIZE_T_MAX) && defined(HAVE_MEMCMP)
-    return memcmp(s1,s2,len);
-#else
-    const lzo_byte *p1 = (const lzo_byte *) s1;
-    const lzo_byte *p2 = (const lzo_byte *) s2;
-    int d;
-
-    if (len > 0) do
-    {
-	d = *p1 - *p2;
-	if (d != 0)
-	    return d;
-	p1++;
-	p2++;
-    }
-    while (--len > 0);
-    return 0;
-#endif
-}
-
-LZO_PUBLIC(lzo_voidp)
-lzo_memcpy(lzo_voidp dest, const lzo_voidp src, lzo_uint len)
-{
-#if (LZO_UINT_MAX <= SIZE_T_MAX) && defined(HAVE_MEMCPY)
-    return memcpy(dest,src,len);
-#else
-    lzo_byte *p1 = (lzo_byte *) dest;
-    const lzo_byte *p2 = (const lzo_byte *) src;
-
-    if (len <= 0 || p1 == p2)
-	return dest;
-    do
-	*p1++ = *p2++;
-    while (--len > 0);
-    return dest;
-#endif
-}
-
-LZO_PUBLIC(lzo_voidp)
-lzo_memmove(lzo_voidp dest, const lzo_voidp src, lzo_uint len)
-{
-#if (LZO_UINT_MAX <= SIZE_T_MAX) && defined(HAVE_MEMMOVE)
-    return memmove(dest,src,len);
-#else
-    lzo_byte *p1 = (lzo_byte *) dest;
-    const lzo_byte *p2 = (const lzo_byte *) src;
-
-    if (len <= 0 || p1 == p2)
-	return dest;
-
-    if (p1 < p2)
-    {
-	do
-	    *p1++ = *p2++;
-	while (--len > 0);
-    }
-    else
-    {
-	p1 += len;
-	p2 += len;
-	do
-	    *--p1 = *--p2;
-	while (--len > 0);
-    }
-    return dest;
-#endif
-}
-
-LZO_PUBLIC(lzo_voidp)
-lzo_memset(lzo_voidp s, int c, lzo_uint len)
-{
-#if (LZO_UINT_MAX <= SIZE_T_MAX) && defined(HAVE_MEMSET)
-    return memset(s,c,len);
-#else
-    lzo_byte *p = (lzo_byte *) s;
-
-    if (len > 0) do
-	*p++ = LZO_BYTE(c);
-    while (--len > 0);
-    return s;
-#endif
-}
-
-#include <stdio.h>
-
-#if 0
-#  define IS_SIGNED(type)       (((type) (1ul << (8 * sizeof(type) - 1))) < 0)
-#  define IS_UNSIGNED(type)     (((type) (1ul << (8 * sizeof(type) - 1))) > 0)
-#else
-#  define IS_SIGNED(type)       (((type) (-1)) < ((type) 0))
-#  define IS_UNSIGNED(type)     (((type) (-1)) > ((type) 0))
-#endif
-
-static lzo_bool schedule_insns_bug(void);
-static lzo_bool strength_reduce_bug(int *);
-
-#if 0 || defined(LZO_DEBUG)
-static lzo_bool __lzo_assert_fail(const char *s, unsigned line)
-{
-#if defined(__palmos__)
-    printf("LZO assertion failed in line %u: '%s'\n",line,s);
-#else
-    fprintf(stderr,"LZO assertion failed in line %u: '%s'\n",line,s);
-#endif
-    return 0;
-}
-#  define __lzo_assert(x)   ((x) ? 1 : __lzo_assert_fail(#x,__LINE__))
-#else
-#  define __lzo_assert(x)   ((x) ? 1 : 0)
-#endif
-
-static lzo_bool basic_integral_check(void)
-{
-    lzo_bool r = 1;
-    lzo_bool sanity;
-
-    r &= __lzo_assert(CHAR_BIT == 8);
-    r &= __lzo_assert(sizeof(char) == 1);
-    r &= __lzo_assert(sizeof(short) >= 2);
-    r &= __lzo_assert(sizeof(long) >= 4);
-    r &= __lzo_assert(sizeof(int) >= sizeof(short));
-    r &= __lzo_assert(sizeof(long) >= sizeof(int));
-
-    r &= __lzo_assert(sizeof(lzo_uint32) >= 4);
-    r &= __lzo_assert(sizeof(lzo_uint32) >= sizeof(unsigned));
-#if defined(__LZO_STRICT_16BIT)
-    r &= __lzo_assert(sizeof(lzo_uint) == 2);
-#else
-    r &= __lzo_assert(sizeof(lzo_uint) >= 4);
-    r &= __lzo_assert(sizeof(lzo_uint) >= sizeof(unsigned));
-#endif
-
-#if defined(SIZEOF_UNSIGNED)
-    r &= __lzo_assert(SIZEOF_UNSIGNED == sizeof(unsigned));
-#endif
-#if defined(SIZEOF_UNSIGNED_LONG)
-    r &= __lzo_assert(SIZEOF_UNSIGNED_LONG == sizeof(unsigned long));
-#endif
-#if defined(SIZEOF_UNSIGNED_SHORT)
-    r &= __lzo_assert(SIZEOF_UNSIGNED_SHORT == sizeof(unsigned short));
-#endif
-#if !defined(__LZO_IN_MINILZO)
-#if defined(SIZEOF_SIZE_T)
-    r &= __lzo_assert(SIZEOF_SIZE_T == sizeof(size_t));
-#endif
-#endif
-
-    sanity = IS_UNSIGNED(unsigned short) && IS_UNSIGNED(unsigned) &&
-	     IS_UNSIGNED(unsigned long) &&
-	     IS_SIGNED(short) && IS_SIGNED(int) && IS_SIGNED(long);
-    if (sanity)
-    {
-	r &= __lzo_assert(IS_UNSIGNED(lzo_uint32));
-	r &= __lzo_assert(IS_UNSIGNED(lzo_uint));
-	r &= __lzo_assert(IS_SIGNED(lzo_int32));
-	r &= __lzo_assert(IS_SIGNED(lzo_int));
-
-	r &= __lzo_assert(INT_MAX    == LZO_STYPE_MAX(sizeof(int)));
-	r &= __lzo_assert(UINT_MAX   == LZO_UTYPE_MAX(sizeof(unsigned)));
-	r &= __lzo_assert(LONG_MAX   == LZO_STYPE_MAX(sizeof(long)));
-	r &= __lzo_assert(ULONG_MAX  == LZO_UTYPE_MAX(sizeof(unsigned long)));
-	r &= __lzo_assert(SHRT_MAX   == LZO_STYPE_MAX(sizeof(short)));
-	r &= __lzo_assert(USHRT_MAX  == LZO_UTYPE_MAX(sizeof(unsigned short)));
-	r &= __lzo_assert(LZO_UINT32_MAX == LZO_UTYPE_MAX(sizeof(lzo_uint32)));
-	r &= __lzo_assert(LZO_UINT_MAX   == LZO_UTYPE_MAX(sizeof(lzo_uint)));
-#if !defined(__LZO_IN_MINILZO)
-	r &= __lzo_assert(SIZE_T_MAX     == LZO_UTYPE_MAX(sizeof(size_t)));
-#endif
-    }
-
-#if 0
-    r &= __lzo_assert(LZO_BYTE(257) == 1);
-    r &= __lzo_assert(LZO_USHORT(65537L) == 1);
-#endif
-
-    return r;
-}
-
-static lzo_bool basic_ptr_check(void)
-{
-    lzo_bool r = 1;
-    lzo_bool sanity;
-
-    r &= __lzo_assert(sizeof(char *) >= sizeof(int));
-    r &= __lzo_assert(sizeof(lzo_byte *) >= sizeof(char *));
-
-    r &= __lzo_assert(sizeof(lzo_voidp) == sizeof(lzo_byte *));
-    r &= __lzo_assert(sizeof(lzo_voidp) == sizeof(lzo_voidpp));
-    r &= __lzo_assert(sizeof(lzo_voidp) == sizeof(lzo_bytepp));
-    r &= __lzo_assert(sizeof(lzo_voidp) >= sizeof(lzo_uint));
-
-    r &= __lzo_assert(sizeof(lzo_ptr_t) == sizeof(lzo_voidp));
-    r &= __lzo_assert(sizeof(lzo_ptr_t) >= sizeof(lzo_uint));
-
-    r &= __lzo_assert(sizeof(lzo_ptrdiff_t) >= 4);
-    r &= __lzo_assert(sizeof(lzo_ptrdiff_t) >= sizeof(ptrdiff_t));
-
-#if defined(SIZEOF_CHAR_P)
-    r &= __lzo_assert(SIZEOF_CHAR_P == sizeof(char *));
-#endif
-#if defined(SIZEOF_PTRDIFF_T)
-    r &= __lzo_assert(SIZEOF_PTRDIFF_T == sizeof(ptrdiff_t));
-#endif
-
-    sanity = IS_UNSIGNED(unsigned short) && IS_UNSIGNED(unsigned) &&
-	     IS_UNSIGNED(unsigned long) &&
-	     IS_SIGNED(short) && IS_SIGNED(int) && IS_SIGNED(long);
-    if (sanity)
-    {
-	r &= __lzo_assert(IS_UNSIGNED(lzo_ptr_t));
-	r &= __lzo_assert(IS_UNSIGNED(lzo_moff_t));
-	r &= __lzo_assert(IS_SIGNED(lzo_ptrdiff_t));
-	r &= __lzo_assert(IS_SIGNED(lzo_sptr_t));
-    }
-
-    return r;
-}
-
-static lzo_bool ptr_check(void)
-{
-    lzo_bool r = 1;
-    int i;
-    char _wrkmem[10 * sizeof(lzo_byte *) + sizeof(lzo_align_t)];
-    lzo_byte *wrkmem;
-    lzo_bytepp dict;
-    unsigned char x[4 * sizeof(lzo_align_t)];
-    long d;
-    lzo_align_t a;
-
-    for (i = 0; i < (int) sizeof(x); i++)
-	x[i] = LZO_BYTE(i);
-
-    wrkmem = LZO_PTR_ALIGN_UP((lzo_byte *)_wrkmem,sizeof(lzo_align_t));
-    dict = (lzo_bytepp) wrkmem;
-
-    d = (long) ((const lzo_bytep) dict - (const lzo_bytep) _wrkmem);
-    r &= __lzo_assert(d >= 0);
-    r &= __lzo_assert(d < (long) sizeof(lzo_align_t));
-
-    memset(&a,0xff,sizeof(a));
-    r &= __lzo_assert(a.a_ushort == USHRT_MAX);
-    r &= __lzo_assert(a.a_uint == UINT_MAX);
-    r &= __lzo_assert(a.a_ulong == ULONG_MAX);
-    r &= __lzo_assert(a.a_lzo_uint == LZO_UINT_MAX);
-
-    if (r == 1)
-    {
-	for (i = 0; i < 8; i++)
-	    r &= __lzo_assert((const lzo_voidp) (&dict[i]) == (const lzo_voidp) (&wrkmem[i * sizeof(lzo_byte *)]));
-    }
-
-    memset(&a,0,sizeof(a));
-    r &= __lzo_assert(a.a_charp == NULL);
-    r &= __lzo_assert(a.a_lzo_bytep == NULL);
-    //r &= __lzo_assert(NULL == 0);
-    if (r == 1)
-    {
-	for (i = 0; i < 10; i++)
-	    dict[i] = wrkmem;
-	BZERO8_PTR(dict+1,sizeof(dict[0]),8);
-	r &= __lzo_assert(dict[0] == wrkmem);
-	for (i = 1; i < 9; i++)
-	    r &= __lzo_assert(dict[i] == NULL);
-	r &= __lzo_assert(dict[9] == wrkmem);
-    }
-
-    if (r == 1)
-    {
-	unsigned k = 1;
-	const unsigned n = (unsigned) sizeof(lzo_uint32);
-	lzo_byte *p0;
-	lzo_byte *p1;
-
-	k += __lzo_align_gap(&x[k],n);
-	p0 = (lzo_bytep) &x[k];
-#if defined(PTR_LINEAR)
-	r &= __lzo_assert((PTR_LINEAR(p0) & (n-1)) == 0);
-#else
-	r &= __lzo_assert(n == 4);
-	r &= __lzo_assert(PTR_ALIGNED_4(p0));
-#endif
-
-	r &= __lzo_assert(k >= 1);
-	p1 = (lzo_bytep) &x[1];
-	r &= __lzo_assert(PTR_GE(p0,p1));
-
-	r &= __lzo_assert(k < 1+n);
-	p1 = (lzo_bytep) &x[1+n];
-	r &= __lzo_assert(PTR_LT(p0,p1));
-
-	if (r == 1)
-	{
-	    lzo_uint32 v0 = * (lzo_uint32 *) &x[k];
-	    lzo_uint32 v1 = * (lzo_uint32 *) &x[k+n];
-
-	    r &= __lzo_assert(v0 > 0);
-	    r &= __lzo_assert(v1 > 0);
-	}
-    }
-
-    return r;
-}
-
-LZO_PUBLIC(int)
-_lzo_config_check(void)
-{
-    lzo_bool r = 1;
-    int i;
-    union {
-	lzo_uint32 a;
-	unsigned short b;
-	lzo_uint32 aa[4];
-	unsigned char x[4*sizeof(lzo_align_t)];
-    } u;
-
-#if 0
-    r &= __lzo_assert((const void *)&u == (const void *)&u.a);
-    r &= __lzo_assert((const void *)&u == (const void *)&u.b);
-    r &= __lzo_assert((const void *)&u == (const void *)&u.x[0]);
-    r &= __lzo_assert((const void *)&u == (const void *)&u.aa[0]);
-#endif
-
-    r &= basic_integral_check();
-    r &= basic_ptr_check();
-    if (r != 1)
-	return LZO_E_ERROR;
-
-    u.a = 0; u.b = 0;
-    for (i = 0; i < (int) sizeof(u.x); i++)
-	u.x[i] = LZO_BYTE(i);
-
-#if 0
-    r &= __lzo_assert( (int) (unsigned char) ((char) -1) == 255);
-#endif
-
-#if defined(LZO_BYTE_ORDER)
-    if (r == 1)
-    {
-#  if (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN)
-	lzo_uint32 a = (lzo_uint32) (u.a & LZO_0xffffffffL);
-	unsigned short b = (unsigned short) (u.b & 0xffff);
-	r &= __lzo_assert(a == 0x03020100L);
-	r &= __lzo_assert(b == 0x0100);
-#  elif (LZO_BYTE_ORDER == LZO_BIG_ENDIAN)
-	lzo_uint32 a = u.a >> (8 * sizeof(u.a) - 32);
-	unsigned short b = u.b >> (8 * sizeof(u.b) - 16);
-	r &= __lzo_assert(a == 0x00010203L);
-	r &= __lzo_assert(b == 0x0001);
-#  else
-#    error invalid LZO_BYTE_ORDER
-#  endif
-    }
-#endif
-
-#if defined(LZO_UNALIGNED_OK_2)
-    r &= __lzo_assert(sizeof(short) == 2);
-    if (r == 1)
-    {
-	unsigned short b[4];
-
-	for (i = 0; i < 4; i++)
-	    b[i] = * (const unsigned short *) &u.x[i];
-
-#  if (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN)
-	r &= __lzo_assert(b[0] == 0x0100);
-	r &= __lzo_assert(b[1] == 0x0201);
-	r &= __lzo_assert(b[2] == 0x0302);
-	r &= __lzo_assert(b[3] == 0x0403);
-#  elif (LZO_BYTE_ORDER == LZO_BIG_ENDIAN)
-	r &= __lzo_assert(b[0] == 0x0001);
-	r &= __lzo_assert(b[1] == 0x0102);
-	r &= __lzo_assert(b[2] == 0x0203);
-	r &= __lzo_assert(b[3] == 0x0304);
-#  endif
-    }
-#endif
-
-#if defined(LZO_UNALIGNED_OK_4)
-    r &= __lzo_assert(sizeof(lzo_uint32) == 4);
-    if (r == 1)
-    {
-	lzo_uint32 a[4];
-
-	for (i = 0; i < 4; i++)
-	    a[i] = * (const lzo_uint32 *) &u.x[i];
-
-#  if (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN)
-	r &= __lzo_assert(a[0] == 0x03020100L);
-	r &= __lzo_assert(a[1] == 0x04030201L);
-	r &= __lzo_assert(a[2] == 0x05040302L);
-	r &= __lzo_assert(a[3] == 0x06050403L);
-#  elif (LZO_BYTE_ORDER == LZO_BIG_ENDIAN)
-	r &= __lzo_assert(a[0] == 0x00010203L);
-	r &= __lzo_assert(a[1] == 0x01020304L);
-	r &= __lzo_assert(a[2] == 0x02030405L);
-	r &= __lzo_assert(a[3] == 0x03040506L);
-#  endif
-    }
-#endif
-
-#if defined(LZO_ALIGNED_OK_4)
-    r &= __lzo_assert(sizeof(lzo_uint32) == 4);
-#endif
-
-    r &= __lzo_assert(lzo_sizeof_dict_t == sizeof(lzo_dict_t));
-
-#if defined(__LZO_IN_MINLZO)
-    if (r == 1)
-    {
-	lzo_uint32 adler;
-	adler = lzo_adler32(0, NULL, 0);
-	adler = lzo_adler32(adler, lzo_copyright(), 200);
-	r &= __lzo_assert(adler == 0x7ea34377L);
-    }
-#endif
-
-    if (r == 1)
-    {
-	r &= __lzo_assert(!schedule_insns_bug());
-    }
-
-    if (r == 1)
-    {
-	static int x[3];
-	static unsigned xn = 3;
-	register unsigned j;
-
-	for (j = 0; j < xn; j++)
-	    x[j] = (int)j - 3;
-	r &= __lzo_assert(!strength_reduce_bug(x));
-    }
-
-    if (r == 1)
-    {
-	r &= ptr_check();
-    }
-
-    return r == 1 ? LZO_E_OK : LZO_E_ERROR;
-}
-
-static lzo_bool schedule_insns_bug(void)
-{
-#if defined(__LZO_CHECKER)
-    return 0;
-#else
-    const int clone[] = {1, 2, 0};
-    const int *q;
-    q = clone;
-    return (*q) ? 0 : 1;
-#endif
-}
-
-static lzo_bool strength_reduce_bug(int *x)
-{
-    return x[0] != -3 || x[1] != -2 || x[2] != -1;
-}
-
-int __lzo_init_done = 0;
-
-LZO_PUBLIC(int)
-__lzo_init2(unsigned v, int s1, int s2, int s3, int s4, int s5,
-			int s6, int s7, int s8, int s9)
-{
-    int r;
-
-    __lzo_init_done = 1;
-
-    if (v == 0)
-	return LZO_E_ERROR;
-
-    r = (s1 == -1 || s1 == (int) sizeof(short)) &&
-	(s2 == -1 || s2 == (int) sizeof(int)) &&
-	(s3 == -1 || s3 == (int) sizeof(long)) &&
-	(s4 == -1 || s4 == (int) sizeof(lzo_uint32)) &&
-	(s5 == -1 || s5 == (int) sizeof(lzo_uint)) &&
-	(s6 == -1 || s6 == (int) lzo_sizeof_dict_t) &&
-	(s7 == -1 || s7 == (int) sizeof(char *)) &&
-	(s8 == -1 || s8 == (int) sizeof(lzo_voidp)) &&
-	(s9 == -1 || s9 == (int) sizeof(lzo_compress_t));
-    if (!r)
-	return LZO_E_ERROR;
-
-    r = _lzo_config_check();
-    if (r != LZO_E_OK)
-	return r;
-
-    return r;
-}
-
-#if !defined(__LZO_IN_MINILZO)
-
-LZO_EXTERN(int)
-__lzo_init(unsigned v,int s1,int s2,int s3,int s4,int s5,int s6,int s7);
-
-LZO_PUBLIC(int)
-__lzo_init(unsigned v,int s1,int s2,int s3,int s4,int s5,int s6,int s7)
-{
-    if (v == 0 || v > 0x1010)
-	return LZO_E_ERROR;
-    return __lzo_init2(v,s1,s2,s3,s4,s5,-1,-1,s6,s7);
-}
-
-#endif
-
-#define do_compress         _lzo1x_1_do_compress
-
-#define LZO_NEED_DICT_H
-#define D_BITS          14
-#define D_INDEX1(d,p)       d = DM((0x21*DX3(p,5,5,6)) >> 5)
-#define D_INDEX2(d,p)       d = (d & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f)
-
-#ifndef __LZO_CONFIG1X_H
-#define __LZO_CONFIG1X_H
-
-#if !defined(LZO1X) && !defined(LZO1Y) && !defined(LZO1Z)
-#  define LZO1X
-#endif
-
-#if !defined(__LZO_IN_MINILZO)
-#include <lzo1x.h>
-#endif
-
-#define LZO_EOF_CODE
-#undef LZO_DETERMINISTIC
-
-#define M1_MAX_OFFSET   0x0400
-#ifndef M2_MAX_OFFSET
-#define M2_MAX_OFFSET   0x0800
-#endif
-#define M3_MAX_OFFSET   0x4000
-#define M4_MAX_OFFSET   0xbfff
-
-#define MX_MAX_OFFSET   (M1_MAX_OFFSET + M2_MAX_OFFSET)
-
-#define M1_MIN_LEN      2
-#define M1_MAX_LEN      2
-#define M2_MIN_LEN      3
-#ifndef M2_MAX_LEN
-#define M2_MAX_LEN      8
-#endif
-#define M3_MIN_LEN      3
-#define M3_MAX_LEN      33
-#define M4_MIN_LEN      3
-#define M4_MAX_LEN      9
-
-#define M1_MARKER       0
-#define M2_MARKER       64
-#define M3_MARKER       32
-#define M4_MARKER       16
-
-#ifndef MIN_LOOKAHEAD
-#define MIN_LOOKAHEAD       (M2_MAX_LEN + 1)
-#endif
-
-#if defined(LZO_NEED_DICT_H)
-
-#ifndef LZO_HASH
-#define LZO_HASH            LZO_HASH_LZO_INCREMENTAL_B
-#endif
-#define DL_MIN_LEN          M2_MIN_LEN
-
-#ifndef __LZO_DICT_H
-#define __LZO_DICT_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if !defined(D_BITS) && defined(DBITS)
-#  define D_BITS        DBITS
-#endif
-#if !defined(D_BITS)
-#  error D_BITS is not defined
-#endif
-#if (D_BITS < 16)
-#  define D_SIZE        LZO_SIZE(D_BITS)
-#  define D_MASK        LZO_MASK(D_BITS)
-#else
-#  define D_SIZE        LZO_USIZE(D_BITS)
-#  define D_MASK        LZO_UMASK(D_BITS)
-#endif
-#define D_HIGH          ((D_MASK >> 1) + 1)
-
-#if !defined(DD_BITS)
-#  define DD_BITS       0
-#endif
-#define DD_SIZE         LZO_SIZE(DD_BITS)
-#define DD_MASK         LZO_MASK(DD_BITS)
-
-#if !defined(DL_BITS)
-#  define DL_BITS       (D_BITS - DD_BITS)
-#endif
-#if (DL_BITS < 16)
-#  define DL_SIZE       LZO_SIZE(DL_BITS)
-#  define DL_MASK       LZO_MASK(DL_BITS)
-#else
-#  define DL_SIZE       LZO_USIZE(DL_BITS)
-#  define DL_MASK       LZO_UMASK(DL_BITS)
-#endif
-
-#if (D_BITS != DL_BITS + DD_BITS)
-#  error D_BITS does not match
-#endif
-#if (D_BITS < 8 || D_BITS > 18)
-#  error invalid D_BITS
-#endif
-#if (DL_BITS < 8 || DL_BITS > 20)
-#  error invalid DL_BITS
-#endif
-#if (DD_BITS < 0 || DD_BITS > 6)
-#  error invalid DD_BITS
-#endif
-
-#if !defined(DL_MIN_LEN)
-#  define DL_MIN_LEN    3
-#endif
-#if !defined(DL_SHIFT)
-#  define DL_SHIFT      ((DL_BITS + (DL_MIN_LEN - 1)) / DL_MIN_LEN)
-#endif
-
-#define LZO_HASH_GZIP                   1
-#define LZO_HASH_GZIP_INCREMENTAL       2
-#define LZO_HASH_LZO_INCREMENTAL_A      3
-#define LZO_HASH_LZO_INCREMENTAL_B      4
-
-#if !defined(LZO_HASH)
-#  error choose a hashing strategy
-#endif
-
-#if (DL_MIN_LEN == 3)
-#  define _DV2_A(p,shift1,shift2) \
-	(((( (lzo_uint32)((p)[0]) << shift1) ^ (p)[1]) << shift2) ^ (p)[2])
-#  define _DV2_B(p,shift1,shift2) \
-	(((( (lzo_uint32)((p)[2]) << shift1) ^ (p)[1]) << shift2) ^ (p)[0])
-#  define _DV3_B(p,shift1,shift2,shift3) \
-	((_DV2_B((p)+1,shift1,shift2) << (shift3)) ^ (p)[0])
-#elif (DL_MIN_LEN == 2)
-#  define _DV2_A(p,shift1,shift2) \
-	(( (lzo_uint32)(p[0]) << shift1) ^ p[1])
-#  define _DV2_B(p,shift1,shift2) \
-	(( (lzo_uint32)(p[1]) << shift1) ^ p[2])
-#else
-#  error invalid DL_MIN_LEN
-#endif
-#define _DV_A(p,shift)      _DV2_A(p,shift,shift)
-#define _DV_B(p,shift)      _DV2_B(p,shift,shift)
-#define DA2(p,s1,s2) \
-	(((((lzo_uint32)((p)[2]) << (s2)) + (p)[1]) << (s1)) + (p)[0])
-#define DS2(p,s1,s2) \
-	(((((lzo_uint32)((p)[2]) << (s2)) - (p)[1]) << (s1)) - (p)[0])
-#define DX2(p,s1,s2) \
-	(((((lzo_uint32)((p)[2]) << (s2)) ^ (p)[1]) << (s1)) ^ (p)[0])
-#define DA3(p,s1,s2,s3) ((DA2((p)+1,s2,s3) << (s1)) + (p)[0])
-#define DS3(p,s1,s2,s3) ((DS2((p)+1,s2,s3) << (s1)) - (p)[0])
-#define DX3(p,s1,s2,s3) ((DX2((p)+1,s2,s3) << (s1)) ^ (p)[0])
-#define DMS(v,s)        ((lzo_uint) (((v) & (D_MASK >> (s))) << (s)))
-#define DM(v)           DMS(v,0)
-
-#if (LZO_HASH == LZO_HASH_GZIP)
-#  define _DINDEX(dv,p)     (_DV_A((p),DL_SHIFT))
-
-#elif (LZO_HASH == LZO_HASH_GZIP_INCREMENTAL)
-#  define __LZO_HASH_INCREMENTAL
-#  define DVAL_FIRST(dv,p)  dv = _DV_A((p),DL_SHIFT)
-#  define DVAL_NEXT(dv,p)   dv = (((dv) << DL_SHIFT) ^ p[2])
-#  define _DINDEX(dv,p)     (dv)
-#  define DVAL_LOOKAHEAD    DL_MIN_LEN
-
-#elif (LZO_HASH == LZO_HASH_LZO_INCREMENTAL_A)
-#  define __LZO_HASH_INCREMENTAL
-#  define DVAL_FIRST(dv,p)  dv = _DV_A((p),5)
-#  define DVAL_NEXT(dv,p) \
-		dv ^= (lzo_uint32)(p[-1]) << (2*5); dv = (((dv) << 5) ^ p[2])
-#  define _DINDEX(dv,p)     ((0x9f5f * (dv)) >> 5)
-#  define DVAL_LOOKAHEAD    DL_MIN_LEN
-
-#elif (LZO_HASH == LZO_HASH_LZO_INCREMENTAL_B)
-#  define __LZO_HASH_INCREMENTAL
-#  define DVAL_FIRST(dv,p)  dv = _DV_B((p),5)
-#  define DVAL_NEXT(dv,p) \
-		dv ^= p[-1]; dv = (((dv) >> 5) ^ ((lzo_uint32)(p[2]) << (2*5)))
-#  define _DINDEX(dv,p)     ((0x9f5f * (dv)) >> 5)
-#  define DVAL_LOOKAHEAD    DL_MIN_LEN
-
-#else
-#  error choose a hashing strategy
-#endif
-
-#ifndef DINDEX
-#define DINDEX(dv,p)        ((lzo_uint)((_DINDEX(dv,p)) & DL_MASK) << DD_BITS)
-#endif
-#if !defined(DINDEX1) && defined(D_INDEX1)
-#define DINDEX1             D_INDEX1
-#endif
-#if !defined(DINDEX2) && defined(D_INDEX2)
-#define DINDEX2             D_INDEX2
-#endif
-
-#if !defined(__LZO_HASH_INCREMENTAL)
-#  define DVAL_FIRST(dv,p)  ((void) 0)
-#  define DVAL_NEXT(dv,p)   ((void) 0)
-#  define DVAL_LOOKAHEAD    0
-#endif
-
-#if !defined(DVAL_ASSERT)
-#if defined(__LZO_HASH_INCREMENTAL) && !defined(NDEBUG)
-static void DVAL_ASSERT(lzo_uint32 dv, const lzo_byte *p)
-{
-    lzo_uint32 df;
-    DVAL_FIRST(df,(p));
-    assert(DINDEX(dv,p) == DINDEX(df,p));
-}
-#else
-#  define DVAL_ASSERT(dv,p) ((void) 0)
-#endif
-#endif
-
-#if defined(LZO_DICT_USE_PTR)
-#  define DENTRY(p,in)                          (p)
-#  define GINDEX(m_pos,m_off,dict,dindex,in)    m_pos = dict[dindex]
-#else
-#  define DENTRY(p,in)                          ((lzo_uint) ((p)-(in)))
-#  define GINDEX(m_pos,m_off,dict,dindex,in)    m_off = dict[dindex]
-#endif
-
-#if (DD_BITS == 0)
-
-#  define UPDATE_D(dict,drun,dv,p,in)       dict[ DINDEX(dv,p) ] = DENTRY(p,in)
-#  define UPDATE_I(dict,drun,index,p,in)    dict[index] = DENTRY(p,in)
-#  define UPDATE_P(ptr,drun,p,in)           (ptr)[0] = DENTRY(p,in)
-
-#else
-
-#  define UPDATE_D(dict,drun,dv,p,in)   \
-	dict[ DINDEX(dv,p) + drun++ ] = DENTRY(p,in); drun &= DD_MASK
-#  define UPDATE_I(dict,drun,index,p,in)    \
-	dict[ (index) + drun++ ] = DENTRY(p,in); drun &= DD_MASK
-#  define UPDATE_P(ptr,drun,p,in)   \
-	(ptr) [ drun++ ] = DENTRY(p,in); drun &= DD_MASK
-
-#endif
-
-#if defined(LZO_DICT_USE_PTR)
-
-#define LZO_CHECK_MPOS_DET(m_pos,m_off,in,ip,max_offset) \
-	(m_pos == NULL || (m_off = (lzo_moff_t) (ip - m_pos)) > max_offset)
-
-#define LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,max_offset) \
-    (BOUNDS_CHECKING_OFF_IN_EXPR( \
-	(PTR_LT(m_pos,in) || \
-	 (m_off = (lzo_moff_t) PTR_DIFF(ip,m_pos)) <= 0 || \
-	  m_off > max_offset) ))
-
-#else
-
-#define LZO_CHECK_MPOS_DET(m_pos,m_off,in,ip,max_offset) \
-	(m_off == 0 || \
-	 ((m_off = (lzo_moff_t) ((ip)-(in)) - m_off) > max_offset) || \
-	 (m_pos = (ip) - (m_off), 0) )
-
-#define LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,max_offset) \
-	((lzo_moff_t) ((ip)-(in)) <= m_off || \
-	 ((m_off = (lzo_moff_t) ((ip)-(in)) - m_off) > max_offset) || \
-	 (m_pos = (ip) - (m_off), 0) )
-
-#endif
-
-#if defined(LZO_DETERMINISTIC)
-#  define LZO_CHECK_MPOS    LZO_CHECK_MPOS_DET
-#else
-#  define LZO_CHECK_MPOS    LZO_CHECK_MPOS_NON_DET
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
-
-#endif
-
-#endif
-
-#define DO_COMPRESS     lzo1x_1_compress
-
-static
-lzo_uint do_compress     ( const lzo_byte *in , lzo_uint  in_len,
-				 lzo_byte *out, lzo_uint *out_len,
-				 lzo_voidp wrkmem )
-{
-#if 0 && defined(__GNUC__) && defined(__i386__)
-    register const lzo_byte *ip __asm__("%esi");
-#else
-    register const lzo_byte *ip;
-#endif
-    lzo_byte *op;
-    const lzo_byte * const in_end = in + in_len;
-    const lzo_byte * const ip_end = in + in_len - M2_MAX_LEN - 5;
-    const lzo_byte *ii;
-    lzo_dict_p const dict = (lzo_dict_p) wrkmem;
-
-    op = out;
-    ip = in;
-    ii = ip;
-
-    ip += 4;
-    for (;;)
-    {
-#if 0 && defined(__GNUC__) && defined(__i386__)
-	register const lzo_byte *m_pos __asm__("%edi");
-#else
-	register const lzo_byte *m_pos;
-#endif
-	lzo_moff_t m_off;
-	lzo_uint m_len;
-	lzo_uint dindex;
-
-	DINDEX1(dindex,ip);
-	GINDEX(m_pos,m_off,dict,dindex,in);
-	if (LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,M4_MAX_OFFSET))
-	    goto literal;
-#if 1
-	if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3])
-	    goto try_match;
-	DINDEX2(dindex,ip);
-#endif
-	GINDEX(m_pos,m_off,dict,dindex,in);
-	if (LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,M4_MAX_OFFSET))
-	    goto literal;
-	if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3])
-	    goto try_match;
-	goto literal;
-
-try_match:
-#if 1 && defined(LZO_UNALIGNED_OK_2)
-	if (* (const lzo_ushortp) m_pos != * (const lzo_ushortp) ip)
-#else
-	if (m_pos[0] != ip[0] || m_pos[1] != ip[1])
-#endif
-	{
-	}
-	else
-	{
-	    if (m_pos[2] == ip[2])
-	    {
-#if 0
-		if (m_off <= M2_MAX_OFFSET)
-		    goto match;
-		if (lit <= 3)
-		    goto match;
-		if (lit == 3)
-		{
-		    assert(op - 2 > out); op[-2] |= LZO_BYTE(3);
-		    *op++ = *ii++; *op++ = *ii++; *op++ = *ii++;
-		    goto code_match;
-		}
-		if (m_pos[3] == ip[3])
-#endif
-		    goto match;
-	    }
-	    else
-	    {
-#if 0
-#if 0
-		if (m_off <= M1_MAX_OFFSET && lit > 0 && lit <= 3)
-#else
-		if (m_off <= M1_MAX_OFFSET && lit == 3)
-#endif
-		{
-		    register lzo_uint t;
-
-		    t = lit;
-		    assert(op - 2 > out); op[-2] |= LZO_BYTE(t);
-		    do *op++ = *ii++; while (--t > 0);
-		    assert(ii == ip);
-		    m_off -= 1;
-		    *op++ = LZO_BYTE(M1_MARKER | ((m_off & 3) << 2));
-		    *op++ = LZO_BYTE(m_off >> 2);
-		    ip += 2;
-		    goto match_done;
-		}
-#endif
-	    }
-	}
-
-literal:
-	UPDATE_I(dict,0,dindex,ip,in);
-	++ip;
-	if (ip >= ip_end)
-	    break;
-	continue;
-
-match:
-	UPDATE_I(dict,0,dindex,ip,in);
-	if (ip - ii > 0)
-	{
-	    register lzo_uint t = ip - ii;
-
-	    if (t <= 3)
-	    {
-		assert(op - 2 > out);
-		op[-2] |= LZO_BYTE(t);
-	    }
-	    else if (t <= 18)
-		*op++ = LZO_BYTE(t - 3);
-	    else
-	    {
-		register lzo_uint tt = t - 18;
-
-		*op++ = 0;
-		while (tt > 255)
-		{
-		    tt -= 255;
-		    *op++ = 0;
-		}
-		assert(tt > 0);
-		*op++ = LZO_BYTE(tt);
-	    }
-	    do *op++ = *ii++; while (--t > 0);
-	}
-
-	assert(ii == ip);
-	ip += 3;
-	if (m_pos[3] != *ip++ || m_pos[4] != *ip++ || m_pos[5] != *ip++ ||
-	    m_pos[6] != *ip++ || m_pos[7] != *ip++ || m_pos[8] != *ip++
-#ifdef LZO1Y
-	    || m_pos[ 9] != *ip++ || m_pos[10] != *ip++ || m_pos[11] != *ip++
-	    || m_pos[12] != *ip++ || m_pos[13] != *ip++ || m_pos[14] != *ip++
-#endif
-	   )
-	{
-	    --ip;
-	    m_len = ip - ii;
-	    assert(m_len >= 3); assert(m_len <= M2_MAX_LEN);
-
-	    if (m_off <= M2_MAX_OFFSET)
-	    {
-		m_off -= 1;
-#if defined(LZO1X)
-		*op++ = LZO_BYTE(((m_len - 1) << 5) | ((m_off & 7) << 2));
-		*op++ = LZO_BYTE(m_off >> 3);
-#elif defined(LZO1Y)
-		*op++ = LZO_BYTE(((m_len + 1) << 4) | ((m_off & 3) << 2));
-		*op++ = LZO_BYTE(m_off >> 2);
-#endif
-	    }
-	    else if (m_off <= M3_MAX_OFFSET)
-	    {
-		m_off -= 1;
-		*op++ = LZO_BYTE(M3_MARKER | (m_len - 2));
-		goto m3_m4_offset;
-	    }
-	    else
-#if defined(LZO1X)
-	    {
-		m_off -= 0x4000;
-		assert(m_off > 0); assert(m_off <= 0x7fff);
-		*op++ = LZO_BYTE(M4_MARKER |
-				 ((m_off & 0x4000) >> 11) | (m_len - 2));
-		goto m3_m4_offset;
-	    }
-#elif defined(LZO1Y)
-		goto m4_match;
-#endif
-	}
-	else
-	{
-	    {
-		const lzo_byte *end = in_end;
-		const lzo_byte *m = m_pos + M2_MAX_LEN + 1;
-		while (ip < end && *m == *ip)
-		    m++, ip++;
-		m_len = (ip - ii);
-	    }
-	    assert(m_len > M2_MAX_LEN);
-
-	    if (m_off <= M3_MAX_OFFSET)
-	    {
-		m_off -= 1;
-		if (m_len <= 33)
-		    *op++ = LZO_BYTE(M3_MARKER | (m_len - 2));
-		else
-		{
-		    m_len -= 33;
-		    *op++ = M3_MARKER | 0;
-		    goto m3_m4_len;
-		}
-	    }
-	    else
-	    {
-#if defined(LZO1Y)
-m4_match:
-#endif
-		m_off -= 0x4000;
-		assert(m_off > 0); assert(m_off <= 0x7fff);
-		if (m_len <= M4_MAX_LEN)
-		    *op++ = LZO_BYTE(M4_MARKER |
-				     ((m_off & 0x4000) >> 11) | (m_len - 2));
-		else
-		{
-		    m_len -= M4_MAX_LEN;
-		    *op++ = LZO_BYTE(M4_MARKER | ((m_off & 0x4000) >> 11));
-m3_m4_len:
-		    while (m_len > 255)
-		    {
-			m_len -= 255;
-			*op++ = 0;
-		    }
-		    assert(m_len > 0);
-		    *op++ = LZO_BYTE(m_len);
-		}
-	    }
-
-m3_m4_offset:
-	    *op++ = LZO_BYTE((m_off & 63) << 2);
-	    *op++ = LZO_BYTE(m_off >> 6);
-	}
-
-#if 0
-match_done:
-#endif
-	ii = ip;
-	if (ip >= ip_end)
-	    break;
-    }
-
-    *out_len = op - out;
-    return (lzo_uint) (in_end - ii);
-}
-
-LZO_PUBLIC(int)
-DO_COMPRESS      ( const lzo_byte *in , lzo_uint  in_len,
-			 lzo_byte *out, lzo_uint *out_len,
-			 lzo_voidp wrkmem )
-{
-    lzo_byte *op = out;
-    lzo_uint t;
-
-#if defined(__LZO_QUERY_COMPRESS)
-    if (__LZO_IS_COMPRESS_QUERY(in,in_len,out,out_len,wrkmem))
-	return __LZO_QUERY_COMPRESS(in,in_len,out,out_len,wrkmem,D_SIZE,lzo_sizeof(lzo_dict_t));
-#endif
-
-    if (in_len <= M2_MAX_LEN + 5)
-	t = in_len;
-    else
-    {
-	t = do_compress(in,in_len,op,out_len,wrkmem);
-	op += *out_len;
-    }
-
-    if (t > 0)
-    {
-	const lzo_byte *ii = in + in_len - t;
-
-	if (op == out && t <= 238)
-	    *op++ = LZO_BYTE(17 + t);
-	else if (t <= 3)
-	    op[-2] |= LZO_BYTE(t);
-	else if (t <= 18)
-	    *op++ = LZO_BYTE(t - 3);
-	else
-	{
-	    lzo_uint tt = t - 18;
-
-	    *op++ = 0;
-	    while (tt > 255)
-	    {
-		tt -= 255;
-		*op++ = 0;
-	    }
-	    assert(tt > 0);
-	    *op++ = LZO_BYTE(tt);
-	}
-	do *op++ = *ii++; while (--t > 0);
-    }
-
-    *op++ = M4_MARKER | 1;
-    *op++ = 0;
-    *op++ = 0;
-
-    *out_len = op - out;
-    return LZO_E_OK;
-}
-
-#undef do_compress
-#undef DO_COMPRESS
-#undef LZO_HASH
-
-#undef LZO_TEST_DECOMPRESS_OVERRUN
-#undef LZO_TEST_DECOMPRESS_OVERRUN_INPUT
-#undef LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT
-#undef LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND
-#undef DO_DECOMPRESS
-#define DO_DECOMPRESS       lzo1x_decompress
-
-#if defined(LZO_TEST_DECOMPRESS_OVERRUN)
-#  if !defined(LZO_TEST_DECOMPRESS_OVERRUN_INPUT)
-#    define LZO_TEST_DECOMPRESS_OVERRUN_INPUT       2
-#  endif
-#  if !defined(LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT)
-#    define LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT      2
-#  endif
-#  if !defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND)
-#    define LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND
-#  endif
-#endif
-
-#undef TEST_IP
-#undef TEST_OP
-#undef TEST_LOOKBEHIND
-#undef NEED_IP
-#undef NEED_OP
-#undef HAVE_TEST_IP
-#undef HAVE_TEST_OP
-#undef HAVE_NEED_IP
-#undef HAVE_NEED_OP
-#undef HAVE_ANY_IP
-#undef HAVE_ANY_OP
-
-#if defined(LZO_TEST_DECOMPRESS_OVERRUN_INPUT)
-#  if (LZO_TEST_DECOMPRESS_OVERRUN_INPUT >= 1)
-#    define TEST_IP             (ip < ip_end)
-#  endif
-#  if (LZO_TEST_DECOMPRESS_OVERRUN_INPUT >= 2)
-#    define NEED_IP(x) \
-	    if ((lzo_uint)(ip_end - ip) < (lzo_uint)(x))  goto input_overrun
-#  endif
-#endif
-
-#if defined(LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT)
-#  if (LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT >= 1)
-#    define TEST_OP             (op <= op_end)
-#  endif
-#  if (LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT >= 2)
-#    undef TEST_OP
-#    define NEED_OP(x) \
-	    if ((lzo_uint)(op_end - op) < (lzo_uint)(x))  goto output_overrun
-#  endif
-#endif
-
-#if defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND)
-#  define TEST_LOOKBEHIND(m_pos,out)    if (m_pos < out) goto lookbehind_overrun
-#else
-#  define TEST_LOOKBEHIND(m_pos,op)     ((void) 0)
-#endif
-
-#if !defined(LZO_EOF_CODE) && !defined(TEST_IP)
-#  define TEST_IP               (ip < ip_end)
-#endif
-
-#if defined(TEST_IP)
-#  define HAVE_TEST_IP
-#else
-#  define TEST_IP               1
-#endif
-#if defined(TEST_OP)
-#  define HAVE_TEST_OP
-#else
-#  define TEST_OP               1
-#endif
-
-#if defined(NEED_IP)
-#  define HAVE_NEED_IP
-#else
-#  define NEED_IP(x)            ((void) 0)
-#endif
-#if defined(NEED_OP)
-#  define HAVE_NEED_OP
-#else
-#  define NEED_OP(x)            ((void) 0)
-#endif
-
-#if defined(HAVE_TEST_IP) || defined(HAVE_NEED_IP)
-#  define HAVE_ANY_IP
-#endif
-#if defined(HAVE_TEST_OP) || defined(HAVE_NEED_OP)
-#  define HAVE_ANY_OP
-#endif
-
-#if defined(DO_DECOMPRESS)
-LZO_PUBLIC(int)
-DO_DECOMPRESS  ( const lzo_byte *in , lzo_uint  in_len,
-		       lzo_byte *out, lzo_uint *out_len,
-		       lzo_voidp wrkmem )
-#endif
-{
-    register lzo_byte *op;
-    register const lzo_byte *ip;
-    register lzo_uint t;
-#if defined(COPY_DICT)
-    lzo_uint m_off;
-    const lzo_byte *dict_end;
-#else
-    register const lzo_byte *m_pos;
-#endif
-
-    const lzo_byte * const ip_end = in + in_len;
-#if defined(HAVE_ANY_OP)
-    lzo_byte * const op_end = out + *out_len;
-#endif
-#if defined(LZO1Z)
-    lzo_uint last_m_off = 0;
-#endif
-
-    LZO_UNUSED(wrkmem);
-
-#if defined(__LZO_QUERY_DECOMPRESS)
-    if (__LZO_IS_DECOMPRESS_QUERY(in,in_len,out,out_len,wrkmem))
-	return __LZO_QUERY_DECOMPRESS(in,in_len,out,out_len,wrkmem,0,0);
-#endif
-
-#if defined(COPY_DICT)
-    if (dict)
-    {
-	if (dict_len > M4_MAX_OFFSET)
-	{
-	    dict += dict_len - M4_MAX_OFFSET;
-	    dict_len = M4_MAX_OFFSET;
-	}
-	dict_end = dict + dict_len;
-    }
-    else
-    {
-	dict_len = 0;
-	dict_end = NULL;
-    }
-#endif
-
-    *out_len = 0;
-
-    op = out;
-    ip = in;
-
-    if (*ip > 17)
-    {
-	t = *ip++ - 17;
-	if (t < 4)
-	    goto match_next;
-	assert(t > 0); NEED_OP(t); NEED_IP(t+1);
-	do *op++ = *ip++; while (--t > 0);
-	goto first_literal_run;
-    }
-
-    while (TEST_IP && TEST_OP)
-    {
-	t = *ip++;
-	if (t >= 16)
-	    goto match;
-	if (t == 0)
-	{
-	    NEED_IP(1);
-	    while (*ip == 0)
-	    {
-		t += 255;
-		ip++;
-		NEED_IP(1);
-	    }
-	    t += 15 + *ip++;
-	}
-	assert(t > 0); NEED_OP(t+3); NEED_IP(t+4);
-#if defined(LZO_UNALIGNED_OK_4) || defined(LZO_ALIGNED_OK_4)
-#if !defined(LZO_UNALIGNED_OK_4)
-	if (PTR_ALIGNED2_4(op,ip))
-	{
-#endif
-	* (lzo_uint32p) op = * (const lzo_uint32p) ip;
-	op += 4; ip += 4;
-	if (--t > 0)
-	{
-	    if (t >= 4)
-	    {
-		do {
-		    * (lzo_uint32p) op = * (const lzo_uint32p) ip;
-		    op += 4; ip += 4; t -= 4;
-		} while (t >= 4);
-		if (t > 0) do *op++ = *ip++; while (--t > 0);
-	    }
-	    else
-		do *op++ = *ip++; while (--t > 0);
-	}
-#if !defined(LZO_UNALIGNED_OK_4)
-	}
-	else
-#endif
-#endif
-#if !defined(LZO_UNALIGNED_OK_4)
-	{
-	    *op++ = *ip++; *op++ = *ip++; *op++ = *ip++;
-	    do *op++ = *ip++; while (--t > 0);
-	}
-#endif
-
-first_literal_run:
-
-	t = *ip++;
-	if (t >= 16)
-	    goto match;
-#if defined(COPY_DICT)
-#if defined(LZO1Z)
-	m_off = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2);
-	last_m_off = m_off;
-#else
-	m_off = (1 + M2_MAX_OFFSET) + (t >> 2) + (*ip++ << 2);
-#endif
-	NEED_OP(3);
-	t = 3; COPY_DICT(t,m_off)
-#else
-#if defined(LZO1Z)
-	t = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2);
-	m_pos = op - t;
-	last_m_off = t;
-#else
-	m_pos = op - (1 + M2_MAX_OFFSET);
-	m_pos -= t >> 2;
-	m_pos -= *ip++ << 2;
-#endif
-	TEST_LOOKBEHIND(m_pos,out); NEED_OP(3);
-	*op++ = *m_pos++; *op++ = *m_pos++; *op++ = *m_pos;
-#endif
-	goto match_done;
-
-	while (TEST_IP && TEST_OP)
-	{
-match:
-	    if (t >= 64)
-	    {
-#if defined(COPY_DICT)
-#if defined(LZO1X)
-		m_off = 1 + ((t >> 2) & 7) + (*ip++ << 3);
-		t = (t >> 5) - 1;
-#elif defined(LZO1Y)
-		m_off = 1 + ((t >> 2) & 3) + (*ip++ << 2);
-		t = (t >> 4) - 3;
-#elif defined(LZO1Z)
-		m_off = t & 0x1f;
-		if (m_off >= 0x1c)
-		    m_off = last_m_off;
-		else
-		{
-		    m_off = 1 + (m_off << 6) + (*ip++ >> 2);
-		    last_m_off = m_off;
-		}
-		t = (t >> 5) - 1;
-#endif
-#else
-#if defined(LZO1X)
-		m_pos = op - 1;
-		m_pos -= (t >> 2) & 7;
-		m_pos -= *ip++ << 3;
-		t = (t >> 5) - 1;
-#elif defined(LZO1Y)
-		m_pos = op - 1;
-		m_pos -= (t >> 2) & 3;
-		m_pos -= *ip++ << 2;
-		t = (t >> 4) - 3;
-#elif defined(LZO1Z)
-		{
-		    lzo_uint off = t & 0x1f;
-		    m_pos = op;
-		    if (off >= 0x1c)
-		    {
-			assert(last_m_off > 0);
-			m_pos -= last_m_off;
-		    }
-		    else
-		    {
-			off = 1 + (off << 6) + (*ip++ >> 2);
-			m_pos -= off;
-			last_m_off = off;
-		    }
-		}
-		t = (t >> 5) - 1;
-#endif
-		TEST_LOOKBEHIND(m_pos,out); assert(t > 0); NEED_OP(t+3-1);
-		goto copy_match;
-#endif
-	    }
-	    else if (t >= 32)
-	    {
-		t &= 31;
-		if (t == 0)
-		{
-		    NEED_IP(1);
-		    while (*ip == 0)
-		    {
-			t += 255;
-			ip++;
-			NEED_IP(1);
-		    }
-		    t += 31 + *ip++;
-		}
-#if defined(COPY_DICT)
-#if defined(LZO1Z)
-		m_off = 1 + (ip[0] << 6) + (ip[1] >> 2);
-		last_m_off = m_off;
-#else
-		m_off = 1 + (ip[0] >> 2) + (ip[1] << 6);
-#endif
-#else
-#if defined(LZO1Z)
-		{
-		    lzo_uint off = 1 + (ip[0] << 6) + (ip[1] >> 2);
-		    m_pos = op - off;
-		    last_m_off = off;
-		}
-#elif defined(LZO_UNALIGNED_OK_2) && (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN)
-		m_pos = op - 1;
-		m_pos -= (* (const lzo_ushortp) ip) >> 2;
-#else
-		m_pos = op - 1;
-		m_pos -= (ip[0] >> 2) + (ip[1] << 6);
-#endif
-#endif
-		ip += 2;
-	    }
-	    else if (t >= 16)
-	    {
-#if defined(COPY_DICT)
-		m_off = (t & 8) << 11;
-#else
-		m_pos = op;
-		m_pos -= (t & 8) << 11;
-#endif
-		t &= 7;
-		if (t == 0)
-		{
-		    NEED_IP(1);
-		    while (*ip == 0)
-		    {
-			t += 255;
-			ip++;
-			NEED_IP(1);
-		    }
-		    t += 7 + *ip++;
-		}
-#if defined(COPY_DICT)
-#if defined(LZO1Z)
-		m_off += (ip[0] << 6) + (ip[1] >> 2);
-#else
-		m_off += (ip[0] >> 2) + (ip[1] << 6);
-#endif
-		ip += 2;
-		if (m_off == 0)
-		    goto eof_found;
-		m_off += 0x4000;
-#if defined(LZO1Z)
-		last_m_off = m_off;
-#endif
-#else
-#if defined(LZO1Z)
-		m_pos -= (ip[0] << 6) + (ip[1] >> 2);
-#elif defined(LZO_UNALIGNED_OK_2) && (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN)
-		m_pos -= (* (const lzo_ushortp) ip) >> 2;
-#else
-		m_pos -= (ip[0] >> 2) + (ip[1] << 6);
-#endif
-		ip += 2;
-		if (m_pos == op)
-		    goto eof_found;
-		m_pos -= 0x4000;
-#if defined(LZO1Z)
-		last_m_off = op - m_pos;
-#endif
-#endif
-	    }
-	    else
-	    {
-#if defined(COPY_DICT)
-#if defined(LZO1Z)
-		m_off = 1 + (t << 6) + (*ip++ >> 2);
-		last_m_off = m_off;
-#else
-		m_off = 1 + (t >> 2) + (*ip++ << 2);
-#endif
-		NEED_OP(2);
-		t = 2; COPY_DICT(t,m_off)
-#else
-#if defined(LZO1Z)
-		t = 1 + (t << 6) + (*ip++ >> 2);
-		m_pos = op - t;
-		last_m_off = t;
-#else
-		m_pos = op - 1;
-		m_pos -= t >> 2;
-		m_pos -= *ip++ << 2;
-#endif
-		TEST_LOOKBEHIND(m_pos,out); NEED_OP(2);
-		*op++ = *m_pos++; *op++ = *m_pos;
-#endif
-		goto match_done;
-	    }
-
-#if defined(COPY_DICT)
-
-	    NEED_OP(t+3-1);
-	    t += 3-1; COPY_DICT(t,m_off)
-
-#else
-
-	    TEST_LOOKBEHIND(m_pos,out); assert(t > 0); NEED_OP(t+3-1);
-#if defined(LZO_UNALIGNED_OK_4) || defined(LZO_ALIGNED_OK_4)
-#if !defined(LZO_UNALIGNED_OK_4)
-	    if (t >= 2 * 4 - (3 - 1) && PTR_ALIGNED2_4(op,m_pos))
-	    {
-		assert((op - m_pos) >= 4);
-#else
-	    if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4)
-	    {
-#endif
-		* (lzo_uint32p) op = * (const lzo_uint32p) m_pos;
-		op += 4; m_pos += 4; t -= 4 - (3 - 1);
-		do {
-		    * (lzo_uint32p) op = * (const lzo_uint32p) m_pos;
-		    op += 4; m_pos += 4; t -= 4;
-		} while (t >= 4);
-		if (t > 0) do *op++ = *m_pos++; while (--t > 0);
-	    }
-	    else
-#endif
-	    {
-copy_match:
-		*op++ = *m_pos++; *op++ = *m_pos++;
-		do *op++ = *m_pos++; while (--t > 0);
-	    }
-
-#endif
-
-match_done:
-#if defined(LZO1Z)
-	    t = ip[-1] & 3;
-#else
-	    t = ip[-2] & 3;
-#endif
-	    if (t == 0)
-		break;
-
-match_next:
-	    assert(t > 0); NEED_OP(t); NEED_IP(t+1);
-	    do *op++ = *ip++; while (--t > 0);
-	    t = *ip++;
-	}
-    }
-
-#if defined(HAVE_TEST_IP) || defined(HAVE_TEST_OP)
-    *out_len = op - out;
-    return LZO_E_EOF_NOT_FOUND;
-#endif
-
-eof_found:
-    assert(t == 1);
-    *out_len = op - out;
-    return (ip == ip_end ? LZO_E_OK :
-	   (ip < ip_end  ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN));
-
-#if defined(HAVE_NEED_IP)
-input_overrun:
-    *out_len = op - out;
-    return LZO_E_INPUT_OVERRUN;
-#endif
-
-#if defined(HAVE_NEED_OP)
-output_overrun:
-    *out_len = op - out;
-    return LZO_E_OUTPUT_OVERRUN;
-#endif
-
-#if defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND)
-lookbehind_overrun:
-    *out_len = op - out;
-    return LZO_E_LOOKBEHIND_OVERRUN;
-#endif
-}
-
-#define LZO_TEST_DECOMPRESS_OVERRUN
-#undef DO_DECOMPRESS
-#define DO_DECOMPRESS       lzo1x_decompress_safe
-
-#if defined(LZO_TEST_DECOMPRESS_OVERRUN)
-#  if !defined(LZO_TEST_DECOMPRESS_OVERRUN_INPUT)
-#    define LZO_TEST_DECOMPRESS_OVERRUN_INPUT       2
-#  endif
-#  if !defined(LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT)
-#    define LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT      2
-#  endif
-#  if !defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND)
-#    define LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND
-#  endif
-#endif
-
-#undef TEST_IP
-#undef TEST_OP
-#undef TEST_LOOKBEHIND
-#undef NEED_IP
-#undef NEED_OP
-#undef HAVE_TEST_IP
-#undef HAVE_TEST_OP
-#undef HAVE_NEED_IP
-#undef HAVE_NEED_OP
-#undef HAVE_ANY_IP
-#undef HAVE_ANY_OP
-
-#if defined(LZO_TEST_DECOMPRESS_OVERRUN_INPUT)
-#  if (LZO_TEST_DECOMPRESS_OVERRUN_INPUT >= 1)
-#    define TEST_IP             (ip < ip_end)
-#  endif
-#  if (LZO_TEST_DECOMPRESS_OVERRUN_INPUT >= 2)
-#    define NEED_IP(x) \
-	    if ((lzo_uint)(ip_end - ip) < (lzo_uint)(x))  goto input_overrun
-#  endif
-#endif
-
-#if defined(LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT)
-#  if (LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT >= 1)
-#    define TEST_OP             (op <= op_end)
-#  endif
-#  if (LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT >= 2)
-#    undef TEST_OP
-#    define NEED_OP(x) \
-	    if ((lzo_uint)(op_end - op) < (lzo_uint)(x))  goto output_overrun
-#  endif
-#endif
-
-#if defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND)
-#  define TEST_LOOKBEHIND(m_pos,out)    if (m_pos < out) goto lookbehind_overrun
-#else
-#  define TEST_LOOKBEHIND(m_pos,op)     ((void) 0)
-#endif
-
-#if !defined(LZO_EOF_CODE) && !defined(TEST_IP)
-#  define TEST_IP               (ip < ip_end)
-#endif
-
-#if defined(TEST_IP)
-#  define HAVE_TEST_IP
-#else
-#  define TEST_IP               1
-#endif
-#if defined(TEST_OP)
-#  define HAVE_TEST_OP
-#else
-#  define TEST_OP               1
-#endif
-
-#if defined(NEED_IP)
-#  define HAVE_NEED_IP
-#else
-#  define NEED_IP(x)            ((void) 0)
-#endif
-#if defined(NEED_OP)
-#  define HAVE_NEED_OP
-#else
-#  define NEED_OP(x)            ((void) 0)
-#endif
-
-#if defined(HAVE_TEST_IP) || defined(HAVE_NEED_IP)
-#  define HAVE_ANY_IP
-#endif
-#if defined(HAVE_TEST_OP) || defined(HAVE_NEED_OP)
-#  define HAVE_ANY_OP
-#endif
-
-#if defined(DO_DECOMPRESS)
-LZO_PUBLIC(int)
-DO_DECOMPRESS  ( const lzo_byte *in , lzo_uint  in_len,
-		       lzo_byte *out, lzo_uint *out_len,
-		       lzo_voidp wrkmem )
-#endif
-{
-    register lzo_byte *op;
-    register const lzo_byte *ip;
-    register lzo_uint t;
-#if defined(COPY_DICT)
-    lzo_uint m_off;
-    const lzo_byte *dict_end;
-#else
-    register const lzo_byte *m_pos;
-#endif
-
-    const lzo_byte * const ip_end = in + in_len;
-#if defined(HAVE_ANY_OP)
-    lzo_byte * const op_end = out + *out_len;
-#endif
-#if defined(LZO1Z)
-    lzo_uint last_m_off = 0;
-#endif
-
-    LZO_UNUSED(wrkmem);
-
-#if defined(__LZO_QUERY_DECOMPRESS)
-    if (__LZO_IS_DECOMPRESS_QUERY(in,in_len,out,out_len,wrkmem))
-	return __LZO_QUERY_DECOMPRESS(in,in_len,out,out_len,wrkmem,0,0);
-#endif
-
-#if defined(COPY_DICT)
-    if (dict)
-    {
-	if (dict_len > M4_MAX_OFFSET)
-	{
-	    dict += dict_len - M4_MAX_OFFSET;
-	    dict_len = M4_MAX_OFFSET;
-	}
-	dict_end = dict + dict_len;
-    }
-    else
-    {
-	dict_len = 0;
-	dict_end = NULL;
-    }
-#endif
-
-    *out_len = 0;
-
-    op = out;
-    ip = in;
-
-    if (*ip > 17)
-    {
-	t = *ip++ - 17;
-	if (t < 4)
-	    goto match_next;
-	assert(t > 0); NEED_OP(t); NEED_IP(t+1);
-	do *op++ = *ip++; while (--t > 0);
-	goto first_literal_run;
-    }
-
-    while (TEST_IP && TEST_OP)
-    {
-	t = *ip++;
-	if (t >= 16)
-	    goto match;
-	if (t == 0)
-	{
-	    NEED_IP(1);
-	    while (*ip == 0)
-	    {
-		t += 255;
-		ip++;
-		NEED_IP(1);
-	    }
-	    t += 15 + *ip++;
-	}
-	assert(t > 0); NEED_OP(t+3); NEED_IP(t+4);
-#if defined(LZO_UNALIGNED_OK_4) || defined(LZO_ALIGNED_OK_4)
-#if !defined(LZO_UNALIGNED_OK_4)
-	if (PTR_ALIGNED2_4(op,ip))
-	{
-#endif
-	* (lzo_uint32p) op = * (const lzo_uint32p) ip;
-	op += 4; ip += 4;
-	if (--t > 0)
-	{
-	    if (t >= 4)
-	    {
-		do {
-		    * (lzo_uint32p) op = * (const lzo_uint32p) ip;
-		    op += 4; ip += 4; t -= 4;
-		} while (t >= 4);
-		if (t > 0) do *op++ = *ip++; while (--t > 0);
-	    }
-	    else
-		do *op++ = *ip++; while (--t > 0);
-	}
-#if !defined(LZO_UNALIGNED_OK_4)
-	}
-	else
-#endif
-#endif
-#if !defined(LZO_UNALIGNED_OK_4)
-	{
-	    *op++ = *ip++; *op++ = *ip++; *op++ = *ip++;
-	    do *op++ = *ip++; while (--t > 0);
-	}
-#endif
-
-first_literal_run:
-
-	t = *ip++;
-	if (t >= 16)
-	    goto match;
-#if defined(COPY_DICT)
-#if defined(LZO1Z)
-	m_off = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2);
-	last_m_off = m_off;
-#else
-	m_off = (1 + M2_MAX_OFFSET) + (t >> 2) + (*ip++ << 2);
-#endif
-	NEED_OP(3);
-	t = 3; COPY_DICT(t,m_off)
-#else
-#if defined(LZO1Z)
-	t = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2);
-	m_pos = op - t;
-	last_m_off = t;
-#else
-	m_pos = op - (1 + M2_MAX_OFFSET);
-	m_pos -= t >> 2;
-	m_pos -= *ip++ << 2;
-#endif
-	TEST_LOOKBEHIND(m_pos,out); NEED_OP(3);
-	*op++ = *m_pos++; *op++ = *m_pos++; *op++ = *m_pos;
-#endif
-	goto match_done;
-
-	while (TEST_IP && TEST_OP)
-	{
-match:
-	    if (t >= 64)
-	    {
-#if defined(COPY_DICT)
-#if defined(LZO1X)
-		m_off = 1 + ((t >> 2) & 7) + (*ip++ << 3);
-		t = (t >> 5) - 1;
-#elif defined(LZO1Y)
-		m_off = 1 + ((t >> 2) & 3) + (*ip++ << 2);
-		t = (t >> 4) - 3;
-#elif defined(LZO1Z)
-		m_off = t & 0x1f;
-		if (m_off >= 0x1c)
-		    m_off = last_m_off;
-		else
-		{
-		    m_off = 1 + (m_off << 6) + (*ip++ >> 2);
-		    last_m_off = m_off;
-		}
-		t = (t >> 5) - 1;
-#endif
-#else
-#if defined(LZO1X)
-		m_pos = op - 1;
-		m_pos -= (t >> 2) & 7;
-		m_pos -= *ip++ << 3;
-		t = (t >> 5) - 1;
-#elif defined(LZO1Y)
-		m_pos = op - 1;
-		m_pos -= (t >> 2) & 3;
-		m_pos -= *ip++ << 2;
-		t = (t >> 4) - 3;
-#elif defined(LZO1Z)
-		{
-		    lzo_uint off = t & 0x1f;
-		    m_pos = op;
-		    if (off >= 0x1c)
-		    {
-			assert(last_m_off > 0);
-			m_pos -= last_m_off;
-		    }
-		    else
-		    {
-			off = 1 + (off << 6) + (*ip++ >> 2);
-			m_pos -= off;
-			last_m_off = off;
-		    }
-		}
-		t = (t >> 5) - 1;
-#endif
-		TEST_LOOKBEHIND(m_pos,out); assert(t > 0); NEED_OP(t+3-1);
-		goto copy_match;
-#endif
-	    }
-	    else if (t >= 32)
-	    {
-		t &= 31;
-		if (t == 0)
-		{
-		    NEED_IP(1);
-		    while (*ip == 0)
-		    {
-			t += 255;
-			ip++;
-			NEED_IP(1);
-		    }
-		    t += 31 + *ip++;
-		}
-#if defined(COPY_DICT)
-#if defined(LZO1Z)
-		m_off = 1 + (ip[0] << 6) + (ip[1] >> 2);
-		last_m_off = m_off;
-#else
-		m_off = 1 + (ip[0] >> 2) + (ip[1] << 6);
-#endif
-#else
-#if defined(LZO1Z)
-		{
-		    lzo_uint off = 1 + (ip[0] << 6) + (ip[1] >> 2);
-		    m_pos = op - off;
-		    last_m_off = off;
-		}
-#elif defined(LZO_UNALIGNED_OK_2) && (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN)
-		m_pos = op - 1;
-		m_pos -= (* (const lzo_ushortp) ip) >> 2;
-#else
-		m_pos = op - 1;
-		m_pos -= (ip[0] >> 2) + (ip[1] << 6);
-#endif
-#endif
-		ip += 2;
-	    }
-	    else if (t >= 16)
-	    {
-#if defined(COPY_DICT)
-		m_off = (t & 8) << 11;
-#else
-		m_pos = op;
-		m_pos -= (t & 8) << 11;
-#endif
-		t &= 7;
-		if (t == 0)
-		{
-		    NEED_IP(1);
-		    while (*ip == 0)
-		    {
-			t += 255;
-			ip++;
-			NEED_IP(1);
-		    }
-		    t += 7 + *ip++;
-		}
-#if defined(COPY_DICT)
-#if defined(LZO1Z)
-		m_off += (ip[0] << 6) + (ip[1] >> 2);
-#else
-		m_off += (ip[0] >> 2) + (ip[1] << 6);
-#endif
-		ip += 2;
-		if (m_off == 0)
-		    goto eof_found;
-		m_off += 0x4000;
-#if defined(LZO1Z)
-		last_m_off = m_off;
-#endif
-#else
-#if defined(LZO1Z)
-		m_pos -= (ip[0] << 6) + (ip[1] >> 2);
-#elif defined(LZO_UNALIGNED_OK_2) && (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN)
-		m_pos -= (* (const lzo_ushortp) ip) >> 2;
-#else
-		m_pos -= (ip[0] >> 2) + (ip[1] << 6);
-#endif
-		ip += 2;
-		if (m_pos == op)
-		    goto eof_found;
-		m_pos -= 0x4000;
-#if defined(LZO1Z)
-		last_m_off = op - m_pos;
-#endif
-#endif
-	    }
-	    else
-	    {
-#if defined(COPY_DICT)
-#if defined(LZO1Z)
-		m_off = 1 + (t << 6) + (*ip++ >> 2);
-		last_m_off = m_off;
-#else
-		m_off = 1 + (t >> 2) + (*ip++ << 2);
-#endif
-		NEED_OP(2);
-		t = 2; COPY_DICT(t,m_off)
-#else
-#if defined(LZO1Z)
-		t = 1 + (t << 6) + (*ip++ >> 2);
-		m_pos = op - t;
-		last_m_off = t;
-#else
-		m_pos = op - 1;
-		m_pos -= t >> 2;
-		m_pos -= *ip++ << 2;
-#endif
-		TEST_LOOKBEHIND(m_pos,out); NEED_OP(2);
-		*op++ = *m_pos++; *op++ = *m_pos;
-#endif
-		goto match_done;
-	    }
-
-#if defined(COPY_DICT)
-
-	    NEED_OP(t+3-1);
-	    t += 3-1; COPY_DICT(t,m_off)
-
-#else
-
-	    TEST_LOOKBEHIND(m_pos,out); assert(t > 0); NEED_OP(t+3-1);
-#if defined(LZO_UNALIGNED_OK_4) || defined(LZO_ALIGNED_OK_4)
-#if !defined(LZO_UNALIGNED_OK_4)
-	    if (t >= 2 * 4 - (3 - 1) && PTR_ALIGNED2_4(op,m_pos))
-	    {
-		assert((op - m_pos) >= 4);
-#else
-	    if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4)
-	    {
-#endif
-		* (lzo_uint32p) op = * (const lzo_uint32p) m_pos;
-		op += 4; m_pos += 4; t -= 4 - (3 - 1);
-		do {
-		    * (lzo_uint32p) op = * (const lzo_uint32p) m_pos;
-		    op += 4; m_pos += 4; t -= 4;
-		} while (t >= 4);
-		if (t > 0) do *op++ = *m_pos++; while (--t > 0);
-	    }
-	    else
-#endif
-	    {
-copy_match:
-		*op++ = *m_pos++; *op++ = *m_pos++;
-		do *op++ = *m_pos++; while (--t > 0);
-	    }
-
-#endif
-
-match_done:
-#if defined(LZO1Z)
-	    t = ip[-1] & 3;
-#else
-	    t = ip[-2] & 3;
-#endif
-	    if (t == 0)
-		break;
-
-match_next:
-	    assert(t > 0); NEED_OP(t); NEED_IP(t+1);
-	    do *op++ = *ip++; while (--t > 0);
-	    t = *ip++;
-	}
-    }
-
-#if defined(HAVE_TEST_IP) || defined(HAVE_TEST_OP)
-    *out_len = op - out;
-    return LZO_E_EOF_NOT_FOUND;
-#endif
-
-eof_found:
-    assert(t == 1);
-    *out_len = op - out;
-    return (ip == ip_end ? LZO_E_OK :
-	   (ip < ip_end  ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN));
-
-#if defined(HAVE_NEED_IP)
-input_overrun:
-    *out_len = op - out;
-    return LZO_E_INPUT_OVERRUN;
-#endif
-
-#if defined(HAVE_NEED_OP)
-output_overrun:
-    *out_len = op - out;
-    return LZO_E_OUTPUT_OVERRUN;
-#endif
-
-#if defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND)
-lookbehind_overrun:
-    *out_len = op - out;
-    return LZO_E_LOOKBEHIND_OVERRUN;
-#endif
-}
-
-/***** End of minilzo.c *****/
-
--- a/minilzo.h	Sat Apr 13 17:44:14 2002 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,97 +0,0 @@
-/* minilzo.h -- mini subset of the LZO real-time data compression library
-
-   This file is part of the LZO real-time data compression library.
-
-   Copyright (C) 2000 Markus Franz Xaver Johannes Oberhumer
-   Copyright (C) 1999 Markus Franz Xaver Johannes Oberhumer
-   Copyright (C) 1998 Markus Franz Xaver Johannes Oberhumer
-   Copyright (C) 1997 Markus Franz Xaver Johannes Oberhumer
-   Copyright (C) 1996 Markus Franz Xaver Johannes Oberhumer
-
-   The LZO library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of
-   the License, or (at your option) any later version.
-
-   The LZO library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with the LZO library; see the file COPYING.
-   If not, write to the Free Software Foundation, Inc.,
-   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
-   Markus F.X.J. Oberhumer
-   <markus.oberhumer@jk.uni-linz.ac.at>
-   http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html
- */
-
-/*
- * NOTE:
- *   the full LZO package can be found at
- *   http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html
- */
-
-
-#ifndef __MINILZO_H
-#define __MINILZO_H
-
-#define MINILZO_VERSION         0x1070
-
-#ifdef __LZOCONF_H
-#  error "you cannot use both LZO and miniLZO"
-#endif
-
-#undef LZO_HAVE_CONFIG_H
-#include "lzoconf.h"
-
-#if !defined(LZO_VERSION) || (LZO_VERSION != MINILZO_VERSION)
-#  error "version mismatch in header files"
-#endif
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/***********************************************************************
-//
-************************************************************************/
-
-/* Memory required for the wrkmem parameter.
- * When the required size is 0, you can also pass a NULL pointer.
- */
-
-#define LZO1X_MEM_COMPRESS      LZO1X_1_MEM_COMPRESS
-#define LZO1X_1_MEM_COMPRESS    ((lzo_uint32) (16384L * lzo_sizeof_dict_t))
-#define LZO1X_MEM_DECOMPRESS    (0)
-
-
-/* compression */
-LZO_EXTERN(int)
-lzo1x_1_compress        ( const lzo_byte *src, lzo_uint  src_len,
-                                lzo_byte *dst, lzo_uint *dst_len,
-                                lzo_voidp wrkmem );
-
-/* decompression */
-LZO_EXTERN(int)
-lzo1x_decompress        ( const lzo_byte *src, lzo_uint  src_len,
-                                lzo_byte *dst, lzo_uint *dst_len,
-                                lzo_voidp wrkmem /* NOT USED */ );
-
-/* safe decompression with overrun testing */
-LZO_EXTERN(int)
-lzo1x_decompress_safe   ( const lzo_byte *src, lzo_uint  src_len,
-                                lzo_byte *dst, lzo_uint *dst_len,
-                                lzo_voidp wrkmem /* NOT USED */ );
-
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* already included */
-
--- a/msvidc.c	Sat Apr 13 17:44:14 2002 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,393 +0,0 @@
-/*
-    Microsoft Video 1 Decoder
-    
-    (C) 2001 Mike Melanson
-    
-    The description of the algorithm you can read here:
-      http://www.pcisys.net/~melanson/codecs/
-
-    32bpp support (c) alex
-*/
-
-#include "config.h"
-#include "bswap.h"
-
-#define LE_16(x) (le2me_16(*(unsigned short *)(x)))
-
-#define DECODE_BGR555_TO_BGR888(x) \
-        x.c1_b = (x.c1 >> 7) & 0xF8; \
-        x.c1_g = (x.c1 >> 2) & 0xF8; \
-        x.c1_r = (x.c1 << 3) & 0xF8; \
-        x.c2_b = (x.c2 >> 7) & 0xF8; \
-        x.c2_g = (x.c2 >> 2) & 0xF8; \
-        x.c2_r = (x.c2 << 3) & 0xF8;
-
-#define DECODE_PALETTE_TO_BGR888(x) \
-        x.c1_b = palette_map[x.c1 * 4 + 2]; \
-        x.c1_g = palette_map[x.c1 * 4 + 1]; \
-        x.c1_r = palette_map[x.c1 * 4 + 0]; \
-        x.c2_b = palette_map[x.c2 * 4 + 2]; \
-        x.c2_g = palette_map[x.c2 * 4 + 1]; \
-        x.c2_r = palette_map[x.c2 * 4 + 0];
-
-struct
-{
-  unsigned short c1, c2;
-  unsigned char c1_r, c1_g, c1_b;
-  unsigned char c2_r, c2_g, c2_b;
-} quad[2][2];
-
-void AVI_Decode_Video1_16(
-  char *encoded,
-  int encoded_size,
-  char *decoded,
-  int width,
-  int height,
-  int bytes_per_pixel)
-{
-  int block_ptr, pixel_ptr;
-  int total_blocks;
-  int pixel_x, pixel_y;  // pixel width and height iterators
-  int block_x, block_y;  // block width and height iterators
-  int blocks_wide, blocks_high;  // width and height in 4x4 blocks
-  int block_inc;
-  int row_dec;
-
-  // decoding parameters
-  int stream_ptr;
-  unsigned char byte_a, byte_b;
-  unsigned short flags;
-  int skip_blocks;
-
-  stream_ptr = 0;
-  skip_blocks = 0;
-  blocks_wide = width / 4;
-  blocks_high = height / 4;
-  total_blocks = blocks_wide * blocks_high;
-  block_inc = 4 * bytes_per_pixel;
-  row_dec = (width + 4) * bytes_per_pixel;
-
-  for (block_y = blocks_high; block_y > 0; block_y--)
-  {
-    block_ptr = ((block_y * 4) - 1) * (width * bytes_per_pixel);
-    for (block_x = blocks_wide; block_x > 0; block_x--)
-    {
-      // check if this block should be skipped
-      if (skip_blocks)
-      {
-        block_ptr += block_inc;
-        skip_blocks--;
-        total_blocks--;
-        continue;
-      }
-
-      pixel_ptr = block_ptr;
-
-      // get the next two bytes in the encoded data stream
-      byte_a = encoded[stream_ptr++];
-      byte_b = encoded[stream_ptr++];
-
-      // check if the decode is finished
-      if ((byte_a == 0) && (byte_b == 0) && (total_blocks == 0))
-        return;
-
-      // check if this is a skip code
-      else if ((byte_b & 0xFC) == 0x84)
-      {
-        // but don't count the current block
-        skip_blocks = ((byte_b - 0x84) << 8) + byte_a - 1;
-      }
-
-      // check if this is in the 2- or 8-color classes
-      else if (byte_b < 0x80)
-      {
-        flags = (byte_b << 8) | byte_a;
-
-        quad[0][0].c1 = LE_16(&encoded[stream_ptr]);
-        stream_ptr += 2;
-        quad[0][0].c2 = LE_16(&encoded[stream_ptr]);
-        stream_ptr += 2;
-
-        DECODE_BGR555_TO_BGR888(quad[0][0]);
-
-        if (quad[0][0].c1 & 0x8000)
-        {
-          // 8-color encoding
-          quad[1][0].c1 = LE_16(&encoded[stream_ptr]);
-          stream_ptr += 2;
-          quad[1][0].c2 = LE_16(&encoded[stream_ptr]);
-          stream_ptr += 2;
-          quad[0][1].c1 = LE_16(&encoded[stream_ptr]);
-          stream_ptr += 2;
-          quad[0][1].c2 = LE_16(&encoded[stream_ptr]);
-          stream_ptr += 2;
-          quad[1][1].c1 = LE_16(&encoded[stream_ptr]);
-          stream_ptr += 2;
-          quad[1][1].c2 = LE_16(&encoded[stream_ptr]);
-          stream_ptr += 2;
-
-          DECODE_BGR555_TO_BGR888(quad[0][1]);
-          DECODE_BGR555_TO_BGR888(quad[1][0]);
-          DECODE_BGR555_TO_BGR888(quad[1][1]);
-
-          for (pixel_y = 0; pixel_y < 4; pixel_y++)
-          {
-            for (pixel_x = 0; pixel_x < 4; pixel_x++)
-            {
-              if (flags & 1)
-              {
-                decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c1_r;
-                decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c1_g;
-                decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c1_b;
-		if (bytes_per_pixel == 4) /* 32bpp */
-		    pixel_ptr++;
-              }
-              else
-              {
-                decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c2_r;
-                decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c2_g;
-                decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c2_b;
-		if (bytes_per_pixel == 4) /* 32bpp */
-		    pixel_ptr++;
-              }
-
-              // get the next flag ready to go
-              flags >>= 1;
-            }
-            pixel_ptr -= row_dec;
-          }
-        }
-        else
-        {
-          // 2-color encoding
-          for (pixel_y = 0; pixel_y < 4; pixel_y++)
-          {
-            for (pixel_x = 0; pixel_x < 4; pixel_x++)
-            {
-              if (flags & 1)
-              {
-                decoded[pixel_ptr++] = quad[0][0].c1_r;
-                decoded[pixel_ptr++] = quad[0][0].c1_g;
-                decoded[pixel_ptr++] = quad[0][0].c1_b;
-		if (bytes_per_pixel == 4) /* 32bpp */
-		    pixel_ptr++;
-              }
-              else
-              {
-                decoded[pixel_ptr++] = quad[0][0].c2_r;
-                decoded[pixel_ptr++] = quad[0][0].c2_g;
-                decoded[pixel_ptr++] = quad[0][0].c2_b;
-		if (bytes_per_pixel == 4) /* 32bpp */
-		    pixel_ptr++;
-              }
-
-              // get the next flag ready to go
-              flags >>= 1;
-            }
-            pixel_ptr -= row_dec;
-          }
-        }
-      }
-
-      // otherwise, it's a 1-color block
-      else
-      {
-        quad[0][0].c1 = (byte_b << 8) | byte_a;
-        DECODE_BGR555_TO_BGR888(quad[0][0]);
-
-        for (pixel_y = 0; pixel_y < 4; pixel_y++)
-        {
-          for (pixel_x = 0; pixel_x < 4; pixel_x++)
-          {
-            decoded[pixel_ptr++] = quad[0][0].c1_r;
-            decoded[pixel_ptr++] = quad[0][0].c1_g;
-            decoded[pixel_ptr++] = quad[0][0].c1_b;
-	    if (bytes_per_pixel == 4) /* 32bpp */
-		pixel_ptr++;
-          }
-          pixel_ptr -= row_dec;
-        }
-      }
-
-      block_ptr += block_inc;
-      total_blocks--;
-    }
-  }
-}
-
-void AVI_Decode_Video1_8(
-  char *encoded,
-  int encoded_size,
-  char *decoded,
-  int width,
-  int height,
-  unsigned char *palette_map,
-  int bytes_per_pixel)
-{
-  int block_ptr, pixel_ptr;
-  int total_blocks;
-  int pixel_x, pixel_y;  // pixel width and height iterators
-  int block_x, block_y;  // block width and height iterators
-  int blocks_wide, blocks_high;  // width and height in 4x4 blocks
-  int block_inc;
-  int row_dec;
-
-  // decoding parameters
-  int stream_ptr;
-  unsigned char byte_a, byte_b;
-  unsigned short flags;
-  int skip_blocks;
-
-  stream_ptr = 0;
-  skip_blocks = 0;
-  blocks_wide = width / 4;
-  blocks_high = height / 4;
-  total_blocks = blocks_wide * blocks_high;
-  block_inc = 4 * bytes_per_pixel;
-  row_dec = (width + 4) * bytes_per_pixel;
-
-  for (block_y = blocks_high; block_y > 0; block_y--)
-  {
-    block_ptr = ((block_y * 4) - 1) * (width * bytes_per_pixel);
-    for (block_x = blocks_wide; block_x > 0; block_x--)
-    {
-      // check if this block should be skipped
-      if (skip_blocks)
-      {
-        block_ptr += block_inc;
-        skip_blocks--;
-        total_blocks--;
-        continue;
-      }
-
-      pixel_ptr = block_ptr;
-
-      // get the next two bytes in the encoded data stream
-      byte_a = encoded[stream_ptr++];
-      byte_b = encoded[stream_ptr++];
-
-      // check if the decode is finished
-      if ((byte_a == 0) && (byte_b == 0) && (total_blocks == 0))
-        return;
-
-      // check if this is a skip code
-      else if ((byte_b & 0xFC) == 0x84)
-      {
-        // but don't count the current block
-        skip_blocks = ((byte_b - 0x84) << 8) + byte_a - 1;
-      }
-
-      // check if this is a 2-color block
-      else if (byte_b < 0x80)
-      {
-        flags = (byte_b << 8) | byte_a;
-
-        quad[0][0].c1 = (unsigned char)encoded[stream_ptr++];
-        quad[0][0].c2 = (unsigned char)encoded[stream_ptr++];
-
-        DECODE_PALETTE_TO_BGR888(quad[0][0]);
-
-        // 2-color encoding
-        for (pixel_y = 0; pixel_y < 4; pixel_y++)
-        {
-          for (pixel_x = 0; pixel_x < 4; pixel_x++)
-          {
-            if (flags & 1)
-            {
-              decoded[pixel_ptr++] = quad[0][0].c1_r;
-              decoded[pixel_ptr++] = quad[0][0].c1_g;
-              decoded[pixel_ptr++] = quad[0][0].c1_b;
-	      if (bytes_per_pixel == 4) /* 32bpp */
-		pixel_ptr++;
-            }
-            else
-            {
-              decoded[pixel_ptr++] = quad[0][0].c2_r;
-              decoded[pixel_ptr++] = quad[0][0].c2_g;
-              decoded[pixel_ptr++] = quad[0][0].c2_b;
-	      if (bytes_per_pixel == 4) /* 32bpp */
-		pixel_ptr++;
-            }
-
-            // get the next flag ready to go
-            flags >>= 1;
-          }
-          pixel_ptr -= row_dec;
-        }
-      }
-
-      // check if it's an 8-color block
-      else if (byte_b >= 0x90)
-      {
-        flags = (byte_b << 8) | byte_a;
-
-        quad[0][0].c1 = (unsigned char)encoded[stream_ptr++];
-        quad[0][0].c2 = (unsigned char)encoded[stream_ptr++];
-        quad[1][0].c1 = (unsigned char)encoded[stream_ptr++];
-        quad[1][0].c2 = (unsigned char)encoded[stream_ptr++];
-
-        quad[0][1].c1 = (unsigned char)encoded[stream_ptr++];
-        quad[0][1].c2 = (unsigned char)encoded[stream_ptr++];
-        quad[1][1].c1 = (unsigned char)encoded[stream_ptr++];
-        quad[1][1].c2 = (unsigned char)encoded[stream_ptr++];
-
-        DECODE_PALETTE_TO_BGR888(quad[0][0]);
-        DECODE_PALETTE_TO_BGR888(quad[0][1]);
-        DECODE_PALETTE_TO_BGR888(quad[1][0]);
-        DECODE_PALETTE_TO_BGR888(quad[1][1]);
-
-        for (pixel_y = 0; pixel_y < 4; pixel_y++)
-        {
-          for (pixel_x = 0; pixel_x < 4; pixel_x++)
-          {
-            if (flags & 1)
-            {
-              decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c1_r;
-              decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c1_g;
-              decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c1_b;
-	      if (bytes_per_pixel == 4) /* 32bpp */
-		pixel_ptr++;
-            }
-            else
-            {
-              decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c2_r;
-              decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c2_g;
-              decoded[pixel_ptr++] = quad[pixel_x >> 1][pixel_y >> 1].c2_b;
-	      if (bytes_per_pixel == 4) /* 32bpp */
-		pixel_ptr++;
-            }
-
-            // get the next flag ready to go
-            flags >>= 1;
-          }
-          pixel_ptr -= row_dec;
-        }
-      }
-
-      // otherwise, it's a 1-color block
-      else
-      {
-        // init c2 along with c1 just so c2 is a known value for macro
-        quad[0][0].c1 = quad[0][0].c2 = byte_a;
-        DECODE_PALETTE_TO_BGR888(quad[0][0]);
-
-        for (pixel_y = 0; pixel_y < 4; pixel_y++)
-        {
-          for (pixel_x = 0; pixel_x < 4; pixel_x++)
-          {
-            decoded[pixel_ptr++] = quad[0][0].c1_r;
-            decoded[pixel_ptr++] = quad[0][0].c1_g;
-            decoded[pixel_ptr++] = quad[0][0].c1_b;
-	    if (bytes_per_pixel == 4) /* 32bpp */
-		pixel_ptr++;
-          }
-          pixel_ptr -= row_dec;
-        }
-      }
-
-      block_ptr += block_inc;
-      total_blocks--;
-    }
-  }
-}
-
--- a/nuppelvideo.c	Sat Apr 13 17:44:14 2002 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,123 +0,0 @@
-/*
- * NuppelVideo 0.05 file parser
- * for MPlayer
- * by Panagiotis Issaris <takis@lumumba.luc.ac.be>
- *
- * Reworked by alex
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include "config.h"
-#include "mp_msg.h"
-
-#include "fastmemcpy.h"
-
-#include "libmpdemux/nuppelvideo.h" 
-#include "RTjpegN.h"
-#include "minilzo.h"
-
-#define KEEP_BUFFER
-
-void decode_nuv( unsigned char *encoded, int encoded_size,
-		unsigned char *decoded, int width, int height)
-{
-	int r;
-	unsigned int out_len;
-	struct rtframeheader *encodedh = ( struct rtframeheader* ) encoded;
-	static unsigned char *buffer = 0; /* for RTJpeg with LZO decompress */
-#ifdef KEEP_BUFFER
-	static unsigned char *previous_buffer = 0; /* to support Last-frame-copy */
-#endif
-	static int is_lzo_inited = 0;
-
-//	printf("frametype: %c, comtype: %c, encoded_size: %d, width: %d, height: %d\n",
-//	    encodedh->frametype, encodedh->comptype, encoded_size, width, height);
-
-	switch(encodedh->frametype)
-	{
-	    case 'D':	/* additional data for compressors */
-	    {
-		/* tables are in encoded */
-		if (encodedh->comptype == 'R')
-		{
-		    RTjpeg_init_decompress ( encoded+12, width, height );
-		    mp_msg(MSGT_DECVIDEO, MSGL_V, "Found RTjpeg tables (size: %d, width: %d, height: %d)\n",
-			encoded_size-12, width, height);
-		}
-		break;
-	    }
-	    case 'V':
-	    {
-#ifdef KEEP_BUFFER		
-		if (!previous_buffer) 
-			previous_buffer = ( unsigned char * ) malloc ( width * height + ( width * height ) / 2 );
-#endif
-
-		if (((encodedh->comptype == '2') ||
-		    (encodedh->comptype == '3')) && !is_lzo_inited)
-		{
-		    /* frame using lzo, init lzo first if not inited */
-		    if ( lzo_init() != LZO_E_OK ) 
-		    {
-			mp_msg(MSGT_DECVIDEO, MSGL_ERR, "LZO init failed\n");
-			return;
-		    }
-		    is_lzo_inited = 1;
-		}
-
-		switch(encodedh->comptype)
-		{
-		    case '0': /* raw YUV420 */
-			memcpy(decoded, encoded + 12, width*height*3/2);
-			break;
-		    case '1': /* RTJpeg */
-			RTjpeg_decompressYUV420 ( ( __s8 * ) encoded + 12, decoded );
-			break;
-		    case '2': /* RTJpeg with LZO */
-			if (!buffer) 
-			    buffer = ( unsigned char * ) malloc ( width * height + ( width * height ) / 2 );
-			if (!buffer)
-			{
-			    mp_msg(MSGT_DECVIDEO, MSGL_ERR, "Nuppelvideo: error decompressing\n");
-			    break;
-			}
-			r = lzo1x_decompress ( encoded + 12, encodedh->packetlength, buffer, &out_len, NULL );
-			if ( r != LZO_E_OK ) 
-			{
-			    mp_msg(MSGT_DECVIDEO, MSGL_ERR, "Nuppelvideo: error decompressing\n");
-			    break;
-			}
-			RTjpeg_decompressYUV420 ( ( __s8 * ) buffer, decoded );
-			break;
-		    case '3': /* raw YUV420 with LZO */
-			r = lzo1x_decompress ( encoded + 12, encodedh->packetlength, decoded, &out_len, NULL );
-			if ( r != LZO_E_OK ) 
-			{
-			    mp_msg(MSGT_DECVIDEO, MSGL_ERR, "Nuppelvideo: error decompressing\n");
-			    break;
-			}
-			break;
-		    case 'N': /* black frame */
-			memset ( decoded, 0,  width * height );
-			memset ( decoded + width * height, 127, width * height / 2);
-			break;
-		    case 'L': /* copy last frame */
-#ifdef KEEP_BUFFER
-			memcpy ( decoded, previous_buffer, width*height*3/2);
-#endif
-			break;
-		}
-
-#ifdef KEEP_BUFFER
-		memcpy(previous_buffer, decoded, width*height*3/2);
-#endif
-		break;
-	    }
-	    default:
-		mp_msg(MSGT_DECVIDEO, MSGL_V, "Nuppelvideo: unknwon frametype: %c\n",
-		    encodedh->frametype);
-	}
-}
--- a/qtrle.c	Sat Apr 13 17:44:14 2002 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,125 +0,0 @@
-/*
-    Quicktime Animation (RLE) Decoder for MPlayer
-
-    (C) 2001 Mike Melanson
-*/
-
-#include "config.h"
-#include "bswap.h"
-
-#define BE_16(x) (be2me_16(*(unsigned short *)(x)))
-#define BE_32(x) (be2me_32(*(unsigned int *)(x)))
-
-// 256 RGB entries; 25% of these bytes will be unused, but it's faster
-// to index 4-byte entries
-static unsigned char palette[256 * 4];
-
-void qt_decode_rle24(
-  unsigned char *encoded,
-  int encoded_size,
-  unsigned char *decoded,
-  int width,
-  int height,
-  int bytes_per_pixel)
-{
-  int stream_ptr;
-  int header;
-  int start_line;
-  int lines_to_change;
-  signed char rle_code;
-  int row_ptr, pixel_ptr;
-  int row_inc = bytes_per_pixel * width;
-  unsigned char r, g, b;
-
-  // check if this frame is even supposed to change
-  if (encoded_size < 8)
-    return;
-
-  // start after the chunk size
-  stream_ptr = 4;
-
-  // fetch the header
-  header = BE_16(&encoded[stream_ptr]);
-  stream_ptr += 2;
-
-  // if a header is present, fetch additional decoding parameters
-  if (header & 0x0008)
-  {
-    start_line = BE_16(&encoded[stream_ptr]);
-    stream_ptr += 4;
-    lines_to_change = BE_16(&encoded[stream_ptr]);
-    stream_ptr += 4;
-  }
-  else
-  {
-    start_line = 0;
-    lines_to_change = height;
-  }
-
-  row_ptr = row_inc * start_line;
-  while (lines_to_change--)
-  {
-    pixel_ptr = row_ptr + ((encoded[stream_ptr++] - 1) * bytes_per_pixel);
-
-    while ((rle_code = (signed char)encoded[stream_ptr++]) != -1)
-    {
-      if (rle_code == 0)
-        // there's another skip code in the stream
-        pixel_ptr += ((encoded[stream_ptr++] - 1) * bytes_per_pixel);
-      else if (rle_code < 0)
-      {
-        // decode the run length code
-        rle_code = -rle_code;
-        r = encoded[stream_ptr++];
-        g = encoded[stream_ptr++];
-        b = encoded[stream_ptr++];
-        while (rle_code--)
-        {
-          decoded[pixel_ptr++] = b;
-          decoded[pixel_ptr++] = g;
-          decoded[pixel_ptr++] = r;
-          if (bytes_per_pixel == 4)
-            pixel_ptr++;
-        }
-      }
-      else
-      {
-        // copy pixels directly to output
-        while (rle_code--)
-        {
-          decoded[pixel_ptr++] = encoded[stream_ptr + 2];
-          decoded[pixel_ptr++] = encoded[stream_ptr + 1];
-          decoded[pixel_ptr++] = encoded[stream_ptr + 0];
-          stream_ptr += 3;
-          if (bytes_per_pixel == 4)
-            pixel_ptr++;
-        }
-      }
-    }
-
-    row_ptr += row_inc;
-  }
-}
-
-void qt_decode_rle(
-  unsigned char *encoded,
-  int encoded_size,
-  unsigned char *decoded,
-  int width,
-  int height,
-  int encoded_bpp,
-  int bytes_per_pixel)
-{
-  switch (encoded_bpp)
-  {
-    case 24:
-      qt_decode_rle24(
-        encoded,
-        encoded_size,
-        decoded,
-        width,
-        height,
-        bytes_per_pixel);
-      break;
-  }
-}
--- a/qtrpza.c	Sat Apr 13 17:44:14 2002 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,264 +0,0 @@
-/*
- *
- * Apple Video (rpza) QuickTime Decoder for Mplayer
- * (c) 2002 Roberto Togni
- *
- * Fourcc: rpza, azpr
- *
- * Some code comes from qtsmc.c by Mike Melanson
- *
- * A description of the decoding algorithm can be found here:
- *   http://www.pcisys.net/~melanson/codecs/
- */
-
-#include "config.h"
-#include "bswap.h"
-#include "mp_msg.h"
-
-#define BE_16(x) (be2me_16(*(unsigned short *)(x)))
-#define BE_32(x) (be2me_32(*(unsigned int *)(x)))
-
-
-#define ADVANCE_BLOCK() \
-{ \
-	pixel_ptr += block_x_inc; \
-	if (pixel_ptr >= (width * bytes_per_pixel)) \
-	{ \
-		pixel_ptr = 0; \
-		row_ptr += block_y_inc * 4; \
-	} \
-	total_blocks--; \
-	if (total_blocks < 0) \
-	{ \
-		mp_msg(MSGT_DECVIDEO, MSGL_WARN, "block counter just went negative (this should not happen)\n"); \
-		return; \
-	} \
-}
-
-#define PAINT_CURRENT_PIXEL(r, g, b, color) \
-{ \
-	if (bytes_per_pixel == 2) { \
-		(*(unsigned short*)(&decoded[block_ptr])) = color & 0x7fff; \
-		block_ptr += 2; \
-	} else { \
-		decoded[block_ptr++] = (b); \
-		decoded[block_ptr++] = (g); \
-		decoded[block_ptr++] = (r); \
-		if (bytes_per_pixel == 4) /* 32bpp */ \
-			block_ptr++; \
-	} \
-}
-
-#define COLOR_FIX(col_out, col_in) (col_out) = ((col_in) << 3) | ((col_in) >> 2)
-
-#define COLOR_TO_RGB(r, g, b, color) \
-{ \
-	if (bytes_per_pixel != 2) { \
-		unsigned short tmp; \
-		tmp = (color >> 10) & 0x1f; \
-		COLOR_FIX (r, tmp); \
-		tmp = (color >> 5) & 0x1f; \
-		COLOR_FIX (g, tmp); \
-		tmp = color & 0x1f; \
-		COLOR_FIX (b, tmp); \
-	} \
-}
-
-#define COLORAB_TO_RGB4(rgb4, color4, colorA, colorB) \
-{ \
-	unsigned short ta, tb, tt; \
-	if (bytes_per_pixel != 2) { \
-		ta = (colorA >> 10) & 0x1f; \
-		tb = (colorB >> 10) & 0x1f; \
-		COLOR_FIX (rgb4[3][0], ta); \
-		COLOR_FIX (rgb4[0][0], tb); \
-		tt = (11 * ta + 21 * tb) >> 5; \
-		COLOR_FIX (rgb4[1][0], tt); \
-		tt = (21 * ta + 11 * tb) >> 5; \
-		COLOR_FIX (rgb4[2][0], tt); \
-		ta = (colorA >> 5) & 0x1f; \
-		tb = (colorB >> 5) & 0x1f; \
-		COLOR_FIX (rgb4[3][1], ta); \
-		COLOR_FIX (rgb4[0][1], tb); \
-		tt = (11 * ta + 21 * tb) >> 5; \
-		COLOR_FIX (rgb4[1][1], tt); \
-		tt = (21 * ta + 11 * tb) >> 5; \
-		COLOR_FIX (rgb4[2][1], tt); \
-		ta = colorA	& 0x1f; \
-		tb = colorB	& 0x1f; \
-		COLOR_FIX (rgb4[3][2], ta); \
-		COLOR_FIX (rgb4[0][2], tb); \
-		tt = (11 * ta + 21 * tb) >> 5; \
-		COLOR_FIX (rgb4[1][2], tt); \
-		tt = (21 * ta + 11 * tb) >> 5; \
-		COLOR_FIX (rgb4[2][2], tt); \
-	} else { \
-		color4[3] = colorA; \
-		color4[0] = colorB; \
-		ta = (colorA >> 10) & 0x1f; \
-		tb = (colorB >> 10) & 0x1f; \
-		color4[1] = ((11 * ta + 21 * tb) << 5) & 0x7c00; \
-		color4[2] = ((21 * ta + 11 * tb) << 5) & 0x7c00; \
-		ta = (colorA >> 5) & 0x1f; \
-		tb = (colorB >> 5) & 0x1f; \
-		color4[1] |= (11 * ta + 21 * tb) & 0x3e0; \
-		color4[2] |= (21 * ta + 11 * tb) & 0x3e0; \
-		ta = colorA	& 0x1f; \
-		tb = colorB	& 0x1f; \
-		color4[1] |= (11 * ta + 21 * tb) >> 5; \
-		color4[2] |= (21 * ta + 11 * tb) >> 5; \
-	} \
-}
-
-
-
-/*
- * rpza frame decoder
- *
- * Input values:
- *
- *	*encoded: buffer of encoded data (chunk)
- *	encoded_size: length of encoded buffer
- *	*decoded: buffer where decoded data is written (image buffer)
- *	width: width of decoded frame in pixels
- *	height: height of decoded frame in pixels
- *	bytes_per_pixel: bytes/pixel in output image (color depth)
- *
- */
-
-void qt_decode_rpza(char *encoded, int encoded_size, char *decoded, int width,
-										int height, int bytes_per_pixel)
-{
-
-	int i;
-	int stream_ptr = 0;
-	int chunk_size;
-	unsigned char opcode;
-	int n_blocks;
-	unsigned short colorA, colorB;
-	unsigned char r, g, b;
-	unsigned char rgb4[4][3];
-	unsigned short color4[4];
-	unsigned char index, idx;
-
-	int row_ptr = 0;
-	int pixel_ptr = 0;
-	int pixel_x, pixel_y;
-	int row_inc = bytes_per_pixel * (width - 4);
-	int max_height = row_inc * height;
-	int block_x_inc = bytes_per_pixel * 4;
-	int block_y_inc = bytes_per_pixel * width;
-	int block_ptr;
-	int total_blocks;
-
-	
-	/* First byte is always 0xe1. Warn if it's different */
-	if ((unsigned char)encoded[stream_ptr] != 0xe1)
-		mp_msg(MSGT_DECVIDEO, MSGL_WARN,
-					 "First chunk byte is 0x%02x instead of 0x1e\n",
-					 (unsigned char)encoded[stream_ptr]);
-
-	/* Get chunk size, ingnoring first byte */
-	chunk_size = BE_32(&encoded[stream_ptr]) & 0x00FFFFFF;
-	stream_ptr += 4;
-
-	/* If length mismatch use size from MOV file and try to decode anyway */
-	if (chunk_size != encoded_size)
-		mp_msg(MSGT_DECVIDEO, MSGL_WARN, "MOV chunk size != encoded chunk size; using MOV chunk size\n");
-
-	chunk_size = encoded_size;
-
-	/* Number of 4x4 blocks in frame. */
-	total_blocks = (width * height) / (4 * 4);
-
-	/* Process chunk data */
-	while (stream_ptr < chunk_size) {
-		opcode = encoded[stream_ptr++]; /* Get opcode */
-
-		n_blocks = (opcode & 0x1f) +1; /* Extract block counter from opcode */
-
-		/* If opcode MSbit is 0, we need more data to decide what to do */
-		if ((opcode & 0x80) == 0) {
-			colorA = (opcode << 8) | ((unsigned char)encoded[stream_ptr++]);
-			opcode = 0;
-			if ((encoded[stream_ptr] & 0x80) != 0) {
-				/* Must behave as opcode 110xxxxx, using colorA computed above.*/
-				/* Use fake opcode 0x20 to enter switch block at the right place */
-				opcode = 0x20;
-				n_blocks = 1;
-			}
-		}
-		switch (opcode & 0xe0) {
-			/* Skip blocks */
-			case 0x80:
-				while (n_blocks--)
-					ADVANCE_BLOCK();
-				break;
-
-			/* Fill blocks with one color */
-			case 0xa0:
-				colorA = BE_16 (&encoded[stream_ptr]);
-				stream_ptr += 2;
-				COLOR_TO_RGB (r, g, b, colorA);
-				while (n_blocks--) {
-					block_ptr = row_ptr + pixel_ptr;
-					for (pixel_y = 0; pixel_y < 4; pixel_y++) {
-						for (pixel_x = 0; pixel_x < 4; pixel_x++){
-							PAINT_CURRENT_PIXEL(r, g, b, colorA);
-						}
-						block_ptr += row_inc;
-					}
-					ADVANCE_BLOCK();
-				}
-				break;
-
-			/* Fill blocks with 4 colors */
-			case 0xc0:
-				colorA = BE_16 (&encoded[stream_ptr]);
-				stream_ptr += 2;
-			case 0x20:
-				colorB = BE_16 (&encoded[stream_ptr]);
-				stream_ptr += 2;
-				COLORAB_TO_RGB4 (rgb4, color4, colorA, colorB);
-				while (n_blocks--) {
-					block_ptr = row_ptr + pixel_ptr;
-					for (pixel_y = 0; pixel_y < 4; pixel_y++) {
-						index = encoded[stream_ptr++];
-						for (pixel_x = 0; pixel_x < 4; pixel_x++){
-							idx = (index >> (2 * (3 - pixel_x))) & 0x03;
-							PAINT_CURRENT_PIXEL(rgb4[idx][0], rgb4[idx][1], rgb4[idx][2], color4[idx]);
-						}
-						block_ptr += row_inc;
-					}
-					ADVANCE_BLOCK();
-				}
-				break;
-				
-			/* Fill block with 16 colors */
-			case 0x00:
-				block_ptr = row_ptr + pixel_ptr;
-				for (pixel_y = 0; pixel_y < 4; pixel_y++) {
-					for (pixel_x = 0; pixel_x < 4; pixel_x++){
-						/* We already have color of upper left pixel */
-						if ((pixel_y != 0) || (pixel_x !=0)) {
-							colorA = BE_16 (&encoded[stream_ptr]);
-							stream_ptr += 2;
-						}
-						COLOR_TO_RGB (r, g, b, colorA);
-						PAINT_CURRENT_PIXEL(r, g, b, colorA);
-					}
-					block_ptr += row_inc;
-				}
-				ADVANCE_BLOCK();
-				break;
-				
-			/* Unknown opcode */
-			default:
-				mp_msg(MSGT_DECVIDEO, MSGL_HINT, "Unknown opcode %d in rpza chunk."
-							 " Skip remaining %lu bytes of chunk data.\n", opcode,
-							 chunk_size - stream_ptr);
-				return;
-		} /* Opcode switch */
-
-	}
-}
--- a/qtsmc.c	Sat Apr 13 17:44:14 2002 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,513 +0,0 @@
-/*
-    Apple Graphics (SMC) Decoder for MPlayer
-    by Mike Melanson
-    Special thanks for Roberto Togni <rtogni@bresciaonline.it> for
-    tracking down the final, nagging bugs.
-
-    The description of the decoding algorithm can be found here:
-      http://www.pcisys.net/~melanson/codecs/
-*/
-
-#include <stdlib.h>
-#include "config.h"
-#include "bswap.h"
-#include "mp_msg.h"
-
-#define BE_16(x) (be2me_16(*(unsigned short *)(x)))
-#define BE_32(x) (be2me_32(*(unsigned int *)(x)))
-
-#define COLORS_PER_TABLE 256
-#define BYTES_PER_COLOR 4
-
-#define CPAIR 2
-#define CQUAD 4
-#define COCTET 8
-
-static unsigned char *color_pairs;
-static unsigned char *color_quads;
-static unsigned char *color_octets;
-
-static int color_pair_index;
-static int color_quad_index;
-static int color_octet_index;
-
-static int smc_initialized;
-
-// returns 0 if successfully initialized (enough memory was available),
-//  non-zero on failure
-int qt_init_decode_smc(void)
-{
-  // be pessimistic to start
-  smc_initialized = 0;
-
-  // allocate memory for the 3 palette tables
-  if ((color_pairs = (unsigned char *)malloc(
-    COLORS_PER_TABLE * BYTES_PER_COLOR * 2)) == 0)
-    return 1;  
-  if ((color_quads = (unsigned char *)malloc(
-    COLORS_PER_TABLE * BYTES_PER_COLOR * 4)) == 0)
-    return 1;  
-  if ((color_octets = (unsigned char *)malloc(
-    COLORS_PER_TABLE * BYTES_PER_COLOR * 8)) == 0)
-    return 1;
-
-  // if execution got this far, initialization succeeded
-  smc_initialized = 1;
-  return 0;
-}
-
-#define GET_BLOCK_COUNT \
-  (opcode & 0x10) ? (1 + encoded[stream_ptr++]) : 1 + (opcode & 0x0F);
-#define ADVANCE_BLOCK() \
-{ \
-  pixel_ptr += block_x_inc; \
-  if (pixel_ptr >= byte_width) \
-  { \
-    pixel_ptr = 0; \
-    row_ptr += block_y_inc * 4; \
-  } \
-  total_blocks--; \
-  if (total_blocks < 0) \
-  { \
-    mp_msg(MSGT_DECVIDEO, MSGL_WARN, "block counter just went negative (this should not happen)\n"); \
-    return; \
-  } \
-}
-
-void qt_decode_smc(
-  unsigned char *encoded,
-  int encoded_size,
-  unsigned char *decoded,
-  int pixel_width,
-  int pixel_height,
-  unsigned char *palette_map,
-  int bytes_per_pixel)
-{
-  int i;
-  int stream_ptr = 0;
-  int chunk_size;
-  unsigned char opcode;
-  int n_blocks;
-  unsigned int color_flags;
-  unsigned int color_flags_a;
-  unsigned int color_flags_b;
-  unsigned int flag_mask;
-
-  int byte_width = pixel_width * bytes_per_pixel;  // width of a row in bytes
-  int byte_height = pixel_height * byte_width;  // max image size, basically
-  int row_ptr = 0;
-  int pixel_ptr = 0;
-  int pixel_x, pixel_y;
-  int row_inc = bytes_per_pixel * (pixel_width - 4);
-  int block_x_inc = bytes_per_pixel * 4;
-  int block_y_inc = bytes_per_pixel * pixel_width;
-  int block_ptr;
-  int prev_block_ptr;
-  int prev_block_ptr1, prev_block_ptr2;
-  int prev_block_flag;
-  int total_blocks;
-  int color_table_index;  // indexes to color pair, quad, or octet tables
-  int color_index;  // indexes into palette map
-
-  if (!smc_initialized)
-    return;
-
-  // reset color tables
-  color_pair_index = 0;
-  color_quad_index = 0;
-  color_octet_index = 0;
-
-  chunk_size = BE_32(&encoded[stream_ptr]) & 0x00FFFFFF;
-  stream_ptr += 4;
-  if (chunk_size != encoded_size)
-    mp_msg(MSGT_DECVIDEO, MSGL_WARN, "MOV chunk size != encoded chunk size; using MOV chunk size\n");
-
-  chunk_size = encoded_size;
-  total_blocks = (pixel_width * pixel_height) / (4 * 4);
-
-  // traverse through the blocks
-  while (total_blocks)
-  {
-    // sanity checks
-    // make sure stream ptr hasn't gone out of bounds
-    if (stream_ptr > chunk_size)
-    {
-      mp_msg(MSGT_DECVIDEO, MSGL_ERR, 
-        "SMC decoder just went out of bounds (stream ptr = %d, chunk size = %d)\n",
-        stream_ptr, chunk_size);
-      return;
-    }
-    // make sure the row pointer hasn't gone wild
-    if (row_ptr >= byte_height)
-    {
-      mp_msg(MSGT_DECVIDEO, MSGL_ERR, 
-        "SMC decoder just went out of bounds (row ptr = %d, height = %d)\n",
-        row_ptr, byte_height);
-      return;
-    }
-
-    opcode = encoded[stream_ptr++];
-    switch (opcode & 0xF0)
-    {
-    // skip n blocks
-    case 0x00:
-    case 0x10:
-      n_blocks = GET_BLOCK_COUNT;
-      while (n_blocks--)
-        ADVANCE_BLOCK();
-      break;
-
-    // repeat last block n times
-    case 0x20:
-    case 0x30:
-      n_blocks = GET_BLOCK_COUNT;
-
-      // sanity check
-      if ((row_ptr == 0) && (pixel_ptr == 0))
-      {
-        mp_msg(MSGT_DECVIDEO, MSGL_WARN,
-          "encountered repeat block opcode (%02X) but no blocks rendered yet\n",
-          opcode & 0xF0);
-        break;
-      }
-
-      // figure out where the previous block started
-      if (pixel_ptr == 0)
-        prev_block_ptr1 = (row_ptr - block_y_inc * 4) + 
-          byte_width - block_x_inc;
-      else
-        prev_block_ptr1 = row_ptr + pixel_ptr - block_x_inc;
-
-      while (n_blocks--)
-      {
-        block_ptr = row_ptr + pixel_ptr;
-        prev_block_ptr = prev_block_ptr1;
-        for (pixel_y = 0; pixel_y < 4; pixel_y++)
-        {
-          for (pixel_x = 0; pixel_x < 4; pixel_x++)
-          {
-            decoded[block_ptr++] = decoded[prev_block_ptr++];
-            decoded[block_ptr++] = decoded[prev_block_ptr++];
-            decoded[block_ptr++] = decoded[prev_block_ptr++];
-            if (bytes_per_pixel == 4) /* 32bpp */
-            {
-              block_ptr++;
-              prev_block_ptr++;
-            }
-          }
-          block_ptr += row_inc;
-          prev_block_ptr += row_inc;
-        }
-        ADVANCE_BLOCK();
-      }
-      break;
-
-    // repeat previous pair of blocks n times
-    case 0x40:
-    case 0x50:
-      n_blocks = GET_BLOCK_COUNT;
-      n_blocks *= 2;
-
-      // sanity check
-      if ((row_ptr == 0) && (pixel_ptr < 2 * block_x_inc))
-      {
-        mp_msg(MSGT_DECVIDEO, MSGL_WARN,
-          "encountered repeat block opcode (%02X) but not enough blocks rendered yet\n",
-          opcode & 0xF0);
-        break;
-      }
-
-      // figure out where the previous 2 blocks started
-      if (pixel_ptr == 0)
-        prev_block_ptr1 = (row_ptr - block_y_inc * 4) + 
-          byte_width - block_x_inc * 2;
-      else if (pixel_ptr == block_x_inc)
-        prev_block_ptr1 = (row_ptr - block_y_inc * 4) + 
-          byte_width - block_x_inc;
-      else
-        prev_block_ptr1 = row_ptr + pixel_ptr - block_x_inc * 2;
-
-      if (pixel_ptr == 0)
-        prev_block_ptr2 = (row_ptr - block_y_inc * 4) + 
-          (byte_width - block_x_inc);
-      else
-        prev_block_ptr2 = row_ptr + pixel_ptr - block_x_inc;
-
-      prev_block_flag = 0;
-      while (n_blocks--)
-      {
-        block_ptr = row_ptr + pixel_ptr;
-        if (prev_block_flag)
-          prev_block_ptr = prev_block_ptr2;
-        else
-          prev_block_ptr = prev_block_ptr1;
-        prev_block_flag = !prev_block_flag;
-
-        for (pixel_y = 0; pixel_y < 4; pixel_y++)
-        {
-          for (pixel_x = 0; pixel_x < 4; pixel_x++)
-          {
-            decoded[block_ptr++] = decoded[prev_block_ptr++];
-            decoded[block_ptr++] = decoded[prev_block_ptr++];
-            decoded[block_ptr++] = decoded[prev_block_ptr++];
-            if (bytes_per_pixel == 4) /* 32bpp */
-            {
-              block_ptr++;
-              prev_block_ptr++;
-            }
-          }
-          block_ptr += row_inc;
-          prev_block_ptr += row_inc;
-        }
-        ADVANCE_BLOCK();
-      }
-      break;
-
-    // 1-color block encoding
-    case 0x60:
-    case 0x70:
-      n_blocks = GET_BLOCK_COUNT;
-      color_index = encoded[stream_ptr++] * 4;
-
-      while (n_blocks--)
-      {
-        block_ptr = row_ptr + pixel_ptr;
-        for (pixel_y = 0; pixel_y < 4; pixel_y++)
-        {
-          for (pixel_x = 0; pixel_x < 4; pixel_x++)
-          {
-            decoded[block_ptr++] = palette_map[color_index + 0];
-            decoded[block_ptr++] = palette_map[color_index + 1];
-            decoded[block_ptr++] = palette_map[color_index + 2];
-            if (bytes_per_pixel == 4) /* 32bpp */
-              block_ptr++;
-          }
-          block_ptr += row_inc;
-        }
-        ADVANCE_BLOCK();
-      }
-      break;
-
-    // 2-color block encoding
-    case 0x80:
-    case 0x90:
-      n_blocks = (opcode & 0x0F) + 1;
-
-      // figure out which color pair to use to paint the 2-color block
-      if ((opcode & 0xF0) == 0x80)
-      {
-        // fetch the next 2 colors from bytestream and store in next
-        // available entry in the color pair table
-        for (i = 0; i < CPAIR; i++)
-        {
-          color_index = encoded[stream_ptr++] * BYTES_PER_COLOR;
-          color_table_index = CPAIR * BYTES_PER_COLOR * color_pair_index + 
-            (i * BYTES_PER_COLOR);
-          color_pairs[color_table_index + 0] = palette_map[color_index + 0];
-          color_pairs[color_table_index + 1] = palette_map[color_index + 1];
-          color_pairs[color_table_index + 2] = palette_map[color_index + 2];
-        }
-        // this is the base index to use for this block
-        color_table_index = CPAIR * BYTES_PER_COLOR * color_pair_index;
-        color_pair_index++;
-        if (color_pair_index == COLORS_PER_TABLE)
-          color_pair_index = 0;
-      }
-      else
-        color_table_index = CPAIR * BYTES_PER_COLOR * encoded[stream_ptr++];
-
-      while (n_blocks--)
-      {
-        color_flags = BE_16(&encoded[stream_ptr]);
-        stream_ptr += 2;
-        flag_mask = 0x8000;
-        block_ptr = row_ptr + pixel_ptr;
-        for (pixel_y = 0; pixel_y < 4; pixel_y++)
-        {
-          for (pixel_x = 0; pixel_x < 4; pixel_x++)
-          {
-            if (color_flags & flag_mask)
-              color_index = color_table_index + BYTES_PER_COLOR;
-            else
-              color_index = color_table_index;
-            flag_mask >>= 1;
-
-            decoded[block_ptr++] = color_pairs[color_index + 0];
-            decoded[block_ptr++] = color_pairs[color_index + 1];
-            decoded[block_ptr++] = color_pairs[color_index + 2];
-            if (bytes_per_pixel == 4) /* 32bpp */
-              block_ptr++;
-          }
-          block_ptr += row_inc;
-        }
-        ADVANCE_BLOCK();
-      }
-      break;
-
-    // 4-color block encoding
-    case 0xA0:
-    case 0xB0:
-      n_blocks = (opcode & 0x0F) + 1;
-
-      // figure out which color quad to use to paint the 4-color block
-      if ((opcode & 0xF0) == 0xA0)
-      {
-        // fetch the next 4 colors from bytestream and store in next
-        // available entry in the color quad table
-        for (i = 0; i < CQUAD; i++)
-        {
-          color_index = encoded[stream_ptr++] * BYTES_PER_COLOR;
-          color_table_index = CQUAD * BYTES_PER_COLOR * color_quad_index + 
-            (i * BYTES_PER_COLOR);
-          color_quads[color_table_index + 0] = palette_map[color_index + 0];
-          color_quads[color_table_index + 1] = palette_map[color_index + 1];
-          color_quads[color_table_index + 2] = palette_map[color_index + 2];
-        }
-        // this is the base index to use for this block
-        color_table_index = CQUAD * BYTES_PER_COLOR * color_quad_index;
-        color_quad_index++;
-        if (color_quad_index == COLORS_PER_TABLE)
-          color_quad_index = 0;
-      }
-      else
-        color_table_index = CQUAD * BYTES_PER_COLOR * encoded[stream_ptr++];
-
-      while (n_blocks--)
-      {
-        color_flags = BE_32(&encoded[stream_ptr]);
-        stream_ptr += 4;
-        // flag mask actually acts as a bit shift count here
-        flag_mask = 30;
-        block_ptr = row_ptr + pixel_ptr;
-        for (pixel_y = 0; pixel_y < 4; pixel_y++)
-        {
-          for (pixel_x = 0; pixel_x < 4; pixel_x++)
-          {
-            color_index = color_table_index + (BYTES_PER_COLOR * 
-              ((color_flags >> flag_mask) & 0x03));
-            flag_mask -= 2;
-
-            decoded[block_ptr++] = color_quads[color_index + 0];
-            decoded[block_ptr++] = color_quads[color_index + 1];
-            decoded[block_ptr++] = color_quads[color_index + 2];
-            if (bytes_per_pixel == 4) /* 32bpp */
-              block_ptr++;
-          }
-          block_ptr += row_inc;
-        }
-        ADVANCE_BLOCK();
-      }
-      break;
-
-    // 8-color block encoding
-    case 0xC0:
-    case 0xD0:
-      n_blocks = (opcode & 0x0F) + 1;
-
-      // figure out which color octet to use to paint the 8-color block
-      if ((opcode & 0xF0) == 0xC0)
-      {
-        // fetch the next 8 colors from bytestream and store in next
-        // available entry in the color octet table
-        for (i = 0; i < COCTET; i++)
-        {
-          color_index = encoded[stream_ptr++] * BYTES_PER_COLOR;
-          color_table_index = COCTET * BYTES_PER_COLOR * color_octet_index + 
-            (i * BYTES_PER_COLOR);
-          color_octets[color_table_index + 0] = palette_map[color_index + 0];
-          color_octets[color_table_index + 1] = palette_map[color_index + 1];
-          color_octets[color_table_index + 2] = palette_map[color_index + 2];
-        }
-        // this is the base index to use for this block
-        color_table_index = COCTET * BYTES_PER_COLOR * color_octet_index;
-        color_octet_index++;
-        if (color_octet_index == COLORS_PER_TABLE)
-          color_octet_index = 0;
-      }
-      else
-        color_table_index = COCTET * BYTES_PER_COLOR * encoded[stream_ptr++];
-
-      while (n_blocks--)
-      {
-        /*
-          For this input:
-            01 23 45 67 89 AB
-          This is the output:
-            flags_a = xx012456, flags_b = xx89A37B
-        */
-        // build the color flags
-        color_flags_a = color_flags_b = 0;
-        color_flags_a =
-          (encoded[stream_ptr + 0] << 16) |
-          ((encoded[stream_ptr + 1] & 0xF0) << 8) |
-          ((encoded[stream_ptr + 2] & 0xF0) << 4) |
-          ((encoded[stream_ptr + 2] & 0x0F) << 4) |
-          ((encoded[stream_ptr + 3] & 0xF0) >> 4);
-        color_flags_b =
-          (encoded[stream_ptr + 4] << 16) |
-          ((encoded[stream_ptr + 5] & 0xF0) << 8) |
-          ((encoded[stream_ptr + 1] & 0x0F) << 8) |
-          ((encoded[stream_ptr + 3] & 0x0F) << 4) |
-          (encoded[stream_ptr + 5] & 0x0F);
-        stream_ptr += 6;
-
-        color_flags = color_flags_a;
-        // flag mask actually acts as a bit shift count here
-        flag_mask = 21;
-        block_ptr = row_ptr + pixel_ptr;
-        for (pixel_y = 0; pixel_y < 4; pixel_y++)
-        {
-          // reload flags at third row (iteration pixel_y == 2)
-          if (pixel_y == 2)
-          {
-            color_flags = color_flags_b;
-            flag_mask = 21;
-          }
-          for (pixel_x = 0; pixel_x < 4; pixel_x++)
-          {
-            color_index = color_table_index + (BYTES_PER_COLOR * 
-              ((color_flags >> flag_mask) & 0x07));
-            flag_mask -= 3;
-
-            decoded[block_ptr++] = color_octets[color_index + 0];
-            decoded[block_ptr++] = color_octets[color_index + 1];
-            decoded[block_ptr++] = color_octets[color_index + 2];
-            if (bytes_per_pixel == 4) /* 32bpp */
-              block_ptr++;
-          }
-          block_ptr += row_inc;
-        }
-        ADVANCE_BLOCK();
-      }
-      break;
-
-    // 16-color block encoding (every pixel is a different color)
-    case 0xE0:
-      n_blocks = (opcode & 0x0F) + 1;
-
-      while (n_blocks--)
-      {
-        block_ptr = row_ptr + pixel_ptr;
-        for (pixel_y = 0; pixel_y < 4; pixel_y++)
-        {
-          for (pixel_x = 0; pixel_x < 4; pixel_x++)
-          {
-            color_index = encoded[stream_ptr++] * BYTES_PER_COLOR;
-            decoded[block_ptr++] = palette_map[color_index + 0];
-            decoded[block_ptr++] = palette_map[color_index + 1];
-            decoded[block_ptr++] = palette_map[color_index + 2];
-            if (bytes_per_pixel == 4) /* 32bpp */
-              block_ptr++;
-          }
-          block_ptr += row_inc;
-        }
-        ADVANCE_BLOCK();
-      }
-      break;
-
-    case 0xF0:
-      mp_msg(MSGT_DECVIDEO, MSGL_HINT, "0xF0 opcode seen in SMC chunk (MPlayer developers would like to know)\n");
-      break;
-    }
-  }
-}
--- a/roqav.c	Sat Apr 13 17:44:14 2002 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,670 +0,0 @@
-/*
-    RoQ A/V decoder for the MPlayer program
-    by Mike Melanson
-    based on Dr. Tim Ferguson's RoQ document and accompanying source
-    code found at:
-      http://www.csse.monash.edu.au/~timf/videocodec.html
-*/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "config.h"
-#include "bswap.h"
-#include "mp_msg.h"
-#include "mp_image.h"
-
-#define LE_16(x) (le2me_16(*(unsigned short *)(x)))
-#define LE_32(x) (le2me_32(*(unsigned int *)(x)))
-
-#define CLAMP_S16(x)  if (x < -32768) x = -32768; \
-  else if (x > 32767) x = 32767;
-#define SE_16BIT(x)  if (x & 0x8000) x -= 0x10000;
-
-// RoQ chunk types
-#define RoQ_INFO           0x1001
-#define RoQ_QUAD_CODEBOOK  0x1002
-#define RoQ_QUAD_VQ        0x1011
-#define RoQ_SOUND_MONO     0x1020
-#define RoQ_SOUND_STEREO   0x1021
-
-#define MAX_ROQ_CODEBOOK_SIZE 256
-
-// codebook entry for 2x2 vector
-typedef struct
-{
-  // upper and lower luminance value pairs of 2x2 vector: [y0 y1], [y2 y3]
-  unsigned short v2_y_u;
-  unsigned short v2_y_l;
-
-  // chrominance components
-  unsigned char u, v;
-
-  // these variables are for rendering a doublesized 8x8 block; e.g.:
-  // v2_y_rows12 = [y0 y0 y1 y1]
-  // v2_y_rows34 = [y2 y2 y3 y3]
-  unsigned long v2d_y_rows_12;
-  unsigned long v2d_y_rows_34;
-  // ex: v2_u_row1 = [u u]
-  //     v2_v_row2 = [v v]
-  unsigned short v2d_u_rows_12;
-  unsigned short v2d_v_rows_12;
-
-  // maintain separate bytes for the luminance values as well
-  unsigned char y0, y1, y2, y3;
-} roq_v2_codebook;
-
-// codebook entry for 4x4 vector
-typedef struct
-{
-  unsigned char v2_index[4];
-} roq_v4_codebook;
-
-typedef struct
-{
-  roq_v2_codebook v2[MAX_ROQ_CODEBOOK_SIZE];
-  roq_v4_codebook v4[MAX_ROQ_CODEBOOK_SIZE];
-  mp_image_t *prev_frame;
-} roqvideo_info;
-
-
-// This function fills in the missing information for a v2 vector after
-// loading the Y, U and V values.
-inline void prep_v2(roq_v2_codebook *v2)
-{
-  v2->v2_y_u = be2me_16((v2->y0 << 8) | v2->y1);
-  v2->v2_y_l = be2me_16((v2->y2 << 8) | v2->y3);
-
-  v2->v2d_y_rows_12 = be2me_32((v2->y0 << 24) | (v2->y0 << 16) |
-    (v2->y1 << 8) | v2->y1);
-  v2->v2d_y_rows_34 = be2me_32((v2->y2 << 24) | (v2->y2 << 16) |
-    (v2->y3 << 8) | v2->y3);
-
-  // no reason to swap these for endianness since they're the same bytes
-  v2->v2d_u_rows_12 = (v2->u << 8) | v2->u;
-  v2->v2d_v_rows_12 = (v2->v << 8) | v2->v;
-}
-
-inline void paint_v2double_block(
-  unsigned char *y_plane,
-  unsigned char *u_plane,
-  unsigned char *v_plane,
-  roq_v2_codebook *v2,
-  unsigned int y_stride,
-  unsigned int u_stride,
-  unsigned int v_stride)
-{
-  // render the luminance components
-  *(unsigned int *)y_plane = v2->v2d_y_rows_12;
-  y_plane += y_stride;
-  *(unsigned int *)y_plane = v2->v2d_y_rows_12;
-  y_plane += y_stride;
-  *(unsigned int *)y_plane = v2->v2d_y_rows_34;
-  y_plane += y_stride;
-  *(unsigned int *)y_plane = v2->v2d_y_rows_34;
-
-  // render the color planes
-  *(unsigned short *)u_plane = v2->v2d_u_rows_12;
-  u_plane += u_stride;
-  *(unsigned short *)u_plane = v2->v2d_u_rows_12;
-
-  *(unsigned short *)v_plane = v2->v2d_v_rows_12;
-  v_plane += v_stride;
-  *(unsigned short *)v_plane = v2->v2d_v_rows_12;
-}
-
-inline void paint_v4_block(
-  unsigned char *y_plane,
-  unsigned char *u_plane,
-  unsigned char *v_plane,
-  unsigned int y_stride,
-  unsigned int u_stride,
-  unsigned int v_stride,
-  roq_v2_codebook *v2_a,
-  roq_v2_codebook *v2_b,
-  roq_v2_codebook *v2_c,
-  roq_v2_codebook *v2_d)
-{
-  // render luminance components
-  ((unsigned short *)y_plane)[0] = v2_a->v2_y_u;
-  ((unsigned short *)y_plane)[1] = v2_b->v2_y_u;
-
-  y_plane += y_stride;
-  ((unsigned short *)y_plane)[0] = v2_a->v2_y_l;
-  ((unsigned short *)y_plane)[1] = v2_b->v2_y_l;
-
-  y_plane += y_stride;
-  ((unsigned short *)y_plane)[0] = v2_c->v2_y_u;
-  ((unsigned short *)y_plane)[1] = v2_d->v2_y_u;
-
-  y_plane += y_stride;
-  ((unsigned short *)y_plane)[0] = v2_c->v2_y_l;
-  ((unsigned short *)y_plane)[1] = v2_d->v2_y_l;
-
-  // render the color planes
-  u_plane[0] = v2_a->u;
-  u_plane[1] = v2_b->u;
-  u_plane += u_stride;
-  u_plane[0] = v2_c->u;
-  u_plane[1] = v2_d->u;
-
-  v_plane[0] = v2_a->v;
-  v_plane[1] = v2_b->v;
-  v_plane += v_stride;
-  v_plane[0] = v2_c->v;
-  v_plane[1] = v2_d->v;
-}
-
-// This function copies the 4x4 block from the prev_*_planes to the
-// current *_planes.
-inline void copy_4x4_block(
-  unsigned char *y_plane,
-  unsigned char *u_plane,
-  unsigned char *v_plane,
-  unsigned char *prev_y_plane,
-  unsigned char *prev_u_plane,
-  unsigned char *prev_v_plane,
-  unsigned int y_stride,
-  unsigned int u_stride,
-  unsigned int v_stride)
-{
-  int i;
-
-  // copy over the luminance components (4 rows, 1 uint each)
-  for (i = 0; i < 4; i++)
-  {
-    *(unsigned int *)y_plane = *(unsigned int *)prev_y_plane;
-    y_plane += y_stride;
-    prev_y_plane += y_stride;
-  }
-
-  // copy the chrominance values
-  for (i = 0; i < 2; i++)
-  {
-    *(unsigned short*)u_plane = *(unsigned short*)prev_u_plane;
-    u_plane += u_stride;
-    prev_u_plane += u_stride;
-    *(unsigned short*)v_plane = *(unsigned short*)prev_v_plane;
-    v_plane += v_stride;
-    prev_v_plane += v_stride;
-  }
-}
-
-// This function copies the 8x8 block from the prev_*_planes to the
-// current *_planes.
-inline void copy_8x8_block(
-  unsigned char *y_plane,
-  unsigned char *u_plane,
-  unsigned char *v_plane,
-  unsigned char *prev_y_plane,
-  unsigned char *prev_u_plane,
-  unsigned char *prev_v_plane,
-  unsigned int y_stride,
-  unsigned int u_stride,
-  unsigned int v_stride)
-{
-  int i;
-
-  // copy over the luminance components (8 rows, 2 uints each)
-  for (i = 0; i < 8; i++)
-  {
-    ((unsigned int *)y_plane)[0] = ((unsigned int *)prev_y_plane)[0];
-    ((unsigned int *)y_plane)[1] = ((unsigned int *)prev_y_plane)[1];
-    y_plane += y_stride;
-    prev_y_plane += y_stride;
-  }
-
-  // copy the chrominance values
-  for (i = 0; i < 4; i++)
-  {
-    *(unsigned int*)u_plane = *(unsigned int*)prev_u_plane;
-    u_plane += u_stride;
-    prev_u_plane += u_stride;
-    *(unsigned int*)v_plane = *(unsigned int*)prev_v_plane;
-    v_plane += v_stride;
-    prev_v_plane += v_stride;
-  }
-}
-
-// This function creates storage space for the vector codebooks.
-void *roq_decode_video_init(void)
-{
-  roqvideo_info *info =
-    (roqvideo_info *)calloc(sizeof(roqvideo_info), 1);
-
-  info->prev_frame = NULL;
-
-  return info;
-}
-
-#define EMPTY_ROQ_CODEWORD 0xFFFF0000
-
-#define FETCH_NEXT_CODE() \
-  if (current_roq_codeword == EMPTY_ROQ_CODEWORD) \
-  { \
-    if (stream_ptr + 2 > encoded_size) \
-    { \
-      mp_msg(MSGT_DECVIDEO, MSGL_WARN,  \
-        "RoQ video: stream pointer just went out of bounds (1)\n"); \
-      return; \
-    } \
-    current_roq_codeword = (0x0000FFFF) | \
-      (encoded[stream_ptr + 0] << 16) | \
-      (encoded[stream_ptr + 1] << 24); \
-    stream_ptr += 2; \
-  } \
-  roq_code = ((current_roq_codeword >> 30) & 0x03); \
-  current_roq_codeword <<= 2;
-
-#define FETCH_NEXT_ARGUMENT() \
-  if (stream_ptr + 1 > encoded_size) \
-  { \
-    mp_msg(MSGT_DECVIDEO, MSGL_WARN,  \
-      "RoQ video: stream pointer just went out of bounds (2)\n"); \
-    return; \
-  } \
-  argument = encoded[stream_ptr++];
-
-#define CHECK_PREV_FRAME() \
-  if (!info->prev_frame) \
-  { \
-    mp_msg(MSGT_DECVIDEO, MSGL_WARN, \
-      "RoQ video: can't handle motion vector when there's no previous frame\n"); \
-    return; \
-  }
-
-void roq_decode_video(void *context, unsigned char *encoded,
-  int encoded_size, mp_image_t *mpi)
-{
-  roqvideo_info *info = (roqvideo_info *)context;
-
-  int stream_ptr = 0;
-  int i, j;
-  int chunk_length;
-  int v2_count;
-  int v4_count;
-
-  int roq_code;
-  unsigned int current_roq_codeword = EMPTY_ROQ_CODEWORD;
-  unsigned char argument = 0;
-  char mean_motion_x;
-  char mean_motion_y;
-  int mx, my; // for calculating the motion vector
-
-  int mblock_x = 0;
-  int mblock_y = 0;
-  int quad8_x, quad8_y;  // for pointing to 8x8 blocks in a macroblock
-  int quad4_x, quad4_y;  // for pointing to 4x4 blocks in an 8x8 block
-
-  unsigned char *y_plane;
-  unsigned char *u_plane;
-  unsigned char *v_plane;
-  unsigned char *prev_y_plane;
-  unsigned char *prev_u_plane;
-  unsigned char *prev_v_plane;
-  unsigned int y_stride = mpi->stride[0];
-  unsigned int u_stride = mpi->stride[1];
-  unsigned int v_stride = mpi->stride[2];
-
-  roq_v4_codebook v4;
-
-  // make sure the encoded chunk is of minimal acceptable length
-  if (encoded_size < 8)
-  {
-    mp_msg(MSGT_DECVIDEO, MSGL_WARN, 
-      "RoQ video: chunk isn't even 8 bytes long (minimum acceptable length)\n");
-    return;
-  }
-
-  // make sure the resolution checks out
-  if ((mpi->width % 16 != 0) || (mpi->height % 16 != 0))
-  {
-    mp_msg(MSGT_DECVIDEO, MSGL_WARN, 
-      "RoQ video resolution: %d x %d; expected dimensions divisible by 16\n");
-    return;
-  }
-
-  if (LE_16(&encoded[stream_ptr]) == RoQ_QUAD_CODEBOOK)
-  {
-    stream_ptr += 2;
-    chunk_length = LE_32(&encoded[stream_ptr]);
-    stream_ptr += 4;
-    v4_count = encoded[stream_ptr++];
-    v2_count = encoded[stream_ptr++];
-    if (v2_count == 0)
-      v2_count = 256;
-    if ((v4_count == 0) && (v2_count * 6 < chunk_length))
-      v4_count = 256;
-
-    // make sure the lengths agree with each other
-    if (((v2_count * 6) + (v4_count * 4)) != chunk_length)
-    {
-      mp_msg(MSGT_DECVIDEO, MSGL_WARN,
-        "RoQ video: encountered quad codebook chunk with weird lengths (1)\n");
-      return;
-    }
-    if ((v2_count * 6) > (encoded_size - stream_ptr))
-    {
-      mp_msg(MSGT_DECVIDEO, MSGL_WARN,
-        "RoQ video: encountered quad codebook chunk with weird lengths (2)\n");
-      return;
-    }
-
-    // load the 2x2 vectors
-    for (i = 0; i < v2_count; i++)
-    {
-      info->v2[i].y0 = encoded[stream_ptr++];
-      info->v2[i].y1 = encoded[stream_ptr++];
-      info->v2[i].y2 = encoded[stream_ptr++];
-      info->v2[i].y3 = encoded[stream_ptr++];
-      info->v2[i].u = encoded[stream_ptr++];
-      info->v2[i].v = encoded[stream_ptr++];
-      prep_v2(&info->v2[i]);
-    }
-
-    if ((v4_count * 4) > (encoded_size - stream_ptr))
-    {
-      mp_msg(MSGT_DECVIDEO, MSGL_WARN,
-        "RoQ video: encountered quad codebook chunk with weird lengths (3)\n");
-      return;
-    }
-
-    // load the 4x4 vectors
-    for (i = 0; i < v4_count; i++)
-    {
-      info->v4[i].v2_index[0] = encoded[stream_ptr++];
-      info->v4[i].v2_index[1] = encoded[stream_ptr++];
-      info->v4[i].v2_index[2] = encoded[stream_ptr++];
-      info->v4[i].v2_index[3] = encoded[stream_ptr++];
-    }
-  }
-
-  if (LE_16(&encoded[stream_ptr]) == RoQ_QUAD_VQ)
-  {
-    stream_ptr += 2;
-    chunk_length = LE_32(&encoded[stream_ptr]);
-    stream_ptr += 4;
-    mean_motion_y = encoded[stream_ptr++];
-    mean_motion_x = encoded[stream_ptr++];
-
-    // start by copying entire previous frame
-    if (info->prev_frame)
-    {
-      memcpy(mpi->planes[0], info->prev_frame->planes[0],
-        mpi->width * mpi->height);
-      memcpy(mpi->planes[1], info->prev_frame->planes[1],
-        (mpi->width * mpi->height) / 4);
-      memcpy(mpi->planes[2], info->prev_frame->planes[2], 
-        (mpi->width * mpi->height) / 4);
-    }
-
-    // iterate through the 16x16 macroblocks
-    for (mblock_y = 0; mblock_y < mpi->height; mblock_y += 16)
-    {
-      for (mblock_x = 0; mblock_x < mpi->width; mblock_x += 16)
-      {
-        // iterate through the 4 quadrants of the macroblock
-        for (i = 0; i < 4; i++)
-        {
-          quad8_x = mblock_x;
-          quad8_y = mblock_y;
-          if (i & 0x01) quad8_x += 8;
-          if (i & 0x02) quad8_y += 8;
-
-          // set up the planes
-          y_plane = mpi->planes[0] + quad8_y * y_stride + quad8_x;
-          u_plane = mpi->planes[1] + (quad8_y / 2) * u_stride + (quad8_x / 2);
-          v_plane = mpi->planes[2] + (quad8_y / 2) * v_stride + (quad8_x / 2);
-
-          // decide how to handle this 8x8 quad
-          FETCH_NEXT_CODE();
-          switch(roq_code)
-          {
-            // 8x8 block is the same as in the previous frame;
-            // skip it
-            case 0:
-              break;
-
-            // 8x8 block is painted with an 8x8 block from the last frame
-            // (i.e., motion compensation)
-            case 1:
-              CHECK_PREV_FRAME();
-
-              // prepare the pointers to the planes in the previous frame
-              FETCH_NEXT_ARGUMENT();  // argument contains motion vectors
-
-              // figure out the motion vectors
-              mx = quad8_x + 8 - (argument >> 4) - mean_motion_x;
-              my = quad8_y + 8 - (argument & 0x0F) - mean_motion_y;
-
-              prev_y_plane = info->prev_frame->planes[0] +
-                my * y_stride + mx;
-              prev_u_plane = info->prev_frame->planes[1] +
-                (my / 2) * u_stride + (mx + 1) / 2;
-              prev_v_plane = info->prev_frame->planes[2] +
-                (my / 2) * v_stride + (mx + 1) / 2;
-
-// sanity check before rendering
-              copy_8x8_block(
-                y_plane,
-                u_plane,
-                v_plane,
-                prev_y_plane,
-                prev_u_plane,
-                prev_v_plane,
-                y_stride,
-                u_stride,
-                v_stride
-              );
-              break;
-
-            // 8x8 block is painted with a doublesized 4x4 vector
-            case 2:
-              FETCH_NEXT_ARGUMENT();
-              v4 = info->v4[argument];
-
-// sanity check before rendering
-              // iterate through 4 4x4 blocks
-              for (j = 0; j < 4; j++)
-              {
-                quad4_x = quad8_x;
-                quad4_y = quad8_y;
-                if (j & 0x01) quad4_x += 4;
-                if (j & 0x02) quad4_y += 4;
-
-                // set up the planes
-                y_plane = mpi->planes[0] + quad4_y * y_stride + quad4_x;
-                u_plane = mpi->planes[1] + 
-                  (quad4_y / 2) * u_stride + (quad4_x / 2);
-                v_plane = mpi->planes[2] + 
-                  (quad4_y / 2) * v_stride + (quad4_x / 2);
-
-                paint_v2double_block(
-                  y_plane,
-                  u_plane,
-                  v_plane,
-                  &info->v2[v4.v2_index[j]],
-                  y_stride,
-                  u_stride,
-                  v_stride
-                );
-              }
-              break;
-
-            // 8x8 block is broken down into 4 4x4 blocks and painted using
-            // 4 different codes.
-            case 3:
-              // iterate through 4 4x4 blocks
-              for (j = 0; j < 4; j++)
-              {
-                quad4_x = quad8_x;
-                quad4_y = quad8_y;
-                if (j & 0x01) quad4_x += 4;
-                if (j & 0x02) quad4_y += 4;
-
-                // set up the planes
-                y_plane = mpi->planes[0] + quad4_y * y_stride + quad4_x;
-                u_plane = mpi->planes[1] + 
-                  (quad4_y / 2) * u_stride + (quad4_x / 2);
-                v_plane = mpi->planes[2] + 
-                  (quad4_y / 2) * v_stride + (quad4_x / 2);
-
-                // decide how to handle this 4x4 quad
-                FETCH_NEXT_CODE();
-                switch(roq_code)
-                {
-                  // 4x4 block is the same as in the previous frame;
-                  // skip it
-                  case 0:
-                    break;
-
-                  // 4x4 block is motion compensated from the previous frame
-                  case 1:
-                    CHECK_PREV_FRAME();
-                    // prepare the pointers to the planes in the previous frame
-                    FETCH_NEXT_ARGUMENT();  // argument contains motion vectors
-
-                    // figure out the motion vectors
-                    mx = quad4_x + 8 - (argument >> 4) - mean_motion_x;
-                    my = quad4_y + 8 - (argument & 0x0F) - mean_motion_y;
-
-                    prev_y_plane = info->prev_frame->planes[0] +
-                      my * y_stride + mx;
-                    prev_u_plane = info->prev_frame->planes[1] +
-                     (my / 2) * u_stride + (mx + 1) / 2;
-                    prev_v_plane = info->prev_frame->planes[2] +
-                     (my / 2) * u_stride + (mx + 1) / 2;
-
-// sanity check before rendering
-                    copy_4x4_block(
-                      y_plane,
-                      u_plane,
-                      v_plane,
-                      prev_y_plane,
-                      prev_u_plane,
-                      prev_v_plane,
-                      y_stride,
-                      u_stride,
-                      v_stride
-                    );
-                    break;
-
-                  // 4x4 block is copied directly from v4 vector table
-                  case 2:
-                    FETCH_NEXT_ARGUMENT();
-                    v4 = info->v4[argument];
-
-                    paint_v4_block(
-                      y_plane,
-                      u_plane,
-                      v_plane,
-                      y_stride,
-                      u_stride,
-                      v_stride,
-                      &info->v2[v4.v2_index[0]],
-                      &info->v2[v4.v2_index[1]],
-                      &info->v2[v4.v2_index[2]],
-                      &info->v2[v4.v2_index[3]]);
-                    break;
-
-                  // 4x4 block is built from 4 2x2 vectors
-                  case 3:
-                    if (stream_ptr + 4 > encoded_size)
-                    {
-                      mp_msg(MSGT_DECVIDEO, MSGL_WARN,
-                        "RoQ video: stream pointer just went out of bounds (2)\n");
-                      return;
-                    }
-                    paint_v4_block(
-                      y_plane,
-                      u_plane,
-                      v_plane,
-                      y_stride,
-                      u_stride,
-                      v_stride,
-                      &info->v2[encoded[stream_ptr + 0]],
-                      &info->v2[encoded[stream_ptr + 1]],
-                      &info->v2[encoded[stream_ptr + 2]],
-                      &info->v2[encoded[stream_ptr + 3]]);
-                    stream_ptr += 4;
-                    break;
-                }
-              }
-              break;
-          }
-        }
-      }
-    }
-  }
-
-  // one last sanity check on the way out
-  // (apparently, it's not unusual to have 2 bytes left over after decode)
-  if (stream_ptr < encoded_size - 2)
-  {
-      mp_msg(MSGT_DECVIDEO, MSGL_WARN,
-        "RoQ video: completed frame decode with bytes left over (%d < %d)\n",
-          stream_ptr, encoded_size);
-  }
-
-  // save the current frame as the previous frame for the next iteration
-  info->prev_frame = mpi;
-}
-
-// Initialize the RoQ audio decoder, which is to say, initialize the table
-// of squares.
-void *roq_decode_audio_init(void)
-{
-  short *square_array;
-  short square;
-  int i;
-
-  square_array = (short *)malloc(256 * sizeof(short));
-  if (!square_array)
-    return NULL;
-
-  for (i = 0; i < 128; i++)
-  {
-    square = i * i;
-    square_array[i] = square;
-    square_array[i + 128] = -square;
-  }
-
-  return square_array;
-}
-
-int roq_decode_audio(
-  unsigned short *output,
-  unsigned char *input,
-  int encoded_size,
-  int channels,
-  void *context)
-{
-  short *square_array = (short *)context;
-  int i;
-  int predictor[2];
-  int channel_number = 0;
-
-  // prepare the initial predictors
-  if (channels == 1)
-    predictor[0] = LE_16(&input[0]);
-  else
-  {
-    predictor[0] = input[1] << 8;
-    predictor[1] = input[0] << 8;
-  }
-  SE_16BIT(predictor[0]);
-  SE_16BIT(predictor[1]);
-
-  // decode the samples
-  for (i = 2; i < encoded_size; i++)
-  {
-    predictor[channel_number] += square_array[input[i]];
-    CLAMP_S16(predictor[channel_number]);
-    output[i - 2] = predictor[channel_number];
-
-    // toggle channel
-    channel_number ^= channels - 1;
-  }
-
-  // return the number of samples decoded
-  return (encoded_size - 2);
-}
--- a/roqav.h	Sat Apr 13 17:44:14 2002 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,12 +0,0 @@
-#ifndef ROQAV_H
-#define ROQAV_H
-
-void *roq_decode_video_init(void);
-void roq_decode_video(void *context, unsigned char *encoded, 
-  int encoded_size, mp_image_t *mpi);
-
-void *roq_decode_audio_init(void);
-int roq_decode_audio(unsigned short *output, unsigned char *input,
-  int encoded_size, int channels, void *context);
-
-#endif // ROQAV_H