view libmpcodecs/vf_divtc.c @ 28992:947ef23ba798

Test if create_vdp_decoder() might succeed by calling it from config() with a small value for max_reference_frames. This does not make automatic recovery by using software decoder possible, but lets MPlayer fail more gracefully on - actually existing - buggy hardware that does not support certain H264 widths when using hardware accelerated decoding (784, 864, 944, 1024, 1808, 1888 pixels on NVIDIA G98) and if the user tries to hardware-decode more samples at the same time than supported. Might break playback of H264 Intra-Only samples on hardware with very little video memory.
author cehoyos
date Sat, 21 Mar 2009 20:11:05 +0000
parents ccbde3575f0a
children a49f45515e08
line wrap: on
line source

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <math.h>
#include <stdint.h>

#include "config.h"
#include "mp_msg.h"
#include "cpudetect.h"
#include "libavutil/common.h"
#include "mpbswap.h"

#include "img_format.h"
#include "mp_image.h"
#include "vf.h"

#include "libvo/fastmemcpy.h"

/* Tentative definition: lets analyze() refer to vf_info_divtc.name before
   the initialized definition at the end of the file. */
const vf_info_t vf_info_divtc;

/*
 * Per-instance state of the divtc filter.
 */
struct vf_priv_s
   {
   int deghost;          /* deghosting threshold; >0 enables deghosting,
			    <0 requests the automatic mode resolved by
			    analyze() */
   int pass;             /* 1 = write the log, 2 = read the log,
			    anything else = one-pass mode */
   int phase;            /* telecine phase currently in effect */
   int window;           /* number of entries in the history ring */
   int fcount;           /* number of frame entries in csdata */
   int bcount;           /* number of 5-frame slices in bdata */
   int frameno;          /* running input frame number */
   int misscount;        /* pass-2 checksum mismatches that could not
			    be resynchronized */
   int ocount;           /* number of put_image() calls so far */
   int sum[5];           /* one-pass mode: per-position sums of the frame
			    differences held in history */
   double threshold;     /* minimum pattern strength for accepting
			    a phase */
   FILE *file;           /* two-pass log file, or 0 when not open */
   char *bdata;          /* pass 2: phase for each 5-frame slice */
   unsigned int *csdata; /* pass 2: per-frame checksums; points 15
			    entries past the start of its allocation */
   int *history;         /* one-pass mode: ring of the last `window'
			    frame differences */
   };

/*
 * diff_MMX and diff_C stolen from vf_decimate.c
 */

#if HAVE_MMX && HAVE_EBX_AVAILABLE
/*
 * MMX version of the 8x8 block difference: sums the absolute byte
 * differences between `old' and `new' over 8 rows of 8 bytes each.
 * `os' and `ns' are the row strides of the two blocks.
 */
static int diff_MMX(unsigned char *old, unsigned char *new, int os, int ns)
   {
   /* receives the four 16-bit partial sums stored from mm4 */
   volatile short out[4];
   /* NOTE(review): the asm advances the "S" and "D" input registers but
      declares no outputs -- confirm the compiler cannot rely on them
      still holding `old' and `new' afterwards. */
   __asm__ (
	/* ecx = row counter, mm4 = accumulator, mm7 = zero for unpacking */
	"movl $8, %%ecx \n\t"
	"pxor %%mm4, %%mm4 \n\t"
	"pxor %%mm7, %%mm7 \n\t"

	ASMALIGN(4)
	"1: \n\t"

	/* load 8 bytes of each row and step both pointers by their stride */
	"movq (%%"REG_S"), %%mm0 \n\t"
	"movq (%%"REG_S"), %%mm2 \n\t"
	"add %%"REG_a", %%"REG_S" \n\t"
	"movq (%%"REG_D"), %%mm1 \n\t"
	"add %%"REG_b", %%"REG_D" \n\t"
	/* saturating subtraction in both directions: for each byte one of
	   the two results is |old-new| and the other is 0 */
	"psubusb %%mm1, %%mm2 \n\t"
	"psubusb %%mm0, %%mm1 \n\t"
	"movq %%mm2, %%mm0 \n\t"
	"movq %%mm1, %%mm3 \n\t"
	/* widen bytes to words and accumulate */
	"punpcklbw %%mm7, %%mm0 \n\t"
	"punpcklbw %%mm7, %%mm1 \n\t"
	"punpckhbw %%mm7, %%mm2 \n\t"
	"punpckhbw %%mm7, %%mm3 \n\t"
	"paddw %%mm0, %%mm4 \n\t"
	"paddw %%mm1, %%mm4 \n\t"
	"paddw %%mm2, %%mm4 \n\t"
	"paddw %%mm3, %%mm4 \n\t"

	"decl %%ecx \n\t"
	"jnz 1b \n\t"
	"movq %%mm4, (%%"REG_d") \n\t"
	"emms \n\t"
	:
	: "S" (old), "D" (new), "a" ((long)os), "b" ((long)ns), "d" (out)
	: "%ecx", "memory"
	);
   /* add up the four word lanes */
   return out[0]+out[1]+out[2]+out[3];
   }
#endif

/*
 * Portable 8x8 block difference: returns the sum of absolute byte
 * differences between the 8x8 blocks starting at `old' and `new'.
 * `os' and `ns' are the row strides of the two blocks.
 *
 * Columns 0..7 are compared, the same bytes diff_MMX() covers.  (The
 * loop used to run x=8..1, which skipped column 0 and read one byte
 * past the right edge of the block.)
 */
static int diff_C(unsigned char *old, unsigned char *new, int os, int ns)
   {
   int x, y, d=0;

   for(y=0; y<8; y++, new+=ns, old+=os)
      for(x=0; x<8; x++)
	 d+=abs(new[x]-old[x]);

   return d;
   }

/* 8x8 block difference routine used by diff_plane(); open() points it at
   diff_C or, when available, diff_MMX. */
static int (*diff)(unsigned char *, unsigned char *, int, int);

/*
 * Difference measure between two planes of w x h bytes with row strides
 * `os' and `ns'.  Every complete 8x8 block is measured with diff(); the
 * result is (sum of block differences + block count * largest block
 * difference) / 2.  `arg' is unused and only present so that the function
 * fits the plane operation signature taken by imgop().
 */
static int diff_plane(unsigned char *old, unsigned char *new,
		      int w, int h, int os, int ns, int arg)
   {
   int bx, by, blocks=0, total=0, peak=0;

   (void)arg;

   for(by=0; by+7<h; by+=8)
      {
      unsigned char *orow=old+by*os, *nrow=new+by*ns;

      for(bx=0; bx+7<w; bx+=8)
	 {
	 int score=diff(orow+bx, nrow+bx, os, ns);

	 total+=score;
	 if(peak<score) peak=score;
	 blocks++;
	 }
      }

   return (total+blocks*peak)/2;
   }

/*
static unsigned int checksum_plane(unsigned char *p, unsigned char *z,
				   int w, int h, int s, int zs, int arg)
   {
   unsigned int shift, sum;
   unsigned char *e;

   for(sum=0; h; h--, p+=s-w)
      for(e=p+w, shift=32; p<e;)
	 sum^=(*p++)<<(shift=(shift-8)&31);

   return sum;
   }
*/

/*
 * Checksum of a plane of w x h bytes with row stride `s'.
 *
 * Bytes are XORed into a 32-bit sum at a bit offset that steps down by 8
 * (24, 16, 8, 0, 24, ...) within each row; the commented-out
 * checksum_plane() above shows the plain byte-wise form.  To go faster,
 * the word-aligned middle part of each row is first XOR-folded a machine
 * word at a time and then rotated into the position the leading bytes
 * left off at.
 *
 * `z', `zs' and `arg' are unused; they are only present so that the
 * function fits the plane operation signature taken by imgop().
 */
static unsigned int checksum_plane(unsigned char *p, unsigned char *z,
				   int w, int h, int s, int zs, int arg)
   {
   unsigned int shift;
   uint32_t sum, t;
   unsigned char *e;
#if __WORDSIZE==64
   typedef uint64_t wsum_t;
#else
   typedef uint32_t wsum_t;
#endif
   wsum_t wsum;

   for(sum=0; h; h--, p+=s-w)
      {
      /* leading bytes up to the first word-aligned address; the pointer is
	 converted through uintptr_t because an int may be narrower than
	 a pointer */
      for(shift=0, e=p+w; (uintptr_t)p&(sizeof(wsum_t)-1) && p<e;)
	 sum^=(uint32_t)*p++<<(shift=(shift-8)&31);

      /* whole aligned words
	 NOTE(review): the bytes are read through a wsum_t lvalue, which
	 depends on the compiler tolerating this kind of aliasing. */
      for(wsum=0; e-p>=(int)sizeof(wsum_t); p+=sizeof(wsum_t))
	 wsum^=*(wsum_t *)p;

#if __WORDSIZE==64
      t=be2me_32((uint32_t)(wsum>>32^wsum));
#else
      t=be2me_32(wsum);
#endif

      /* rotate the folded word left by `shift'; a zero shift is handled
	 separately because t>>32 is undefined for a 32-bit type */
      sum^=shift?(t<<shift|t>>(32-shift)):t;

      /* trailing bytes */
      while(p<e)
	 sum^=(uint32_t)*p++<<(shift=(shift-8)&31);
      }

   return sum;
   }

/*
 * Deghost a plane of w x h bytes in place.  Every byte of `d' that
 * differs from the corresponding byte of `s' by at least `threshold' is
 * replaced with 2*d-s clamped to 0..255.  `ds' and `ss' are the row
 * strides of `d' and `s'.  Always returns 0.
 */
static int deghost_plane(unsigned char *d, unsigned char *s,
			 int w, int h, int ds, int ss, int threshold)
   {
   int col, v;

   while(h)
      {
      for(col=0; col<w; col++)
	 {
	 if(abs(d[col]-s[col])<threshold)
	    continue;

	 v=2*d[col]-s[col];
	 if(v<0) v=0;
	 else if(v>255) v=255;
	 d[col]=v;
	 }

      d+=ds;
      s+=ss;
      h--;
      }

   return 0;
   }

/*
 * Plane operation for imgop() that copies `h' rows of `bpl' bytes from `s'
 * to `d' with memcpy_pic().  `dummy' is unused.  Always returns 0.
 */
static int copyop(unsigned char *d, unsigned char *s, int bpl, int h, int dstride, int sstride, int dummy)
   {
   (void)dummy;
   memcpy_pic(d, s, bpl, h, dstride, sstride);
   return 0;
   }

/*
 * Apply a plane operation to every plane of an image.
 *
 * planeop is called as planeop(dst plane, src plane, width in bytes,
 * height, dst stride, src stride, arg).  `src' may be 0, in which case
 * the operation gets a null source pointer and a zero source stride.
 *
 * Planar images get one call with the luma dimensions and two with the
 * chroma dimensions; the return value is the sum of the three results.
 * The sum is accumulated as unsigned so that full-range results (such as
 * the checksums put_image() collects through this function) wrap around
 * instead of overflowing a signed int.  Other images are handled as a
 * single plane of w*(bpp/8) bytes per row.
 */
static int imgop(int(*planeop)(unsigned char *, unsigned char *,
			       int, int, int, int, int),
		 mp_image_t *dst, mp_image_t *src, int arg)
   {
   unsigned int total=0;
   int i;

   if(!(dst->flags&MP_IMGFLAG_PLANAR))
      return planeop(dst->planes[0], src?src->planes[0]:0,
		     dst->w*(dst->bpp/8), dst->h,
		     dst->stride[0], src?src->stride[0]:0, arg);

   for(i=0; i<3; i++)
      total+=(unsigned int)planeop(dst->planes[i], src?src->planes[i]:0,
				   i?dst->chroma_width:dst->w,
				   i?dst->chroma_height:dst->h,
				   dst->stride[i], src?src->stride[i]:0,
				   arg);

   return (int)total;
   }

/*
 * Find the phase in which the telecine pattern fits best to the given
 * 5 frame slice of frame difference measurements.
 *
 * Each candidate phase is scored by correlating diffs[0..4] with the
 * weight pattern rotated by that phase; a different weight pattern is
 * used when p->deghost is positive.  If phase1 and phase2 are both
 * non-negative, only those two phases are scored.
 *
 * Returns the best phase.  If `strength' is not null, it receives
 * (best - second best) / best, or 0.0 when the best score is not positive.
 */

static int match(struct vf_priv_s *p, int *diffs,
		 int phase1, int phase2, double *strength)
   {
   static const int plain[5]={ -4,  1, 1, 1, 1 };
   static const int ghosted[5]={ -2, -3, 4, 4, -3 };
   const int *weights=p->deghost>0?ghosted:plain;
   const int restricted=(phase1>=0 && phase2>=0);
   int score[5], phase, k, best, second;

   for(phase=0; phase<5; phase++)
      {
      if(restricted && phase!=phase1 && phase!=phase2)
	 {
	 /* excluded phases can never win */
	 score[phase]=INT_MIN;
	 continue;
	 }

      score[phase]=0;
      for(k=0; k<5; k++)
	 score[phase]+=diffs[k]*weights[(k-phase+5)%5];
      }

   /* the best match; the lowest phase wins ties */
   best=0;
   for(phase=1; phase<5; phase++)
      if(score[phase]>score[best]) best=phase;

   if(!strength)
      return best;

   /* the second best match */
   second=best?0:1;
   for(phase=second+1; phase<5; phase++)
      if(phase!=best && score[phase]>score[second]) second=phase;

   if(score[best]>0)
      *strength=(double)(score[best]-score[second])/score[best];
   else
      *strength=0.0;

   return best;
   }

/*
 * Per-frame entry point of the filter.
 *
 * Determines the telecine phase to use for this frame (from the pass-2
 * data, or from the running statistics in one-pass mode), then, depending
 * on the frame's position relative to that phase, either swallows the
 * frame (returns 0 without passing anything on), passes on a deghosted
 * copy, or passes the frame on unchanged.  In pass 1 a checksum and
 * a difference value are also written to the log for every frame.
 *
 * dmpi is requested as a static, preserved image and the current frame is
 * copied into it on every exit path, so on entry it is used as the
 * previous input frame.
 */
static int put_image(struct vf_instance_s* vf, mp_image_t *mpi, double pts)
   {
   mp_image_t *dmpi, *tmpi=0;
   int n, m, f, newphase;
   struct vf_priv_s *p=vf->priv;
   unsigned int checksum;
   double d;

   dmpi=vf_get_image(vf->next, mpi->imgfmt,
		     MP_IMGTYPE_STATIC, MP_IMGFLAG_ACCEPT_STRIDE |
		     MP_IMGFLAG_PRESERVE | MP_IMGFLAG_READABLE,
		     mpi->width, mpi->height);
   vf_clone_mpi_attributes(dmpi, mpi);

   newphase=p->phase;

   switch(p->pass)
      {
      case 1:
	 /* log the frame checksum and the difference to the previous
	    frame (0 for the very first frame) */
	 fprintf(p->file, "%08x %d\n",
		 (unsigned int)imgop((void *)checksum_plane, mpi, 0, 0),
		 p->frameno?imgop(diff_plane, dmpi, mpi, 0):0);
	 break;

      case 2:
	 if(p->frameno/5>p->bcount)
	    {
	    mp_msg(MSGT_VFILTER, MSGL_ERR,
		   "\n%s: Log file ends prematurely! "
		   "Switching to one pass mode.\n", vf->info->name);
	    p->pass=0;
	    break;
	    }

	 checksum=(unsigned int)imgop((void *)checksum_plane, mpi, 0, 0);

	 if(checksum!=p->csdata[p->frameno])
	    {
	    /* look for the checksum up to 99 frames ahead of or behind
	       the expected position */
	    for(f=0; f<100; f++)
	       if(p->frameno+f<p->fcount && p->csdata[p->frameno+f]==checksum)
		  break;
	       else if(p->frameno-f>=0 && p->csdata[p->frameno-f]==checksum)
		  {
		  f=-f;
		  break;
		  }

	    if(f<100)
	       {
	       mp_msg(MSGT_VFILTER, MSGL_INFO,
		      "\n%s: Mismatch with pass-1: %+d frame(s).\n",
		      vf->info->name, f);

	       /* resynchronize to the position where the frame was found */
	       p->frameno+=f;
	       p->misscount=0;
	       }
	    else if(p->misscount++>=30)
	       {
	       mp_msg(MSGT_VFILTER, MSGL_ERR,
		      "\n%s: Sync with pass-1 lost! "
		      "Switching to one pass mode.\n", vf->info->name);
	       p->pass=0;
	       break;
	       }
	    }

	 /* phase recorded for this 5-frame slice, clamped to the last one */
	 n=(p->frameno)/5;
	 if(n>=p->bcount) n=p->bcount-1;

	 newphase=p->bdata[n];
	 break;

      default:
	 /* one-pass mode: replace the oldest entry of the history ring
	    with the new difference and keep the per-position sums
	    up to date */
	 if(p->frameno)
	    {
	    int *sump=p->sum+p->frameno%5,
	       *histp=p->history+p->frameno%p->window;

	    *sump-=*histp;
	    *sump+=(*histp=imgop(diff_plane, dmpi, mpi, 0));
	    }

	 m=match(p, p->sum, -1, -1, &d);

	 if(d>=p->threshold)
	    newphase=m;
      }

   n=p->ocount++%5;

   /* NOTE(review): the phase is only switched when the old and the new
      phase compare the same way against the position in the current
      group of five calls -- presumably so that a switch cannot change
      how many frames get dropped within the group; confirm. */
   if(newphase!=p->phase && ((p->phase+4)%5<n)==((newphase+4)%5<n))
      {
      p->phase=newphase;
      mp_msg(MSGT_VFILTER, MSGL_STATUS,
	     "\n%s: Telecine phase %d.\n", vf->info->name, p->phase);
      }

   switch((p->frameno++-p->phase+10)%5)
      {
      case 0:
	 /* remember the frame for the next call, but do not pass
	    anything on */
	 imgop(copyop, dmpi, mpi, 0);
	 return 0;

      case 4:
	 if(p->deghost>0)
	    {
	    tmpi=vf_get_image(vf->next, mpi->imgfmt,
			      MP_IMGTYPE_TEMP, MP_IMGFLAG_ACCEPT_STRIDE |
			      MP_IMGFLAG_READABLE,
			      mpi->width, mpi->height);
	    vf_clone_mpi_attributes(tmpi, mpi);

	    /* deghost a copy of the current frame against the previous
	       one, then remember the unmodified current frame */
	    imgop(copyop, tmpi, mpi, 0);
	    imgop(deghost_plane, tmpi, dmpi, p->deghost);
	    imgop(copyop, dmpi, mpi, 0);
	    return vf_next_put_image(vf, tmpi, MP_NOPTS_VALUE);
	    }
      }

   imgop(copyop, dmpi, mpi, 0);
   return vf_next_put_image(vf, dmpi, MP_NOPTS_VALUE);
   }

/*
 * Read the pass-1 log from p->file and work out the telecine phase of
 * every 5-frame slice.
 *
 * On success returns 1 with
 *   p->csdata - per-frame checksums (pointing 15 entries into its
 *               allocation, see uninit()),
 *   p->fcount - number of frames, padded to a multiple of 5,
 *   p->bdata  - phase (0..4) for each slice,
 *   p->bcount - number of slices,
 * and, if p->deghost was negative (automatic mode), p->deghost resolved
 * to either its absolute value or 0.
 *
 * On failure an error is logged and 0 is returned.  The difference buffer
 * is always released here; whatever has already been stored in p is left
 * for uninit() to release.
 */
static int analyze(struct vf_priv_s *p)
   {
   int *buf=0, *bp, bufsize=0, n, b, f, i, j, m, s;
   unsigned int *cbuf=0, *cp;
   /* Explicitly signed: unset slots are marked with -1, which a plain
      char cannot hold on platforms where char is unsigned. */
   signed char *pbuf;
   char lbuf[256];
   int sum[5];
   double d;

   /* read the file */

   /* the first 15 entries are reserved for dummy data, see below */
   n=15;
   while(fgets(lbuf, 256, p->file))
      {
      if(n>=bufsize-19)
	 {
	 int oldsize=bufsize;

	 bufsize=bufsize?bufsize*2:30000;
	 if((bp=realloc(buf, bufsize*sizeof *buf))) buf=bp;
	 if((cp=realloc(cbuf, bufsize*sizeof *cbuf))) cbuf=cp;

	 if(!bp || !cp)
	    {
	    mp_msg(MSGT_VFILTER, MSGL_FATAL, "%s: Not enough memory.\n",
		   vf_info_divtc.name);
	    free(buf);
	    free(cbuf);
	    return 0;
	    }

	 /* Clear the new part so that entries sscanf() fails to fill in,
	    and the padding copied around for very short logs, have
	    a defined value. */
	 memset(buf+oldsize, 0, (bufsize-oldsize)*sizeof *buf);
	 memset(cbuf+oldsize, 0, (bufsize-oldsize)*sizeof *cbuf);
	 }
      sscanf(lbuf, "%x %d", cbuf+n, buf+n);
      n++;
      }

   /* n started at 15, so 15 means that not a single line was read */
   if(n==15)
      {
      mp_msg(MSGT_VFILTER, MSGL_FATAL, "%s: Empty 2-pass log file.\n",
	     vf_info_divtc.name);
      free(buf);
      free(cbuf);
      return 0;
      }

   /* generate some dummy data past the beginning and end of the array */

   buf+=15, cbuf+=15;
   n-=15;

   memcpy(buf-15, buf, 15*sizeof *buf);
   memset(cbuf-15, 0, 15*sizeof *cbuf);

   while(n%5)
      buf[n]=buf[n-5], cbuf[n]=0, n++;

   memcpy(buf+n, buf+n-15, 15*sizeof *buf);
   memset(cbuf+n, 0, 15*sizeof *cbuf);

   p->csdata=cbuf;
   p->fcount=n;

   /* array with one slot for each slice of 5 frames */

   b=n/5;
   if(!(pbuf=malloc(b)))
      {
      mp_msg(MSGT_VFILTER, MSGL_FATAL, "%s: Not enough memory.\n",
	     vf_info_divtc.name);
      free(buf-15);
      return 0;
      }

   p->bdata=(char *)pbuf;
   p->bcount=b;
   memset(pbuf, -1, b);

   /* resolve the automatic mode */

   if(p->deghost<0)
      {
      int deghost=-p->deghost;
      double s0=0.0, s1=0.0;

      /* total pattern strength with each of the two weight patterns */
      for(f=0; f<n; f+=5)
	 {
	 p->deghost=0; match(p, buf+f, -1, -1, &d); s0+=d;
	 p->deghost=1; match(p, buf+f, -1, -1, &d); s1+=d;
	 }

      p->deghost=s1>s0?deghost:0;

      mp_msg(MSGT_VFILTER, MSGL_INFO,
	     "%s: Deghosting %-3s (relative pattern strength %+.2fdB).\n",
	     vf_info_divtc.name,
	     p->deghost?"ON":"OFF",
	     10.0*log10(s1/s0));
      }

   /* analyze the data */

   /* sums over a window of 7 slices centered on slice 0 */
   for(f=0; f<5; f++)
      for(sum[f]=0, n=-15; n<20; n+=5)
	 sum[f]+=buf[n+f];

   for(f=0; f<b; f++)
      {
      m=match(p, sum, -1, -1, &d);

      if(d>=p->threshold)
	 pbuf[f]=m;

      /* slide the window one slice forward */
      if(f<b-1)
	 for(n=0; n<5; n++)
	    sum[n]=sum[n]-buf[5*(f-3)+n]+buf[5*(f+4)+n];
      }

   /* fill in the gaps */

   /* the beginning */
   for(f=0; f<b && pbuf[f]==-1; f++);

   if(f==b)
      {
      free(buf-15);
      mp_msg(MSGT_VFILTER, MSGL_FATAL, "%s: No telecine pattern found!\n",
	     vf_info_divtc.name);
      return 0;
      }

   for(n=0; n<f; pbuf[n++]=pbuf[f]);

   /* the end */
   for(f=b-1; pbuf[f]==-1; f--);
   for(n=f+1; n<b; pbuf[n++]=pbuf[f]);

   /* the rest */
   for(f=0;;)
      {
      /* f = first unset slot, n = first set slot after it */
      while(f<b && pbuf[f]!=-1) f++;
      if(f==b) break;
      for(n=f; pbuf[n]==-1; n++);

      if(pbuf[f-1]==pbuf[n])
	 {
	 /* just a gap */
	 while(f<n) pbuf[f++]=pbuf[n];
	 }
      else
	 {
	 /* phase change, reanalyze the original data in the gap with zero
	    threshold for only the two phases that appear at the ends */

	 for(i=0; i<5; i++)
	    for(sum[i]=0, j=5*f-15; j<5*f; j+=5)
	       sum[i]+=buf[i+j];

	 for(i=f; i<n; i++)
	    {
	    pbuf[i]=match(p, sum, pbuf[f-1], pbuf[n], 0);

	    for(j=0; j<5; j++)
	       sum[j]=sum[j]-buf[5*(i-3)+j]+buf[5*(i+4)+j];
	    }

	 /* estimate the transition point by dividing the gap
	    in the same proportion as the number of matches of each kind */

	 for(i=f, m=f; i<n; i++)
	    if(pbuf[i]==pbuf[f-1]) m++;

	 /* find the transition of the right direction nearest to the
	    estimated point */

	 if(m>f && m<n)
	    {
	    for(j=m; j>f; j--)
	       if(pbuf[j-1]==pbuf[f-1] && pbuf[j]==pbuf[n]) break;
	    for(s=m; s<n; s++)
	       if(pbuf[s-1]==pbuf[f-1] && pbuf[s]==pbuf[n]) break;

	    m=(s-m<m-j)?s:j;
	    }

	 /* and rewrite the data to allow only this one transition */

	 for(i=f; i<m; i++)
	    pbuf[i]=pbuf[f-1];

	 for(; i<n; i++)
	    pbuf[i]=pbuf[n];

	 f=n;
	 }
      }

   free(buf-15);

   return 1;
   }

/*
 * Format negotiation: the image formats listed below are passed on to the
 * next filter's query; 0 is returned for everything else.
 */
static int query_format(struct vf_instance_s* vf, unsigned int fmt)
   {
   static const unsigned int supported[]=
      {
      IMGFMT_444P, IMGFMT_IYUV, IMGFMT_RGB24,
      IMGFMT_422P, IMGFMT_UYVY, IMGFMT_BGR24,
      IMGFMT_411P, IMGFMT_YUY2, IMGFMT_IF09,
      IMGFMT_YV12, IMGFMT_I420, IMGFMT_YVU9,
      IMGFMT_IUYV, IMGFMT_Y800, IMGFMT_Y8
      };
   unsigned int i;

   for(i=0; i<sizeof supported/sizeof supported[0]; i++)
      if(supported[i]==fmt)
	 return vf_next_query_format(vf,fmt);

   return 0;
   }

static void uninit(struct vf_instance_s* vf)
   {
   if(vf->priv)
      {
      if(vf->priv->file) fclose(vf->priv->file);
      if(vf->priv->csdata) free(vf->priv->csdata-15);
      free(vf->priv->bdata);
      free(vf->priv->history);
      free(vf->priv);
      }
   }

/*
 * Filter constructor.
 *
 * Installs the callbacks, allocates the private data and parses `args',
 * a ':' separated list of name=value options that are recognized by
 * their first letter ("ph..." selects phase, any other "p..." selects
 * pass).  Pass 1 then creates the log file; pass 2 opens it, runs
 * analyze() on it and closes it again.  Finally the one-pass history is
 * allocated and the block difference routine is selected.
 *
 * Returns 1 on success.  On failure an error is logged, everything
 * allocated so far is released through uninit() and 0 is returned.
 */
static int open(vf_instance_t *vf, char* args)
   {
   struct vf_priv_s *p;
   char *filename="framediff.log", *ap, *q, *a;

   /* work on a private copy, the parser below writes into the string */
   if(args && !(args=strdup(args)))
      {
   nomem:
      mp_msg(MSGT_VFILTER, MSGL_FATAL,
	     "%s: Not enough memory.\n", vf->info->name);
   fail:
      uninit(vf);
      free(args);
      return 0;
      }

   vf->put_image=put_image;
   vf->uninit=uninit;
   vf->query_format=query_format;
   vf->default_reqs=VFCAP_ACCEPT_STRIDE;
   if(!(vf->priv=p=calloc(1, sizeof(struct vf_priv_s))))
      goto nomem;

   p->phase=5;
   p->threshold=0.5;
   p->window=30;

   if((ap=args))
      while(*ap)
	 {
	 /* q = option name, a = its value (empty if there is no '='),
	    ap = start of the next option */
	 q=ap;
	 if((ap=strchr(q, ':'))) *ap++=0; else ap=q+strlen(q);
	 if((a=strchr(q, '='))) *a++=0; else a=q+strlen(q);

	 switch(*q)
	    {
	    case 0:                              break;
	    case 'f': filename=a;                break;
	    case 't': p->threshold=atof(a);      break;
	    /* Round up to a multiple of 5: put_image() pairs the history
	       slot frameno%window with the sum slot frameno%5, which only
	       stays consistent if the window is a multiple of 5.
	       (5*(x+4)/5 without the inner parentheses is just x+4.) */
	    case 'w': p->window=5*((atoi(a)+4)/5); break;
	    case 'd': p->deghost=atoi(a);        break;
	    case 'p':
	       if(q[1]=='h') p->phase=atoi(a);
	       else p->pass=atoi(a);
	       break;

	    case 'h':
	       mp_msg(MSGT_VFILTER, MSGL_INFO,
		      "\n%s options:\n\n"
		      "pass=1|2         - Use 2-pass mode.\n"
		      "file=filename    - Set the 2-pass log file name "
		      "(default %s).\n"
		      "threshold=value  - Set the pattern recognition "
		      "sensitivity (default %g).\n"
		      "deghost=value    - Select deghosting threshold "
		      "(default %d).\n"
		      "window=numframes - Set the statistics window "
		      "for 1-pass mode (default %d).\n"
		      "phase=0|1|2|3|4  - Set the initial phase "
		      "for 1-pass mode (default %d).\n\n"
		      "The option names can be abbreviated to the shortest "
		      "unique prefix.\n\n",
		      vf->info->name, filename, p->threshold, p->deghost,
		      p->window, p->phase%5);
	       break;

	    default:
	       mp_msg(MSGT_VFILTER, MSGL_FATAL,
		      "%s: Unknown argument %s.\n", vf->info->name, q);
	       goto fail;
	    }
	 }

   switch(p->pass)
      {
      case 1:
	 if(!(p->file=fopen(filename, "w")))
	    {
	    mp_msg(MSGT_VFILTER, MSGL_FATAL,
		   "%s: Can't create file %s.\n", vf->info->name, filename);
	    goto fail;
	    }

	 break;

      case 2:
	 if(!(p->file=fopen(filename, "r")))
	    {
	    mp_msg(MSGT_VFILTER, MSGL_FATAL,
		   "%s: Can't open file %s.\n", vf->info->name, filename);
	    goto fail;
	    }

	 if(!analyze(p))
	    goto fail;

	 fclose(p->file);
	 p->file=0;
	 break;
      }

   /* the history is allocated in every mode because pass 2 can fall back
      to one-pass mode in put_image() */
   if(p->window<5) p->window=5;
   if(!(p->history=calloc(sizeof *p->history, p->window)))
      goto nomem;

   diff = diff_C;
#if HAVE_MMX && HAVE_EBX_AVAILABLE
   if(gCpuCaps.hasMMX) diff = diff_MMX;
#endif

   /* filename may point into args, it must not be used past this point */
   free(args);
   return 1;
   }

/*
 * Filter descriptor; the only function it refers to is open() above.
 * NOTE(review): the initializers are presumably description, short name,
 * author, comment, constructor and option descriptor, in that order --
 * confirm against the definition of vf_info_t in vf.h.
 */
const vf_info_t vf_info_divtc =
   {
   "inverse telecine for deinterlaced video",
   "divtc",
   "Ville Saari",
   "",
   open,
   NULL
   };