Mercurial > mplayer.hg
changeset 11600:5eb66d37d539
Yet another inverse telecine filter by Zoltan Hidvegi <mplayer@hzoli.2y.net>. Also heavily MMX centric.
author | alex |
---|---|
date | Mon, 08 Dec 2003 22:57:47 +0000 |
parents | ad9216814665 |
children | 1b26a30d4229 |
files | DOCS/man/en/mplayer.1 libmpcodecs/Makefile libmpcodecs/cmmx.h libmpcodecs/vf.c libmpcodecs/vf_filmdint.c |
diffstat | 5 files changed, 1706 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
--- a/DOCS/man/en/mplayer.1 Mon Dec 08 22:28:50 2003 +0000 +++ b/DOCS/man/en/mplayer.1 Mon Dec 08 22:57:47 2003 +0000 @@ -2917,6 +2917,80 @@ than 24 fps, meaning you'll have trouble if you use it with mencoder \-ofps 23.976. .TP +.B filmdint[=options] +Inverse telecine filter, similar to the pullup filter above. +It is designed to handle any pulldown pattern, including mixed soft and +hard telecine and limited support for movies that are slowed down or sped +up from their original framerate for TV. +Only the luma plane is used to find the frame breaks. +If a field has no match, it is deinterlaced with simple linear +approximation. +If the source is MPEG-2, libmpeg2 must be used for decoding (not +ffmpeg2!), and this must be the first filter to allow access to the +field-flags set by the MPEG-2 decoder. +Depending on the source mpeg, you may be fine ignoring this advice, as +long as you do not see lots of "Bottom-first field" warnings. +With no options it does normal inverse telecine, and should be used +together with mencoder \-fps 29.97 \-ofps 23.976. +When this filter is used with mplayer, it will result in an uneven +framerate during playback, but it is still generally better than using +pp=lb or no deinterlacing at all. +Multiple options can be specified separated by /. +.RSs +.IPs crop=w:h:x:y +Just like the crop filter, but faster, and works on mixed hard and soft +telecined content as well as when y is not a multiple of 4. +If x or y would require cropping fractional pixels from the chroma +planes, the crop area is extended. +This usually means that x and y must be even. +.IPs io=ifps:ofps +For each ifps input frames the filter will output ofps frames. +The ratio of ifps/ofps should match the \-fps/\-ofps ratio. +This could be used to filter movies that are broadcast on TV at a frame +rate different from their original frame rate. +.IPs luma_only=n +If n is nonzero, the chroma plane is copied unchanged. +This is useful for YV12 sampled TV, which discards one of the chroma +fields. +.IPs mmx2=n +On x86, if n=1, use MMX2 optimized functions, if n=2, use 3DNow! +optimized functions, othewise, use plain C. +If this option is not specified, MMX2 and 3DNow! are auto-detected, use +this option to override auto-detection. +.IPs fast=n +The larger n will speed up the filter at the expense of accuracy. +The default value is n=3. +If n is odd, a frame immediately following a frame marked with the +REPEAT_FIRST_FIELD mpeg flag is assumed to be progressive, thus filter +will not spend any time on soft-telecined MPEG-2 content. +This is the only effect of this flag if MMX2 or 3DNow! is available. +Without MMX2 and 3DNow, if n=0 or 1, the same calculations will be used +as with MMX2. +If n=2 or 3, the number of luma levels used to find the frame breaks is +reduced from 256 to 128, which results in a faster filter without losing +much accuracy. +If n=4 or 5, a faster, but much less accurate metrics will be used to +find the frame breaks, which is more likely to misdetect high vertical +detail as interlaced content. +.IPs verbose=n +If n is nonzero, print the detailed metrics for each frame. +Useful for debugging. +.IPs dint_thres=n +Deinterlace threshold. +Used during de-interlacing of unmatched frames. +Larger value means less deinterlacing, use n=256 to completely turn off +deinterlacing. +Default is n=8. +.IPs comb_thres=n +Threshold for comparing a top and bottom fields. +Defaults to 128. +.IPs diff_thres=n +Threshold to detect temporal change of a field. +Default is 128. +.IPs sad_thres=n +Sum of Absolute Difference threshold, default is 64. +.RE +.TP .B softpulldown This filter works only correct with MEncoder and acts on the MPEG2 flags used for soft 3:2 pulldown (soft telecine).
--- a/libmpcodecs/Makefile Mon Dec 08 22:28:50 2003 +0000 +++ b/libmpcodecs/Makefile Mon Dec 08 22:57:47 2003 +0000 @@ -14,7 +14,7 @@ VIDEO_SRCS_OPT=vd_realvid.c vd_ffmpeg.c vd_dshow.c vd_dmo.c vd_vfw.c vd_vfwex.c vd_odivx.c vd_divx4.c vd_zrmjpeg.c vd_xanim.c vd_xvid.c vd_xvid4.c vd_libdv.c vd_qtvideo.c vd_theora.c VIDEO_SRCS=dec_video.c vd.c $(VIDEO_SRCS_NAT) $(VIDEO_SRCS_LIB) $(VIDEO_SRCS_OPT) -VFILTER_SRCS=vf.c vf_vo.c vf_crop.c vf_expand.c vf_scale.c vf_format.c vf_yuy2.c vf_flip.c vf_rgb2bgr.c vf_rotate.c vf_mirror.c vf_palette.c vf_lavc.c vf_dvbscale.c vf_cropdetect.c vf_test.c vf_noise.c vf_yvu9.c vf_rectangle.c vf_lavcdeint.c vf_eq.c vf_eq2.c vf_halfpack.c vf_dint.c vf_1bpp.c vf_bmovl.c vf_2xsai.c vf_unsharp.c vf_swapuv.c vf_il.c vf_boxblur.c vf_sab.c vf_smartblur.c vf_perspective.c vf_down3dright.c vf_field.c vf_denoise3d.c vf_hqdn3d.c vf_detc.c vf_telecine.c vf_tfields.c vf_ivtc.c vf_ilpack.c vf_dsize.c vf_decimate.c vf_softpulldown.c vf_tinterlace.c vf_pullup.c pullup.c vf_framestep.c vf_tile.c vf_delogo.c vf_fil.c vf_hue.c vf_spp.c vf_yuvcsp.c +VFILTER_SRCS=vf.c vf_vo.c vf_crop.c vf_expand.c vf_scale.c vf_format.c vf_yuy2.c vf_flip.c vf_rgb2bgr.c vf_rotate.c vf_mirror.c vf_palette.c vf_lavc.c vf_dvbscale.c vf_cropdetect.c vf_test.c vf_noise.c vf_yvu9.c vf_rectangle.c vf_lavcdeint.c vf_eq.c vf_eq2.c vf_halfpack.c vf_dint.c vf_1bpp.c vf_bmovl.c vf_2xsai.c vf_unsharp.c vf_swapuv.c vf_il.c vf_boxblur.c vf_sab.c vf_smartblur.c vf_perspective.c vf_down3dright.c vf_field.c vf_denoise3d.c vf_hqdn3d.c vf_detc.c vf_telecine.c vf_tfields.c vf_ivtc.c vf_ilpack.c vf_dsize.c vf_decimate.c vf_softpulldown.c vf_tinterlace.c vf_pullup.c pullup.c vf_framestep.c vf_tile.c vf_delogo.c vf_fil.c vf_hue.c vf_spp.c vf_yuvcsp.c vf_filmdint.c ifeq ($(HAVE_FFPOSTPROCESS),yes) VFILTER_SRCS += vf_pp.c endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libmpcodecs/cmmx.h Mon Dec 08 22:57:47 2003 +0000 @@ -0,0 +1,186 @@ +/* + * x86 MMX and MMX2 packed byte operations in portable C. + * Extra instructions: pdiffub, pcmpzb, psumbw, pcmpgtub + * Author: Zoltan Hidvegi + */ + +#ifndef __CMMX_H +#define __CMMX_H + +typedef unsigned long cmmx_t; + +#define ONE_BYTES (~(cmmx_t)0 / 255) +#define SIGN_BITS (ONE_BYTES << 7) +#define LOWBW_MASK (~(cmmx_t)0 / 257) + +static inline cmmx_t +paddb(cmmx_t a, cmmx_t b) +{ + return ((a & ~SIGN_BITS) + (b & ~SIGN_BITS)) ^ ((a^b) & SIGN_BITS); +} + +static inline cmmx_t +psubb(cmmx_t a, cmmx_t b) +{ + return ((a | SIGN_BITS) - (b & ~SIGN_BITS)) ^ (~(a^b) & SIGN_BITS); +} + +static inline cmmx_t +paddusb(cmmx_t a, cmmx_t b) +{ + cmmx_t s = (a & ~SIGN_BITS) + (b & ~SIGN_BITS); + cmmx_t abs = (a | b) & SIGN_BITS; + cmmx_t c = abs & (s | (a & b)); + return s | abs | (abs - (c >> 7)); +} + +static inline cmmx_t +paddusb_s(cmmx_t a, cmmx_t b) +{ + cmmx_t sum = a+b; + cmmx_t ov = sum & SIGN_BITS; + return sum + (sum ^ (ov - (ov>>7))); +} + +static inline cmmx_t +psubusb(cmmx_t a, cmmx_t b) +{ + cmmx_t s = (a | SIGN_BITS) - (b & ~SIGN_BITS); + cmmx_t anb = a & ~b; + cmmx_t c = (anb | (s & ~(a^b))) & SIGN_BITS; + return s & ((c & anb) | (c - (c >> 7))); +} + +static inline cmmx_t +psubusb_s(cmmx_t a, cmmx_t b) +{ + cmmx_t d = (a|SIGN_BITS) - b; + cmmx_t m = d & SIGN_BITS; + return d & (m - (m>>7)); +} + +static inline cmmx_t +pcmpgtub(cmmx_t b, cmmx_t a) +{ + cmmx_t s = (a | SIGN_BITS) - (b & ~SIGN_BITS); + cmmx_t ret = ((~a & b) | (~s & ~(a ^ b))) & SIGN_BITS; + return ret | (ret - (ret >> 7)); +} + +static inline cmmx_t +pdiffub(cmmx_t a, cmmx_t b) +{ + cmmx_t xs = (~a ^ b) & SIGN_BITS; + cmmx_t s = ((a | SIGN_BITS) - (b & ~SIGN_BITS)) ^ xs; + cmmx_t gt = ((~a & b) | (s & xs)) & SIGN_BITS; + cmmx_t gt7 = gt >> 7; + return (s ^ gt ^ (gt - gt7)) + gt7; +} + +static inline cmmx_t +pdiffub_s(cmmx_t a, cmmx_t b) +{ + cmmx_t d = (a|SIGN_BITS) - b; + cmmx_t g = (~d & SIGN_BITS) >> 7; + return (d ^ (SIGN_BITS-g)) + g; +} + +static inline cmmx_t +pmaxub(cmmx_t a, cmmx_t b) +{ + return psubusb(a,b) + b; +} + +static inline cmmx_t +pminub(cmmx_t a, cmmx_t b) +{ + return paddusb(a,~b) - ~b; +} + +static inline cmmx_t +pminub_s(cmmx_t a, cmmx_t b) +{ + cmmx_t d = (a|SIGN_BITS) - b; + cmmx_t m = ~SIGN_BITS + ((d&SIGN_BITS)>>7); + return ((d&m) + b) & ~SIGN_BITS; +} + +static inline cmmx_t +pavgb(cmmx_t a, cmmx_t b) +{ + cmmx_t ao = a & ONE_BYTES; + cmmx_t bo = b & ONE_BYTES; + return ((a^ao)>>1) + ((b^bo)>>1) + (ao|bo); +} + +static inline cmmx_t +pavgb_s(cmmx_t a, cmmx_t b) +{ + return ((a+b+ONE_BYTES)>>1) & ~SIGN_BITS; +} + +static inline cmmx_t +p31avgb(cmmx_t a, cmmx_t b) +{ + cmmx_t ao = a & (3*ONE_BYTES); + cmmx_t bo = b & (3*ONE_BYTES); + return 3*((a^ao)>>2) + ((b^bo)>>2) + + (((3*ao+bo+2*ONE_BYTES)>>2) & (3*ONE_BYTES)); +} + +static inline cmmx_t +p31avgb_s(cmmx_t a, cmmx_t b) +{ + cmmx_t avg = ((a+b)>>1) & ~SIGN_BITS; + return pavgb_s(avg, a); +} + +static inline unsigned long +psumbw(cmmx_t a) +{ + cmmx_t t = (a & LOWBW_MASK) + ((a>>8) & LOWBW_MASK); + unsigned long ret = + (unsigned long)t + (unsigned long)(t >> (4*sizeof(cmmx_t))); + if (sizeof(cmmx_t) > 4) + ret += ret >> 16; + return ret & 0xffff; +} + +static inline unsigned long +psumbw_s(cmmx_t a) +{ + unsigned long ret = + (unsigned long)a + (unsigned long)(a >> (4*sizeof(cmmx_t))); + if (sizeof(cmmx_t) <= 4) + return (ret & 0xff) + ((ret>>8) & 0xff); + ret = (ret & 0xff00ff) + ((ret>>8) & 0xff00ff); + ret += ret >> 16; + return ret & 0xffff; +} + +static inline unsigned long +psadbw(cmmx_t a, cmmx_t b) +{ + return psumbw(pdiffub(a,b)); +} + +static inline unsigned long +psadbw_s(cmmx_t a, cmmx_t b) +{ + return psumbw_s(pdiffub_s(a,b)); +} + +static inline cmmx_t +pcmpzb(cmmx_t a) +{ + cmmx_t ret = (((a | SIGN_BITS) - ONE_BYTES) | a) & SIGN_BITS; + return ~(ret | (ret - (ret >> 7))); +} + +static inline cmmx_t +pcmpeqb(cmmx_t a, cmmx_t b) +{ + return pcmpzb(a ^ b); +} + +#endif
--- a/libmpcodecs/vf.c Mon Dec 08 22:28:50 2003 +0000 +++ b/libmpcodecs/vf.c Mon Dec 08 22:57:47 2003 +0000 @@ -75,6 +75,7 @@ extern vf_info_t vf_info_decimate; extern vf_info_t vf_info_softpulldown; extern vf_info_t vf_info_pullup; +extern vf_info_t vf_info_filmdint; extern vf_info_t vf_info_framestep; extern vf_info_t vf_info_tile; extern vf_info_t vf_info_delogo; @@ -143,6 +144,7 @@ &vf_info_decimate, &vf_info_softpulldown, &vf_info_pullup, + &vf_info_filmdint, &vf_info_framestep, &vf_info_tile, &vf_info_delogo,
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libmpcodecs/vf_filmdint.c Mon Dec 08 22:57:47 2003 +0000 @@ -0,0 +1,1443 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <sys/time.h> + +#include "../config.h" +#include "../mp_msg.h" +#include "../cpudetect.h" + +#include "img_format.h" +#include "mp_image.h" +#include "vf.h" +#include "cmmx.h" + +#include "../libvo/fastmemcpy.h" + +#define NUM_STORED 4 + +enum pu_field_type_t { + PU_1ST_OF_3, + PU_2ND_OF_3, + PU_3RD_OF_3, + PU_1ST_OF_2, + PU_2ND_OF_2, + PU_INTERLACED +}; + +struct metrics { + /* This struct maps to a packed word 64-bit MMX register */ + unsigned short int even; + unsigned short int odd; + unsigned short int noise; + unsigned short int temp; +} __attribute__ ((aligned (8))); + +struct frame_stats { + struct metrics tiny, low, high, bigger, twox, max; + struct { unsigned int even, odd, noise, temp; } sad; + unsigned short interlaced_high; + unsigned short interlaced_low; + unsigned short num_blocks; +}; + +struct vf_priv_s { + unsigned long inframes; + unsigned long outframes; + enum pu_field_type_t prev_type; + unsigned swapped, chroma_swapped; + unsigned luma_only; + unsigned verbose; + unsigned fast; + unsigned long w, h, cw, ch, stride, chroma_stride, nplanes; + unsigned long sad_thres; + unsigned long dint_thres; + unsigned char *memory_allocated; + unsigned char *planes[2*NUM_STORED][4]; + unsigned char **old_planes; + unsigned long static_idx; + unsigned long temp_idx; + unsigned long crop_x, crop_y, crop_cx, crop_cy; + unsigned long export_count, merge_count; + unsigned long num_breaks; + long in_inc, out_dec, iosync; + long num_fields; + long prev_fields; + long notout; + long mmx2; + unsigned small_bytes[2]; + unsigned mmx_temp[2]; + struct frame_stats stats[2]; + struct metrics thres; + char chflag; + double diff_time, merge_time, decode_time, vo_time, filter_time; +}; + +#define PPZ { 2000, 2000, 0, 2000 } +#define PPR { 2000, 2000, 0, 2000 } +static const struct frame_stats ppzs = {PPZ,PPZ,PPZ,PPZ,PPZ,PPZ,PPZ,0,0,9999}; +static const struct frame_stats pprs = {PPR,PPR,PPR,PPR,PPR,PPR,PPR,0,0,9999}; + +extern int opt_screen_size_x; +extern int opt_screen_size_y; + +#ifndef MIN +#define MIN(a,b) (((a)<(b))?(a):(b)) +#endif +#ifndef MAX +#define MAX(a,b) (((a)>(b))?(a):(b)) +#endif + +static inline void *my_memcpy_pic(void * dst, void * src, int bytesPerLine, int height, int dstStride, int srcStride) +{ + int i; + void *retval=dst; + + for(i=0; i<height; i++) + { + memcpy(dst, src, bytesPerLine); + src+= srcStride; + dst+= dstStride; + } + + return retval; +} + +#define PDIFFUB(X,Y,T) "movq " #X "," #T "\n\t" \ + "psubusb " #Y "," #T "\n\t" \ + "psubusb " #X "," #Y "\n\t" \ + "paddusb " #Y "," #T "\n\t" + +#define PDIFFUBT(X,Y,T) "movq " #X "," #T "\n\t" \ + "psubusb " #Y "," #T "\n\t" \ + "psubusb " #X "," #Y "\n\t" \ + "paddusb " #T "," #Y "\n\t" + +#define PSUMBW(X,T,Z) "movq " #X "," #T "\n\t" \ + "punpcklbw " #Z "," #X "\n\t" \ + "punpckhbw " #Z "," #T "\n\t" \ + "paddw " #T "," #X "\n\t" \ + "movq " #X "," #T "\n\t" \ + "psllq $32, " #T "\n\t" \ + "paddw " #T "," #X "\n\t" \ + "movq " #X "," #T "\n\t" \ + "psllq $16, " #T "\n\t" \ + "paddw " #T "," #X "\n\t" \ + "psrlq $48, " #X "\n\t" + +#define PSADBW(X,Y,T,Z) PDIFFUBT(X,Y,T) PSUMBW(Y,T,Z) + +#define PMAXUB(X,Y) "psubusb " #X "," #Y "\n\tpaddusb " #X "," #Y "\n\t" +#define PMAXUW(X,Y) "psubusw " #X "," #Y "\n\tpaddusw " #X "," #Y "\n\t" +#define PMINUBT(X,Y,T) "movq " #Y "," #T "\n\t" \ + "psubusb " #X "," #T "\n\t" \ + "psubusb " #T "," #Y "\n\t" +#define PAVGB(X,Y) "pavgusb " #X "," #Y "\n\t" + +static inline void +get_metrics_c(unsigned char *a, unsigned char *b, int as, int bs, int lines, + struct metrics *m) +{ + a -= as; + b -= bs; + do { + cmmx_t old_po = *(cmmx_t*)(a ); + cmmx_t po = *(cmmx_t*)(b ); + cmmx_t e = *(cmmx_t*)(b + bs); + cmmx_t old_o = *(cmmx_t*)(a + 2*as); + cmmx_t o = *(cmmx_t*)(b + 2*bs); + cmmx_t ne = *(cmmx_t*)(b + 3*bs); + cmmx_t old_no = *(cmmx_t*)(a + 4*as); + cmmx_t no = *(cmmx_t*)(b + 4*bs); + + cmmx_t qup_old_odd = p31avgb(old_o, old_po); + cmmx_t qup_odd = p31avgb( o, po); + cmmx_t qdown_old_odd = p31avgb(old_o, old_no); + cmmx_t qdown_odd = p31avgb( o, no); + + cmmx_t qup_even = p31avgb(ne, e); + cmmx_t qdown_even = p31avgb(e, ne); + + cmmx_t temp_up_diff = pdiffub(qdown_even, qup_old_odd); + cmmx_t noise_up_diff = pdiffub(qdown_even, qup_odd); + cmmx_t temp_down_diff = pdiffub(qup_even, qdown_old_odd); + cmmx_t noise_down_diff = pdiffub(qup_even, qdown_odd); + + cmmx_t odd_diff = pdiffub(o, old_o); + m->odd += psumbw(odd_diff); + m->even += psadbw(e, *(cmmx_t*)(a+as)); + + temp_up_diff = pminub(temp_up_diff, temp_down_diff); + temp_up_diff = pminub(temp_up_diff, odd_diff); + m->temp += psumbw(temp_up_diff); + noise_up_diff = pminub(noise_up_diff, odd_diff); + noise_up_diff = pminub(noise_up_diff, noise_down_diff); + + m->noise += psumbw(noise_up_diff); + a += 2*as; + b += 2*bs; + } while (--lines); +} + +static inline void +get_metrics_fast_c(unsigned char *a, unsigned char *b, int as, int bs, + int lines, struct metrics *m) +{ + a -= as; + b -= bs; + do { + cmmx_t old_po = (*(cmmx_t*)(a ) >> 1) & ~SIGN_BITS; + cmmx_t po = (*(cmmx_t*)(b ) >> 1) & ~SIGN_BITS; + cmmx_t old_e = (*(cmmx_t*)(a + as) >> 1) & ~SIGN_BITS; + cmmx_t e = (*(cmmx_t*)(b + bs) >> 1) & ~SIGN_BITS; + cmmx_t old_o = (*(cmmx_t*)(a + 2*as) >> 1) & ~SIGN_BITS; + cmmx_t o = (*(cmmx_t*)(b + 2*bs) >> 1) & ~SIGN_BITS; + cmmx_t ne = (*(cmmx_t*)(b + 3*bs) >> 1) & ~SIGN_BITS; + cmmx_t old_no = (*(cmmx_t*)(a + 4*as) >> 1) & ~SIGN_BITS; + cmmx_t no = (*(cmmx_t*)(b + 4*bs) >> 1) & ~SIGN_BITS; + + cmmx_t qup_old_odd = p31avgb_s(old_o, old_po); + cmmx_t qup_odd = p31avgb_s( o, po); + cmmx_t qdown_old_odd = p31avgb_s(old_o, old_no); + cmmx_t qdown_odd = p31avgb_s( o, no); + + cmmx_t qup_even = p31avgb_s(ne, e); + cmmx_t qdown_even = p31avgb_s(e, ne); + + cmmx_t temp_up_diff = pdiffub_s(qdown_even, qup_old_odd); + cmmx_t noise_up_diff = pdiffub_s(qdown_even, qup_odd); + cmmx_t temp_down_diff = pdiffub_s(qup_even, qdown_old_odd); + cmmx_t noise_down_diff = pdiffub_s(qup_even, qdown_odd); + + cmmx_t odd_diff = pdiffub_s(o, old_o); + m->odd += psumbw_s(odd_diff) << 1; + m->even += psadbw_s(e, old_e) << 1; + + temp_up_diff = pminub_s(temp_up_diff, temp_down_diff); + temp_up_diff = pminub_s(temp_up_diff, odd_diff); + m->temp += psumbw_s(temp_up_diff) << 1; + noise_up_diff = pminub_s(noise_up_diff, odd_diff); + noise_up_diff = pminub_s(noise_up_diff, noise_down_diff); + + m->noise += psumbw_s(noise_up_diff) << 1; + a += 2*as; + b += 2*bs; + } while (--lines); +} + +static inline void +get_metrics_faster_c(unsigned char *a, unsigned char *b, int as, int bs, + int lines, struct metrics *m) +{ + a -= as; + b -= bs; + do { + cmmx_t old_po = (*(cmmx_t*)(a )>>1) & ~SIGN_BITS; + cmmx_t po = (*(cmmx_t*)(b )>>1) & ~SIGN_BITS; + cmmx_t old_e = (*(cmmx_t*)(a + as)>>1) & ~SIGN_BITS; + cmmx_t e = (*(cmmx_t*)(b + bs)>>1) & ~SIGN_BITS; + cmmx_t old_o = (*(cmmx_t*)(a + 2*as)>>1) & ~SIGN_BITS; + cmmx_t o = (*(cmmx_t*)(b + 2*bs)>>1) & ~SIGN_BITS; + cmmx_t ne = (*(cmmx_t*)(b + 3*bs)>>1) & ~SIGN_BITS; + + cmmx_t down_even = p31avgb_s(e, ne); + cmmx_t up_odd = p31avgb_s(o, po); + cmmx_t up_old_odd = p31avgb_s(old_o, old_po); + + cmmx_t odd_diff = pdiffub_s(o, old_o); + cmmx_t temp_diff = pdiffub_s(down_even, up_old_odd); + cmmx_t noise_diff = pdiffub_s(down_even, up_odd); + + m->even += psadbw_s(e, old_e) << 1; + m->odd += psumbw_s(odd_diff) << 1; + + temp_diff = pminub_s(temp_diff, odd_diff); + noise_diff = pminub_s(noise_diff, odd_diff); + + m->noise += psumbw_s(noise_diff) << 1; + m->temp += psumbw_s(temp_diff) << 1; + a += 2*as; + b += 2*bs; + } while (--lines); + +} + +static inline void +get_block_stats(struct metrics *m, struct vf_priv_s *p, struct frame_stats *s) +{ + unsigned two_e = m->even + MAX(m->even , p->thres.even ); + unsigned two_o = m->odd + MAX(m->odd , p->thres.odd ); + unsigned two_n = m->noise + MAX(m->noise, p->thres.noise); + unsigned two_t = m->temp + MAX(m->temp , p->thres.temp ); + + unsigned e_big = m->even >= (m->odd + two_o + 1)/2; + unsigned o_big = m->odd >= (m->even + two_e + 1)/2; + unsigned n_big = m->noise >= (m->temp + two_t + 1)/2; + unsigned t_big = m->temp >= (m->noise + two_n + 1)/2; + + unsigned e2x = m->even >= two_o; + unsigned o2x = m->odd >= two_e; + unsigned n2x = m->noise >= two_t; + unsigned t2x = m->temp >= two_n; + + unsigned ntiny_e = m->even > p->thres.even ; + unsigned ntiny_o = m->odd > p->thres.odd ; + unsigned ntiny_n = m->noise > p->thres.noise; + unsigned ntiny_t = m->temp > p->thres.temp ; + + unsigned nlow_e = m->even > 2*p->thres.even ; + unsigned nlow_o = m->odd > 2*p->thres.odd ; + unsigned nlow_n = m->noise > 2*p->thres.noise; + unsigned nlow_t = m->temp > 2*p->thres.temp ; + + unsigned high_e = m->even > 4*p->thres.even ; + unsigned high_o = m->odd > 4*p->thres.odd ; + unsigned high_n = m->noise > 4*p->thres.noise; + unsigned high_t = m->temp > 4*p->thres.temp ; + + unsigned low_il = !n_big && !t_big && ntiny_n && ntiny_t; + unsigned high_il = !n_big && !t_big && nlow_n && nlow_t; + + if (low_il | high_il) { + s->interlaced_low += low_il; + s->interlaced_high += high_il; + } else { + s->tiny.even += ntiny_e; + s->tiny.odd += ntiny_o; + s->tiny.noise += ntiny_n; + s->tiny.temp += ntiny_t; + + s->low .even += nlow_e ; + s->low .odd += nlow_o ; + s->low .noise += nlow_n ; + s->low .temp += nlow_t ; + + s->high.even += high_e ; + s->high.odd += high_o ; + s->high.noise += high_n ; + s->high.temp += high_t ; + + if (m->even >= p->sad_thres) s->sad.even += m->even ; + if (m->odd >= p->sad_thres) s->sad.odd += m->odd ; + if (m->noise >= p->sad_thres) s->sad.noise += m->noise; + if (m->temp >= p->sad_thres) s->sad.temp += m->temp ; + } + s->num_blocks++; + s->max.even = MAX(s->max.even , m->even ); + s->max.odd = MAX(s->max.odd , m->odd ); + s->max.noise = MAX(s->max.noise, m->noise); + s->max.temp = MAX(s->max.temp , m->temp ); + + s->bigger.even += e_big ; + s->bigger.odd += o_big ; + s->bigger.noise += n_big ; + s->bigger.temp += t_big ; + + s->twox.even += e2x ; + s->twox.odd += o2x ; + s->twox.noise += n2x ; + s->twox.temp += t2x ; + +} + +static inline struct metrics +block_metrics_c(unsigned char *a, unsigned char *b, int as, int bs, + int lines, struct vf_priv_s *p, struct frame_stats *s) +{ + struct metrics tm; + tm.even = tm.odd = tm.noise = tm.temp = 0; + get_metrics_c(a, b, as, bs, lines, &tm); + if (sizeof(cmmx_t) < 8) + get_metrics_c(a+4, b+4, as, bs, lines, &tm); + get_block_stats(&tm, p, s); + return tm; +} + +static inline struct metrics +block_metrics_fast_c(unsigned char *a, unsigned char *b, int as, int bs, + int lines, struct vf_priv_s *p, struct frame_stats *s) +{ + struct metrics tm; + tm.even = tm.odd = tm.noise = tm.temp = 0; + get_metrics_fast_c(a, b, as, bs, lines, &tm); + if (sizeof(cmmx_t) < 8) + get_metrics_fast_c(a+4, b+4, as, bs, lines, &tm); + get_block_stats(&tm, p, s); + return tm; +} + +static inline struct metrics +block_metrics_faster_c(unsigned char *a, unsigned char *b, int as, int bs, + int lines, struct vf_priv_s *p, struct frame_stats *s) +{ + struct metrics tm; + tm.even = tm.odd = tm.noise = tm.temp = 0; + get_metrics_faster_c(a, b, as, bs, lines, &tm); + if (sizeof(cmmx_t) < 8) + get_metrics_faster_c(a+4, b+4, as, bs, lines, &tm); + get_block_stats(&tm, p, s); + return tm; +} + +#define MEQ(X,Y) ((X).even == (Y).even && (X).odd == (Y).odd && (X).temp == (Y).temp && (X).noise == (Y).noise) + +#define BLOCK_METRICS_TEMPLATE() \ + asm volatile("pxor %mm7, %mm7\n\t" /* The result is colleted in mm7 */ \ + "pxor %mm6, %mm6\n\t" /* Temp to stay at 0 */ \ + ); \ + a -= as; \ + b -= bs; \ + do { \ + asm volatile( \ + "movq (%0,%2), %%mm0\n\t" \ + "movq (%1,%3), %%mm1\n\t" /* mm1 = even */ \ + PSADBW(%%mm1, %%mm0, %%mm4, %%mm6) \ + "paddusw %%mm0, %%mm7\n\t" /* even diff */ \ + "movq (%0,%2,2), %%mm0\n\t" /* mm0 = old odd */ \ + "movq (%1,%3,2), %%mm2\n\t" /* mm2 = odd */ \ + "movq (%0), %%mm3\n\t" \ + "psubusb %4, %%mm3\n\t" \ + PAVGB(%%mm0, %%mm3) \ + PAVGB(%%mm0, %%mm3) /* mm3 = qup old odd */ \ + "movq %%mm0, %%mm5\n\t" \ + PSADBW(%%mm2, %%mm0, %%mm4, %%mm6) \ + "psllq $16, %%mm0\n\t" \ + "paddusw %%mm0, %%mm7\n\t" \ + "movq (%1), %%mm4\n\t" \ + "leal (%0,%2,2), %0\n\t" \ + "leal (%1,%3,2), %1\n\t" \ + "psubusb %4, %%mm4\n\t" \ + PAVGB(%%mm2, %%mm4) \ + PAVGB(%%mm2, %%mm4) /* mm4 = qup odd */ \ + PDIFFUBT(%%mm5, %%mm2, %%mm0) /* mm2 =abs(oldodd-odd) */ \ + "movq (%1,%3), %%mm5\n\t" \ + "psubusb %4, %%mm5\n\t" \ + PAVGB(%%mm1, %%mm5) \ + PAVGB(%%mm5, %%mm1) /* mm1 = qdown even */ \ + PAVGB((%1,%3), %%mm5) /* mm5 = qup next even */ \ + PDIFFUBT(%%mm1, %%mm3, %%mm0) /* mm3 = abs(qupoldo-qde) */ \ + PDIFFUBT(%%mm1, %%mm4, %%mm0) /* mm4 = abs(qupodd-qde) */ \ + PMINUBT(%%mm2, %%mm3, %%mm0) /* limit temp to odd diff */ \ + PMINUBT(%%mm2, %%mm4, %%mm0) /* limit noise to odd diff */ \ + "movq (%1,%3,2), %%mm2\n\t" \ + "psubusb %4, %%mm2\n\t" \ + PAVGB((%1), %%mm2) \ + PAVGB((%1), %%mm2) /* mm2 = qdown odd */ \ + "movq (%0,%2,2), %%mm1\n\t" \ + "psubusb %4, %%mm1\n\t" \ + PAVGB((%0), %%mm1) \ + PAVGB((%0), %%mm1) /* mm1 = qdown old odd */ \ + PDIFFUBT(%%mm5, %%mm2, %%mm0) /* mm2 = abs(qdo-qune) */ \ + PDIFFUBT(%%mm5, %%mm1, %%mm0) /* mm1 = abs(qdoo-qune) */ \ + PMINUBT(%%mm4, %%mm2, %%mm0) /* current */ \ + PMINUBT(%%mm3, %%mm1, %%mm0) /* old */ \ + PSUMBW(%%mm2, %%mm0, %%mm6) \ + PSUMBW(%%mm1, %%mm0, %%mm6) \ + "psllq $32, %%mm2\n\t" \ + "psllq $48, %%mm1\n\t" \ + "paddusw %%mm2, %%mm7\n\t" \ + "paddusw %%mm1, %%mm7\n\t" \ + : "=r" (a), "=r" (b) \ + : "r"(as), "r"(bs), "m" (ones), "0"(a), "1"(b), "X"(*a), "X"(*b) \ + ); \ + } while (--lines); + +static inline struct metrics +block_metrics_3dnow(unsigned char *a, unsigned char *b, int as, int bs, + int lines, struct vf_priv_s *p, struct frame_stats *s) +{ + struct metrics tm; +#ifndef HAVE_3DNOW + mp_msg(MSGT_VFILTER, MSGL_FATAL, "block_metrics_3dnow: internal error\n"); +#else + static const unsigned long long ones = 0x0101010101010101ull; + unsigned long interlaced; + + BLOCK_METRICS_TEMPLATE(); + asm volatile("movq %%mm7, %0\n\temms" : "=m" (tm)); + get_block_stats(&tm, p, s); +#endif + return tm; +} + +#undef PSUMBW +#undef PSADBW +#undef PMAXUB +#undef PMINUBT +#undef PAVGB + +#define PSUMBW(X,T,Z) "psadbw " #Z "," #X "\n\t" +#define PSADBW(X,Y,T,Z) "psadbw " #X "," #Y "\n\t" +#define PMAXUB(X,Y) "pmaxub " #X "," #Y "\n\t" +#define PMINUBT(X,Y,T) "pminub " #X "," #Y "\n\t" +#define PAVGB(X,Y) "pavgb " #X "," #Y "\n\t" + +static inline struct metrics +block_metrics_mmx2(unsigned char *a, unsigned char *b, int as, int bs, + int lines, struct vf_priv_s *p, struct frame_stats *s) +{ + struct metrics tm; +#ifndef HAVE_MMX + mp_msg(MSGT_VFILTER, MSGL_FATAL, "block_metrics_mmx2: internal error\n"); +#else + static const unsigned long long ones = 0x0101010101010101ull; + unsigned long interlaced; + unsigned long prefetch_line = (((long)a>>3) & 7) + 10; +#ifdef DEBUG + struct frame_stats ts = *s; +#endif + asm volatile("prefetcht0 (%0,%2)\n\t" + "prefetcht0 (%1,%3)\n\t" : + : "r" (a), "r" (b), + "r" (prefetch_line * as), "r" (prefetch_line * bs)); + + BLOCK_METRICS_TEMPLATE(); + + s->num_blocks++; + asm volatile( + "movq %3, %%mm0\n\t" + "movq %%mm7, %%mm1\n\t" + "psubusw %%mm0, %%mm1\n\t" + "movq %%mm1, %%mm2\n\t" + "paddusw %%mm0, %%mm2\n\t" + "paddusw %%mm7, %%mm2\n\t" + "pshufw $0xb1, %%mm2, %%mm3\n\t" + "pavgw %%mm7, %%mm2\n\t" + "pshufw $0xb1, %%mm2, %%mm2\n\t" + "psubusw %%mm7, %%mm2\n\t" + "pcmpeqw %%mm6, %%mm2\n\t" /* 1 if >= 1.5x */ + "psubusw %%mm7, %%mm3\n\t" + "pcmpeqw %%mm6, %%mm3\n\t" /* 1 if >= 2x */ + "movq %1, %%mm4\n\t" + "movq %2, %%mm5\n\t" + "psubw %%mm2, %%mm4\n\t" + "psubw %%mm3, %%mm5\n\t" + "movq %%mm4, %1\n\t" + "movq %%mm5, %2\n\t" + "pxor %%mm4, %%mm4\n\t" + "pcmpeqw %%mm1, %%mm4\n\t" /* 1 if <= t */ + "psubusw %%mm0, %%mm1\n\t" + "pxor %%mm5, %%mm5\n\t" + "pcmpeqw %%mm1, %%mm5\n\t" /* 1 if <= 2t */ + "psubusw %%mm0, %%mm1\n\t" + "psubusw %%mm0, %%mm1\n\t" + "pcmpeqw %%mm6, %%mm1\n\t" /* 1 if <= 4t */ + "pshufw $0xb1, %%mm2, %%mm0\n\t" + "por %%mm2, %%mm0\n\t" /* 1 if not close */ + "punpckhdq %%mm0, %%mm0\n\t" + "movq %%mm4, %%mm2\n\t" /* tttt */ + "punpckhdq %%mm5, %%mm2\n\t" /* ttll */ + "por %%mm2, %%mm0\n\t" + "pcmpeqd %%mm6, %%mm0\n\t" /* close && big */ + "psrlq $16, %%mm0\n\t" + "psrlw $15, %%mm0\n\t" + "movd %%mm0, %0\n\t" + : "=r" (interlaced), "=m" (s->bigger), "=m" (s->twox) + : "m" (p->thres) + ); + + if (interlaced) { + s->interlaced_high += interlaced >> 16; + s->interlaced_low += interlaced; + } else { + asm volatile( + "pcmpeqw %%mm0, %%mm0\n\t" /* -1 */ + "psubw %%mm0, %%mm4\n\t" + "psubw %%mm0, %%mm5\n\t" + "psubw %%mm0, %%mm1\n\t" + "paddw %0, %%mm4\n\t" + "paddw %1, %%mm5\n\t" + "paddw %2, %%mm1\n\t" + "movq %%mm4, %0\n\t" + "movq %%mm5, %1\n\t" + "movq %%mm1, %2\n\t" + : "=m" (s->tiny), "=m" (s->low), "=m" (s->high) + ); + + asm volatile( + "pshufw $0, %2, %%mm0\n\t" + "psubusw %%mm7, %%mm0\n\t" + "pcmpeqw %%mm6, %%mm0\n\t" /* 0 if below sad_thres */ + "pand %%mm7, %%mm0\n\t" + "movq %%mm0, %%mm1\n\t" + "punpcklwd %%mm6, %%mm0\n\t" /* sad even, odd */ + "punpckhwd %%mm6, %%mm1\n\t" /* sad noise, temp */ + "paddd %0, %%mm0\n\t" + "paddd %1, %%mm1\n\t" + "movq %%mm0, %0\n\t" + "movq %%mm1, %1\n\t" + : "=m" (s->sad.even), "=m" (s->sad.noise) + : "m" (p->sad_thres) + ); + } + + asm volatile( + "movq %%mm7, (%1)\n\t" + PMAXUW((%0), %%mm7) + "movq %%mm7, (%0)\n\t" + "emms" + : : "r" (&s->max), "r" (&tm), "X" (s->max) + : "memory" + ); +#ifdef DEBUG + if (1) { + struct metrics cm; + a -= 7*as; + b -= 7*bs; + cm = block_metrics_c(a, b, as, bs, 4, p, &ts); + if (!MEQ(tm, cm)) + mp_msg(MSGT_VFILTER, MSGL_WARN, "Bad metrics\n"); + if (s) { +# define CHECK(X) if (!MEQ(s->X, ts.X)) \ + mp_msg(MSGT_VFILTER, MSGL_WARN, "Bad " #X "\n"); + CHECK(tiny); + CHECK(low); + CHECK(high); + CHECK(sad); + CHECK(max); + } + } +#endif +#endif + return tm; +} + +static inline int +dint_copy_line_mmx2(unsigned char *dst, unsigned char *a, long bos, + long cos, int ds, int ss, int w, int t) +{ +#ifndef HAVE_MMX + mp_msg(MSGT_VFILTER, MSGL_FATAL, "dint_copy_line_mmx2: internal error\n"); + return 0; +#else + unsigned long len = (w+7) >> 3; + int ret; + asm volatile ( + "pxor %%mm6, %%mm6 \n\t" /* deinterlaced pixel counter */ + "movd %0, %%mm7 \n\t" + "punpcklbw %%mm7, %%mm7 \n\t" + "punpcklwd %%mm7, %%mm7 \n\t" + "punpckldq %%mm7, %%mm7 \n\t" /* mm7 = threshold */ + : /* no output */ + : "rm" (t) + ); + do { + asm volatile ( + "movq (%0), %%mm0\n\t" + "movq (%0,%3,2), %%mm1\n\t" + "movq %%mm0, (%2)\n\t" + "pmaxub %%mm1, %%mm0\n\t" + "pavgb (%0), %%mm1\n\t" + "psubusb %%mm1, %%mm0\n\t" + "paddusb %%mm7, %%mm0\n\t" /* mm0 = max-avg+thr */ + "movq (%0,%1), %%mm2\n\t" + "movq (%0,%5), %%mm3\n\t" + "movq %%mm2, %%mm4\n\t" + PDIFFUBT(%%mm1, %%mm2, %%mm5) + PDIFFUBT(%%mm1, %%mm3, %%mm5) + "pminub %%mm2, %%mm3\n\t" + "pcmpeqb %%mm3, %%mm2\n\t" /* b = min */ + "pand %%mm2, %%mm4\n\t" + "pandn (%0,%5), %%mm2\n\t" + "por %%mm4, %%mm2\n\t" + "pminub %%mm0, %%mm3\n\t" + "pcmpeqb %%mm0, %%mm3\n\t" /* set to 1s if >= threshold */ + "psubb %%mm3, %%mm6\n\t" /* count pixels above thr. */ + "pand %%mm3, %%mm1 \n\t" + "pandn %%mm2, %%mm3 \n\t" + "por %%mm3, %%mm1 \n\t" /* avg if >= threshold */ + "movq %%mm1, (%2,%4) \n\t" + : /* no output */ + : "r" (a), "r" (bos), "r" (dst), "r" (ss), "r" (ds), "r" (cos) + ); + a += 8; + dst += 8; + } while (--len); + + asm volatile ("pxor %%mm7, %%mm7 \n\t" + "psadbw %%mm6, %%mm7 \n\t" + "movd %%mm7, %0 \n\t" + "emms \n\t" + : "=r" (ret) + ); + return ret; +#endif +} + +static inline int +dint_copy_line(unsigned char *dst, unsigned char *a, long bos, + long cos, int ds, int ss, int w, int t) +{ + unsigned long len = ((unsigned long)w+sizeof(cmmx_t)-1) / sizeof(cmmx_t); + cmmx_t dint_count = 0; + cmmx_t thr; + t |= t << 8; + thr = t | (t << 16); + if (sizeof(cmmx_t) > 4) + thr |= thr << (sizeof(cmmx_t)*4); + do { + cmmx_t e = *(cmmx_t*)a; + cmmx_t ne = *(cmmx_t*)(a+2*ss); + cmmx_t o = *(cmmx_t*)(a+bos); + cmmx_t oo = *(cmmx_t*)(a+cos); + cmmx_t maxe = pmaxub(e, ne); + cmmx_t avge = pavgb(e, ne); + cmmx_t max_diff = maxe - avge + thr; /* 0<=max-avg<128, thr<128 */ + cmmx_t diffo = pdiffub(avge, o); + cmmx_t diffoo = pdiffub(avge, oo); + cmmx_t diffcmp = pcmpgtub(diffo, diffoo); + cmmx_t bo = ((oo ^ o) & diffcmp) ^ o; + cmmx_t diffbo = ((diffoo ^ diffo) & diffcmp) ^ diffo; + cmmx_t above_thr = ~pcmpgtub(max_diff, diffbo); + cmmx_t bo_or_avg = ((avge ^ bo) & above_thr) ^ bo; + dint_count += above_thr & ONE_BYTES; + *(cmmx_t*)(dst) = e; + *(cmmx_t*)(dst+ds) = bo_or_avg; + a += sizeof(cmmx_t); + dst += sizeof(cmmx_t); + } while (--len); + return psumbw(dint_count); +} + +static int +dint_copy_plane(unsigned char *d, unsigned char *a, unsigned char *b, + unsigned char *c, unsigned long w, unsigned long h, + unsigned long ds, unsigned long ss, unsigned long threshold, + long field, long mmx2) +{ + unsigned long ret = 0; + long bos = b - a; + long cos = c - a; + if (field) { + memcpy(d, b, w); + h--; + d += ds; + a += ss; + } + bos += ss; + cos += ss; + while (h > 2) { + if (threshold >= 128) { + memcpy(d, a, w); + memcpy(d+ds, a+bos, w); + } else if (mmx2 == 1) { + ret += dint_copy_line_mmx2(d, a, bos, cos, ds, ss, w, threshold); + } else + ret += dint_copy_line(d, a, bos, cos, ds, ss, w, threshold); + h -= 2; + d += 2*ds; + a += 2*ss; + } + memcpy(d, a, w); + if (h == 2) + memcpy(d+ds, a+bos, w); + return ret; +} + +static void +copy_merge_fields(struct vf_priv_s *p, mp_image_t *dmpi, + unsigned char **old, unsigned char **new, unsigned long show) +{ + unsigned long threshold = 256; + unsigned long field = p->swapped; + unsigned long dint_pixels = 0; + unsigned char **other = old; + if (show >= 12 || !(show & 3)) + show >>= 2, other = new, new = old; + if (show <= 2) { /* Single field: de-interlace */ + threshold = p->dint_thres; + field ^= show & 1; + old = new; + } else if (show == 3) + old = new; + else + field ^= 1; + dint_pixels +=dint_copy_plane(dmpi->planes[0], old[0], new[0], + other[0], p->w, p->h, dmpi->stride[0], + p->stride, threshold, field, p->mmx2); + if (dmpi->flags & MP_IMGFLAG_PLANAR) { + if (p->luma_only) + old = new, other = new; + else + threshold = threshold/2 + 1; + field ^= p->chroma_swapped; + dint_copy_plane(dmpi->planes[1], old[1], new[1], + other[1], p->cw, p->ch, dmpi->stride[1], + p->chroma_stride, threshold, field, p->mmx2); + dint_copy_plane(dmpi->planes[2], old[2], new[2], + other[2], p->cw, p->ch, dmpi->stride[2], + p->chroma_stride, threshold, field, p->mmx2); + } + if (dint_pixels > 0 && p->verbose) + mp_msg(MSGT_VFILTER,MSGL_INFO,"Deinterlaced %lu pixels\n",dint_pixels); +} + +static void diff_planes(struct vf_priv_s *p, struct frame_stats *s, + unsigned char *of, unsigned char *nf, + int w, int h, int os, int ns, int swapped) +{ + int i, y; + int align = -(long)nf & 7; + of += align; + nf += align; + w -= align; + if (swapped) + of -= os, nf -= ns; + i = (h*3 >> 7) & ~1; + of += i*os + 8; + nf += i*ns + 8; + h -= i; + w -= 16; + + memset(s, 0, sizeof(*s)); + + for (y = (h-8) >> 3; y; y--) { + if (p->mmx2 == 1) { + for (i = 0; i < w; i += 8) + block_metrics_mmx2(of+i, nf+i, os, ns, 4, p, s); + } else if (p->mmx2 == 2) { + for (i = 0; i < w; i += 8) + block_metrics_3dnow(of+i, nf+i, os, ns, 4, p, s); + } else if (p->fast > 3) { + for (i = 0; i < w; i += 8) + block_metrics_faster_c(of+i, nf+i, os, ns, 4, p, s); + } else if (p->fast > 1) { + for (i = 0; i < w; i += 8) + block_metrics_fast_c(of+i, nf+i, os, ns, 4, p, s); + } else { + for (i = 0; i < w; i += 8) + block_metrics_c(of+i, nf+i, os, ns, 4, p, s); + } + of += 8*os; + nf += 8*ns; + } +} + +#define METRICS(X) (X).even, (X).odd, (X).noise, (X).temp + +static void diff_fields(struct vf_priv_s *p, struct frame_stats *s, + unsigned char **old, unsigned char **new) +{ + diff_planes(p, s, old[0], new[0], p->w, p->h, + p->stride, p->stride, p->swapped); + s->sad.even = (s->sad.even * 16ul) / s->num_blocks; + s->sad.odd = (s->sad.odd * 16ul) / s->num_blocks; + s->sad.noise = (s->sad.noise * 16ul) / s->num_blocks; + s->sad.temp = (s->sad.temp * 16ul) / s->num_blocks; + if (p->verbose) + mp_msg(MSGT_VFILTER, MSGL_INFO, "%lu%c M:%d/%d/%d/%d - %d, " + "t:%d/%d/%d/%d, l:%d/%d/%d/%d, h:%d/%d/%d/%d, bg:%d/%d/%d/%d, " + "2x:%d/%d/%d/%d, sad:%d/%d/%d/%d, lil:%d, hil:%d, ios:%.1f\n", + p->inframes, p->chflag, METRICS(s->max), s->num_blocks, + METRICS(s->tiny), METRICS(s->low), METRICS(s->high), + METRICS(s->bigger), METRICS(s->twox), METRICS(s->sad), + s->interlaced_low, s->interlaced_high, + p->iosync / (double) p->in_inc); +} + +static const char *parse_args(struct vf_priv_s *p, const char *args) +{ + args--; + while (args && *++args && + (sscanf(args, "io=%lu:%lu", &p->out_dec, &p->in_inc) == 2 || + sscanf(args, "diff_thres=%hu", &p->thres.even ) == 1 || + sscanf(args, "comb_thres=%hu", &p->thres.noise) == 1 || + sscanf(args, "sad_thres=%lu", &p->sad_thres ) == 1 || + sscanf(args, "dint_thres=%lu", &p->dint_thres ) == 1 || + sscanf(args, "fast=%u", &p->fast ) == 1 || + sscanf(args, "mmx2=%lu", &p->mmx2 ) == 1 || + sscanf(args, "luma_only=%u", &p->luma_only ) == 1 || + sscanf(args, "verbose=%u", &p->verbose ) == 1 || + sscanf(args, "crop=%lu:%lu:%lu:%lu", &p->w, + &p->h, &p->crop_x, &p->crop_y) == 4)) + args = strchr(args, '/'); + return args; +} + +static unsigned long gcd(unsigned long x, unsigned long y) +{ + unsigned long t; + if (x > y) + t = x, x = y, y = t; + + while (x) { + t = y % x; + y = x; + x = t; + } + return y; +} + +static void init(struct vf_priv_s *p, mp_image_t *mpi) +{ + unsigned long i; + unsigned long plane_size, chroma_plane_size; + unsigned char *plane; + unsigned long cos, los; + p->crop_cx = p->crop_x >> mpi->chroma_x_shift; + p->crop_cy = p->crop_y >> mpi->chroma_y_shift; + if (mpi->flags & MP_IMGFLAG_ACCEPT_STRIDE) { + p->stride = (mpi->w + 15) & ~15; + p->chroma_stride = p->stride >> mpi->chroma_x_shift; + } else { + p->stride = mpi->width; + p->chroma_stride = mpi->chroma_width; + } + p->cw = p->w >> mpi->chroma_x_shift; + p->ch = p->h >> mpi->chroma_y_shift; + p->nplanes = 1; + p->static_idx = 0; + p->temp_idx = 0; + p->old_planes = p->planes[0]; + plane_size = mpi->h * p->stride; + chroma_plane_size = mpi->flags & MP_IMGFLAG_PLANAR ? + mpi->chroma_height * p->chroma_stride : 0; + p->memory_allocated = + malloc(NUM_STORED * (plane_size+2*chroma_plane_size) + + 8*p->chroma_stride + 4096); + /* align to page boundary */ + plane = p->memory_allocated + (-(long)p->memory_allocated & 4095); + memset(plane, 0, NUM_STORED * plane_size); + los = p->crop_x + p->crop_y * p->stride; + cos = p->crop_cx + p->crop_cy * p->chroma_stride; + for (i = 0; i != NUM_STORED; i++, plane += plane_size) { + p->planes[i][0] = plane; + p->planes[NUM_STORED + i][0] = plane + los; + } + if (mpi->flags & MP_IMGFLAG_PLANAR) { + p->nplanes = 3; + memset(plane, 0x80, NUM_STORED * 2 * chroma_plane_size); + for (i = 0; i != NUM_STORED; i++) { + p->planes[i][1] = plane; + p->planes[NUM_STORED + i][1] = plane + cos; + plane += chroma_plane_size; + p->planes[i][2] = plane; + p->planes[NUM_STORED + i][2] = plane + cos; + plane += chroma_plane_size; + } + } + p->out_dec <<= 2; + i = gcd(p->in_inc, p->out_dec); + p->in_inc /= i; + p->out_dec /= i; + p->iosync = 0; + p->num_fields = 3; +} + +static inline double get_time(void) +{ + struct timeval tv; + gettimeofday(&tv, 0); + return tv.tv_sec + tv.tv_usec * 1e-6; +} + +static void get_image(struct vf_instance_s* vf, mp_image_t *mpi) +{ + struct vf_priv_s *p = vf->priv; + static unsigned char **planes, planes_idx; + + if (mpi->type == MP_IMGTYPE_STATIC) return; + + if (!p->planes[0][0]) init(p, mpi); + + if (mpi->type == MP_IMGTYPE_TEMP || + (mpi->type == MP_IMGTYPE_IPB && !(mpi->flags & MP_IMGFLAG_READABLE))) + planes_idx = 2 + (++p->temp_idx & 1); + else + planes_idx = ++p->static_idx & 1; + planes = p->planes[planes_idx]; + mpi->priv = p->planes[NUM_STORED + planes_idx]; + mpi->planes[0] = planes[0]; + mpi->stride[0] = p->stride; + if (mpi->flags & MP_IMGFLAG_PLANAR) { + mpi->planes[1] = planes[1]; + mpi->planes[2] = planes[2]; + mpi->stride[1] = mpi->stride[2] = p->chroma_stride; + } + mpi->width = p->stride; + + mpi->flags |= MP_IMGFLAG_DIRECT; + mpi->flags &= ~MP_IMGFLAG_DRAW_CALLBACK; +} + +static inline long +cmpe(unsigned long x, unsigned long y, unsigned long err, unsigned long e) +{ + long diff = x-y; + long unit = ((x+y+err) >> e); + long ret = (diff > unit) - (diff < -unit); + unit >>= 1; + return ret + (diff > unit) - (diff < -unit); +} + +static unsigned long +find_breaks(struct vf_priv_s *p, struct frame_stats *s) +{ + struct frame_stats *ps = &p->stats[(p->inframes-1) & 1]; + long notfilm = 5*p->in_inc - p->out_dec; + unsigned long n = s->num_blocks >> 8; + unsigned long sad_comb_cmp = cmpe(s->sad.temp, s->sad.noise, 512, 1); + unsigned long ret = 8; + + if (cmpe(s->sad.temp, s->sad.even, 512, 1) > 0) + mp_msg(MSGT_VFILTER, MSGL_WARN, + "@@@@@@@@ Bottom-first field??? @@@@@@@@\n"); + if (s->sad.temp > 1000 && s->sad.noise > 1000) + return 3; + if (s->interlaced_high >= 2*n && s->sad.temp > 256 && s->sad.noise > 256) + return 3; + if (s->high.noise > s->num_blocks/4 && s->sad.noise > 10000 && + s->sad.noise > 2*s->sad.even && s->sad.noise > 2*ps->sad.odd) { + // Mid-frame scene change + if (s->tiny.temp + s->interlaced_low < n || + s->low.temp + s->interlaced_high < n/4 || + s->high.temp + s->interlaced_high < n/8 || + s->sad.temp < 160) + return 1; + return 3; + } + if (s->high.temp > s->num_blocks/4 && s->sad.temp > 10000 && + s->sad.temp > 2*ps->sad.odd && s->sad.temp > 2*ps->sad.even) { + // Start frame scene change + if (s->tiny.noise + s->interlaced_low < n || + s->low.noise + s->interlaced_high < n/4 || + s->high.noise + s->interlaced_high < n/8 || + s->sad.noise < 160) + return 2; + return 3; + } + if (sad_comb_cmp == 2) + return 2; + if (sad_comb_cmp == -2) + return 1; + + if (s->tiny.odd > 3*MAX(n,s->tiny.even) + s->interlaced_low) + return 1; + if (s->tiny.even > 3*MAX(n,s->tiny.odd)+s->interlaced_low && + (!sad_comb_cmp || (s->low.noise <= n/4 && s->low.temp <= n/4))) + return 4; + + if (s->sad.noise < 64 && s->sad.temp < 64 && + s->low.noise <= n/2 && s->high.noise <= n/4 && + s->low.temp <= n/2 && s->high.temp <= n/4) + goto still; + + if (s->tiny.temp > 3*MAX(n,s->tiny.noise) + s->interlaced_low) + return 2; + if (s->tiny.noise > 3*MAX(n,s->tiny.temp) + s->interlaced_low) + return 1; + + if (s->low.odd > 3*MAX(n/4,s->low.even) + s->interlaced_high) + return 1; + if (s->low.even > 3*MAX(n/4,s->low.odd)+s->interlaced_high && + s->sad.even > 2*s->sad.odd && + (!sad_comb_cmp || (s->low.noise <= n/4 && s->low.temp <= n/4))) + return 4; + + if (s->low.temp > 3*MAX(n/4,s->low.noise) + s->interlaced_high) + return 2; + if (s->low.noise > 3*MAX(n/4,s->low.temp) + s->interlaced_high) + return 1; + + if (sad_comb_cmp == 1 && s->sad.noise < 64) + return 2; + if (sad_comb_cmp == -1 && s->sad.temp < 64) + return 1; + + if (s->tiny.odd <= n || (s->tiny.noise <= n/2 && s->tiny.temp <= n/2)) { + if (s->interlaced_low <= n) { + if (p->num_fields == 1) + goto still; + if (s->tiny.even <= n || ps->tiny.noise <= n/2) + /* Still frame */ + goto still; + if (s->bigger.even >= 2*MAX(n,s->bigger.odd) + s->interlaced_low) + return 4; + if (s->low.even >= 2*n + s->interlaced_low) + return 4; + goto still; + } + } + if (s->low.odd <= n/4) { + if (s->interlaced_high <= n/4) { + if (p->num_fields == 1) + goto still; + if (s->low.even <= n/4) + /* Still frame */ + goto still; + if (s->bigger.even >= 2*MAX(n/4,s->bigger.odd)+s->interlaced_high) + return 4; + if (s->low.even >= n/2 + s->interlaced_high) + return 4; + goto still; + } + } + if (s->bigger.temp > 2*MAX(n,s->bigger.noise) + s->interlaced_low) + return 2; + if (s->bigger.noise > 2*MAX(n,s->bigger.temp) + s->interlaced_low) + return 1; + if (s->bigger.temp > 2*MAX(n,s->bigger.noise) + s->interlaced_high) + return 2; + if (s->bigger.noise > 2*MAX(n,s->bigger.temp) + s->interlaced_high) + return 1; + if (s->twox.temp > 2*MAX(n,s->twox.noise) + s->interlaced_high) + return 2; + if (s->twox.noise > 2*MAX(n,s->twox.temp) + s->interlaced_high) + return 1; + if (s->bigger.even > 2*MAX(n,s->bigger.odd) + s->interlaced_low && + s->bigger.temp < n && s->bigger.noise < n) + return 4; + if (s->interlaced_low > MIN(2*n, s->tiny.odd)) + return 3; + ret = 8 + (1 << (s->sad.temp > s->sad.noise)); + still: + if (p->num_fields == 1 && p->prev_fields == 3 && notfilm >= 0 && + (s->tiny.temp <= s->tiny.noise || s->sad.temp < s->sad.noise+16)) + return 1; + if (p->notout < p->num_fields && p->iosync > 2*p->in_inc && notfilm < 0) + notfilm = 0; + if (p->num_fields < 2 || + (p->num_fields == 2 && p->prev_fields == 2 && notfilm < 0)) + return ret; + if (!notfilm && (p->prev_fields&~1) == 2) { + if (p->prev_fields + p->num_fields == 5) { + if (s->tiny.noise <= s->tiny.temp || + s->low.noise == 0 || s->low.noise < s->low.temp || + s->sad.noise < s->sad.temp+16) + return 2; + } + if (p->prev_fields + p->num_fields == 4) { + if (s->tiny.temp <= s->tiny.noise || + s->low.temp == 0 || s->low.temp < s->low.noise || + s->sad.temp < s->sad.noise+16) + return 1; + } + } + if (p->num_fields > 2 && + ps->sad.noise > s->sad.noise && ps->sad.noise > s->sad.temp) + return 4; + return 2 >> (s->sad.noise > s->sad.temp); +} + +#define ITOC(X) (!(X) ? ' ' : (X) + ((X)>9 ? 'a'-10 : '0')) + +static int put_image(struct vf_instance_s* vf, mp_image_t *mpi) +{ + mp_image_t *dmpi; + struct vf_priv_s *p = vf->priv; + unsigned char **planes, **old_planes; + struct frame_stats *s = &p->stats[p->inframes & 1]; + struct frame_stats *ps = &p->stats[(p->inframes-1) & 1]; + int swapped = 0; + const int flags = mpi->fields; + int breaks, prev; + int show_fields = 0; + int dropped_fields = 0; + double start_time, diff_time; + char prev_chflag = p->chflag; + int keep_rate; + + if (!p->planes[0][0]) init(p, mpi); + + old_planes = p->old_planes; + + if (mpi->flags & MP_IMGFLAG_DIRECT) { + planes = mpi->priv; + mpi->priv = 0; + } else { + planes = p->planes[2 + (++p->temp_idx & 1)]; + my_memcpy_pic(planes[0], + mpi->planes[0] + p->crop_x + p->crop_y * mpi->stride[0], + p->w, p->h, p->stride, mpi->stride[0]); + if (mpi->flags & MP_IMGFLAG_PLANAR) { + my_memcpy_pic(planes[1], + mpi->planes[1] + p->crop_cx + p->crop_cy * mpi->stride[1], + p->cw, p->ch, p->chroma_stride, mpi->stride[1]); + my_memcpy_pic(planes[2], + mpi->planes[2] + p->crop_cx + p->crop_cy * mpi->stride[2], + p->cw, p->ch, p->chroma_stride, mpi->stride[2]); + } + } + + p->old_planes = planes; + p->chflag = ';'; + if (flags & MP_IMGFIELD_ORDERED) { + swapped = !(flags & MP_IMGFIELD_TOP_FIRST); + p->chflag = (flags & MP_IMGFIELD_REPEAT_FIRST ? '|' : + flags & MP_IMGFIELD_TOP_FIRST ? ':' : '.'); + } + p->swapped = swapped; + + start_time = get_time(); + if (p->chflag == '|') { + *s = ppzs; + p->iosync += p->in_inc; + } else if ((p->fast & 1) && prev_chflag == '|') + *s = pprs; + else + diff_fields(p, s, old_planes, planes); + diff_time = get_time(); + p->diff_time += diff_time - start_time; + breaks = p->inframes ? find_breaks(p, s) : 2; + p->inframes++; + keep_rate = 4*p->in_inc == p->out_dec; + + switch (breaks) { + case 0: + case 8: + case 9: + case 10: + if (!keep_rate && p->notout < p->num_fields && p->iosync < 2*p->in_inc) + break; + if (p->notout < p->num_fields) + dropped_fields = -2; + case 4: + if (keep_rate || p->iosync >= -2*p->in_inc) + show_fields = (4<<p->num_fields)-1; + break; + case 3: + if (keep_rate) + show_fields = 2; + else if (p->iosync > 0) { + if (p->notout >= p->num_fields && p->iosync > 2*p->in_inc) { + show_fields = 4; /* prev odd only */ + if (p->num_fields > 1) + show_fields |= 8; /* + prev even */ + } else { + show_fields = 2; /* even only */ + if (p->notout >= p->num_fields) + dropped_fields += p->num_fields; + } + } + break; + case 2: + if (p->iosync <= -3*p->in_inc) { + if (p->notout >= p->num_fields) + dropped_fields = p->num_fields; + break; + } + if (p->num_fields == 1) { + int prevbreak = ps->sad.noise >= 128; + if (p->iosync < 4*p->in_inc) { + show_fields = 3; + dropped_fields = prevbreak; + } else { + show_fields = 4 | (!prevbreak << 3); + if (p->notout < 1 + p->prev_fields) + dropped_fields = -!prevbreak; + } + break; + } + default: + if (keep_rate) + show_fields = 3 << (breaks & 1); + else if (p->notout >= p->num_fields && + p->iosync >= (breaks == 1 ? -p->in_inc : + p->in_inc << (p->num_fields == 1))) { + show_fields = (1 << (2 + p->num_fields)) - (1<<breaks); + } else { + if (p->notout >= p->num_fields) + dropped_fields += p->num_fields + 2 - breaks; + if (breaks == 2 && p->iosync > -3*p->in_inc) + show_fields = 3; /* odd+even */ + } + break; + } + + show_fields &= 15; + prev = p->prev_fields; + if (breaks < 8) { + if (p->num_fields == 1) + breaks &= ~4; + if (breaks) + p->num_breaks++; + if (breaks == 3) + p->prev_fields = p->num_fields = 1; + else if (breaks) { + p->prev_fields = p->num_fields + (breaks==1) - (breaks==4); + p->num_fields = breaks - (breaks == 4) + (p->chflag == '|'); + } else + p->num_fields += 2; + } else + p->num_fields += 2; + + p->iosync += 4 * p->in_inc; + if (p->chflag == '|') + p->iosync += p->in_inc; + + if (show_fields) { + p->iosync -= p->out_dec; + p->notout = !(show_fields & 1) + !(show_fields & 3); + if (((show_fields & 3) == 3 && + (s->low.noise + s->interlaced_low < (s->num_blocks>>8) || + s->sad.noise < 160)) || + ((show_fields & 12) == 12 && + (ps->low.noise + ps->interlaced_low < (s->num_blocks>>8) || + ps->sad.noise < 160))) { + p->export_count++; + dmpi = vf_get_image(vf->next, mpi->imgfmt, MP_IMGTYPE_EXPORT, + MP_IMGFLAG_PRESERVE|MP_IMGFLAG_READABLE, + p->w, p->h); + if ((show_fields & 3) != 3) planes = old_planes; + dmpi->planes[0] = planes[0]; + dmpi->stride[0] = p->stride; + dmpi->width = mpi->width; + if (mpi->flags & MP_IMGFLAG_PLANAR) { + dmpi->planes[1] = planes[1]; + dmpi->planes[2] = planes[2]; + dmpi->stride[1] = p->chroma_stride; + dmpi->stride[2] = p->chroma_stride; + } + } else { + p->merge_count++; + dmpi = vf_get_image(vf->next, mpi->imgfmt, + MP_IMGTYPE_TEMP, MP_IMGFLAG_ACCEPT_STRIDE, + p->w, p->h); + copy_merge_fields(p, dmpi, old_planes, planes, show_fields); + } + p->outframes++; + } else + p->notout += 2; + + if (p->verbose) + mp_msg(MSGT_VFILTER, MSGL_INFO, "%lu %lu: %x %c %c %lu%s%s%c%s\n", + p->inframes, p->outframes, + breaks, breaks<8 && breaks>0 ? (int) p->prev_fields+'0' : ' ', + ITOC(show_fields), + p->num_breaks, 5*p->in_inc == p->out_dec && breaks<8 && + breaks>0 && ((prev&~1)!=2 || prev+p->prev_fields!=5) ? + " ######## bad telecine ########" : "", + dropped_fields ? " ======== dropped ":"", ITOC(dropped_fields), + !show_fields || (show_fields & (show_fields-1)) ? + "" : " @@@@@@@@@@@@@@@@@"); + + p->merge_time += get_time() - diff_time; + return show_fields ? vf_next_put_image(vf, dmpi) : 0; +} + +static int query_format(struct vf_instance_s* vf, unsigned int fmt) +{ + /* FIXME - support more formats */ + switch (fmt) { + case IMGFMT_YV12: + case IMGFMT_IYUV: + case IMGFMT_I420: + case IMGFMT_411P: + case IMGFMT_422P: + case IMGFMT_444P: + return vf_next_query_format(vf, fmt); + } + return 0; +} + +static int config(struct vf_instance_s* vf, + int width, int height, int d_width, int d_height, + unsigned int flags, unsigned int outfmt) +{ + unsigned long cxm = 0; + unsigned long cym = 0; + struct vf_priv_s *p = vf->priv; + // rounding: + if(!IMGFMT_IS_RGB(outfmt) && !IMGFMT_IS_BGR(outfmt)){ + switch(outfmt){ + case IMGFMT_444P: + case IMGFMT_Y800: + case IMGFMT_Y8: + break; + case IMGFMT_YVU9: + case IMGFMT_IF09: + cym = 3; + case IMGFMT_411P: + cxm = 3; + break; + case IMGFMT_YV12: + case IMGFMT_I420: + case IMGFMT_IYUV: + cym = 1; + default: + cxm = 1; + } + } + p->chroma_swapped = !!(p->crop_y & (cym+1)); + if (p->w) p->w += p->crop_x & cxm; + if (p->h) p->h += p->crop_y & cym; + p->crop_x &= ~cxm; + p->crop_y &= ~cym; + if (!p->w || p->w > width ) p->w = width; + if (!p->h || p->h > height) p->h = height; + if (p->crop_x + p->w > width ) p->crop_x = 0; + if (p->crop_y + p->h > height) p->crop_y = 0; + + if(!opt_screen_size_x && !opt_screen_size_y){ + d_width = d_width * p->w/width; + d_height = d_height * p->h/height; + } + return vf_next_config(vf, p->w, p->h, d_width, d_height, flags, outfmt); +} + +static void uninit(struct vf_instance_s* vf) +{ + struct vf_priv_s *p = vf->priv; + mp_msg(MSGT_VFILTER, MSGL_INFO, "diff_time: %.3f, merge_time: %.3f, " + "export: %lu, merge: %lu\n", p->diff_time, p->merge_time, + p->export_count, p->merge_count); + free(p->memory_allocated); + free(p); +} + +static int open(vf_instance_t *vf, char* args) +{ + struct vf_priv_s *p; + vf->get_image = get_image; + vf->put_image = put_image; + vf->config = config; + vf->query_format = query_format; + vf->uninit = uninit; + vf->default_reqs = VFCAP_ACCEPT_STRIDE; + vf->priv = p = calloc(1, sizeof(struct vf_priv_s)); + p->out_dec = 5; + p->in_inc = 4; + p->thres.noise = 128; + p->thres.even = 128; + p->sad_thres = 64; + p->dint_thres = 4; + p->luma_only = 0; + p->fast = 3; + p->mmx2 = gCpuCaps.hasMMX2 ? 1 : gCpuCaps.has3DNow ? 2 : 0; + if (args) { + const char *args_remain = parse_args(p, args); + if (args_remain) { + mp_msg(MSGT_VFILTER, MSGL_FATAL, + "filmdint: unknown suboption: %s\n", args_remain); + return 0; + } + if (p->out_dec < p->in_inc) { + mp_msg(MSGT_VFILTER, MSGL_FATAL, + "filmdint: increasing the frame rate is not supported\n"); + return 0; + } + } + if (p->mmx2 > 2) + p->mmx2 = 0; +#ifndef HAVE_MMX + p->mmx2 = 0; +#endif +#ifndef HAVE_3DNOW + p->mmx2 &= 1; +#endif + p->thres.odd = p->thres.even; + p->thres.temp = p->thres.noise; + p->diff_time = 0; + p->merge_time = 0; + return 1; +} + +vf_info_t vf_info_filmdint = { + "Advanced inverse telecine filer", + "filmdint", + "Zoltan Hidvegi", + "", + open, + NULL +};