view libmpcodecs/pullup.c @ 14577:36fda489ed86

avoid using vo_subdevice
author alex
date Sat, 22 Jan 2005 18:05:07 +0000
parents 821f464b4d90
children f5e2fa7ec219
line wrap: on
line source



#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "pullup.h"
#include "config.h"



#ifdef ARCH_X86
#ifdef HAVE_MMX
static int diff_y_mmx(unsigned char *a, unsigned char *b, int s)
{
	int ret;
	asm volatile (
		"movl $4, %%ecx \n\t"
		"pxor %%mm4, %%mm4 \n\t"
		"pxor %%mm7, %%mm7 \n\t"
		
		".balign 16 \n\t"
		"1: \n\t"
		
		"movq (%%esi), %%mm0 \n\t"
		"movq (%%esi), %%mm2 \n\t"
		"addl %%eax, %%esi \n\t"
		"movq (%%edi), %%mm1 \n\t"
		"addl %%eax, %%edi \n\t"
		"psubusb %%mm1, %%mm2 \n\t"
		"psubusb %%mm0, %%mm1 \n\t"
		"movq %%mm2, %%mm0 \n\t"
		"movq %%mm1, %%mm3 \n\t"
		"punpcklbw %%mm7, %%mm0 \n\t"
		"punpcklbw %%mm7, %%mm1 \n\t"
		"punpckhbw %%mm7, %%mm2 \n\t"
		"punpckhbw %%mm7, %%mm3 \n\t"
		"paddw %%mm0, %%mm4 \n\t"
		"paddw %%mm1, %%mm4 \n\t"
		"paddw %%mm2, %%mm4 \n\t"
		"paddw %%mm3, %%mm4 \n\t"
		
		"decl %%ecx \n\t"
		"jnz 1b \n\t"

		"movq %%mm4, %%mm3 \n\t"
		"punpcklwd %%mm7, %%mm4 \n\t"
		"punpckhwd %%mm7, %%mm3 \n\t"
		"paddd %%mm4, %%mm3 \n\t"
		"movd %%mm3, %%eax \n\t"
		"psrlq $32, %%mm3 \n\t"
		"movd %%mm3, %%edx \n\t"
		"addl %%edx, %%eax \n\t"
		"emms \n\t"
		: "=a" (ret)
		: "S" (a), "D" (b), "a" (s)
		: "%edx"
		);
	return ret;
}

static int licomb_y_mmx(unsigned char *a, unsigned char *b, int s)
{
	int ret;
	asm volatile (
		"movl $4, %%ecx \n\t"
		"pxor %%mm6, %%mm6 \n\t"
		"pxor %%mm7, %%mm7 \n\t"
		"subl %%eax, %%edi \n\t"
		
		".balign 16 \n\t"
		"2: \n\t"

		"movq (%%esi), %%mm0 \n\t"
		"movq (%%edi), %%mm1 \n\t"
		"punpcklbw %%mm7, %%mm0 \n\t"
		"movq (%%edi,%%eax), %%mm2 \n\t"
		"punpcklbw %%mm7, %%mm1 \n\t"
		"punpcklbw %%mm7, %%mm2 \n\t"
		"paddw %%mm0, %%mm0 \n\t"
		"paddw %%mm2, %%mm1 \n\t"
		"movq %%mm0, %%mm2 \n\t"
		"psubusw %%mm1, %%mm0 \n\t"
		"psubusw %%mm2, %%mm1 \n\t"
		"paddw %%mm0, %%mm6 \n\t"
		"paddw %%mm1, %%mm6 \n\t"

		"movq (%%esi), %%mm0 \n\t"
		"movq (%%edi), %%mm1 \n\t"
		"punpckhbw %%mm7, %%mm0 \n\t"
		"movq (%%edi,%%eax), %%mm2 \n\t"
		"punpckhbw %%mm7, %%mm1 \n\t"
		"punpckhbw %%mm7, %%mm2 \n\t"
		"paddw %%mm0, %%mm0 \n\t"
		"paddw %%mm2, %%mm1 \n\t"
		"movq %%mm0, %%mm2 \n\t"
		"psubusw %%mm1, %%mm0 \n\t"
		"psubusw %%mm2, %%mm1 \n\t"
		"paddw %%mm0, %%mm6 \n\t"
		"paddw %%mm1, %%mm6 \n\t"
		
		"movq (%%edi,%%eax), %%mm0 \n\t"
		"movq (%%esi), %%mm1 \n\t"
		"punpcklbw %%mm7, %%mm0 \n\t"
		"movq (%%esi,%%eax), %%mm2 \n\t"
		"punpcklbw %%mm7, %%mm1 \n\t"
		"punpcklbw %%mm7, %%mm2 \n\t"
		"paddw %%mm0, %%mm0 \n\t"
		"paddw %%mm2, %%mm1 \n\t"
		"movq %%mm0, %%mm2 \n\t"
		"psubusw %%mm1, %%mm0 \n\t"
		"psubusw %%mm2, %%mm1 \n\t"
		"paddw %%mm0, %%mm6 \n\t"
		"paddw %%mm1, %%mm6 \n\t"
		
		"movq (%%edi,%%eax), %%mm0 \n\t"
		"movq (%%esi), %%mm1 \n\t"
		"punpckhbw %%mm7, %%mm0 \n\t"
		"movq (%%esi,%%eax), %%mm2 \n\t"
		"punpckhbw %%mm7, %%mm1 \n\t"
		"punpckhbw %%mm7, %%mm2 \n\t"
		"paddw %%mm0, %%mm0 \n\t"
		"paddw %%mm2, %%mm1 \n\t"
		"movq %%mm0, %%mm2 \n\t"
		"psubusw %%mm1, %%mm0 \n\t"
		"psubusw %%mm2, %%mm1 \n\t"
		"paddw %%mm0, %%mm6 \n\t"
		"paddw %%mm1, %%mm6 \n\t"

		"addl %%eax, %%esi \n\t"
		"addl %%eax, %%edi \n\t"
		"decl %%ecx \n\t"
		"jnz 2b \n\t"
		
		"movq %%mm6, %%mm5 \n\t"
		"punpcklwd %%mm7, %%mm6 \n\t"
		"punpckhwd %%mm7, %%mm5 \n\t"
		"paddd %%mm6, %%mm5 \n\t"
		"movd %%mm5, %%eax \n\t"
		"psrlq $32, %%mm5 \n\t"
		"movd %%mm5, %%edx \n\t"
		"addl %%edx, %%eax \n\t"
		
		"emms \n\t"
		: "=a" (ret)
		: "S" (a), "D" (b), "a" (s)
		: "%edx"
		);
	return ret;
}
#endif
#endif

#define ABS(a) (((a)^((a)>>31))-((a)>>31))

static int diff_y(unsigned char *a, unsigned char *b, int s)
{
	int i, j, diff=0;
	for (i=4; i; i--) {
		for (j=0; j<8; j++) diff += ABS(a[j]-b[j]);
		a+=s; b+=s;
	}
	return diff;
}

static int licomb_y(unsigned char *a, unsigned char *b, int s)
{
	int i, j, diff=0;
	for (i=4; i; i--) {
		for (j=0; j<8; j++)
			diff += ABS((a[j]<<1) - b[j-s] - b[j])
				+ ABS((b[j]<<1) - a[j] - a[j+s]);
		a+=s; b+=s;
	}
	return diff;
}

static int qpcomb_y(unsigned char *a, unsigned char *b, int s)
{
	int i, j, diff=0;
	for (i=4; i; i--) {
		for (j=0; j<8; j++)
			diff += ABS(a[j] - 3*b[j-s] + 3*a[j+s] - b[j]);
		a+=s; b+=s;
	}
	return diff;
}

#if 0
static int licomb_y_test(unsigned char *a, unsigned char *b, int s)
{
	int c = licomb_y(a,b,s);
	int m = licomb_y_mmx(a,b,s);
	if (c != m) printf("%d != %d\n", c, m);
	return m;
}
#endif









static void alloc_buffer(struct pullup_context *c, struct pullup_buffer *b)
{
	int i;
	if (b->planes) return;
	b->planes = calloc(c->nplanes, sizeof(unsigned char *));
	for (i = 0; i < c->nplanes; i++) {
		b->planes[i] = malloc(c->h[i]*c->stride[i]);
		/* Deal with idiotic 128=0 for chroma: */
		memset(b->planes[i], c->background[i], c->h[i]*c->stride[i]);
	}
}

struct pullup_buffer *pullup_lock_buffer(struct pullup_buffer *b, int parity)
{
	if (!b) return 0;
	if (parity+1 & 1) b->lock[0]++;
	if (parity+1 & 2) b->lock[1]++;
	return b;
}

void pullup_release_buffer(struct pullup_buffer *b, int parity)
{
	if (!b) return;
	if (parity+1 & 1) b->lock[0]--;
	if (parity+1 & 2) b->lock[1]--;
}

struct pullup_buffer *pullup_get_buffer(struct pullup_context *c, int parity)
{
	int i;

	/* Try first to get the sister buffer for the previous field */
	if (parity < 2 && c->last && parity != c->last->parity
	    && !c->last->buffer->lock[parity]) {
		alloc_buffer(c, c->last->buffer);
		return pullup_lock_buffer(c->last->buffer, parity);
	}
	
	/* Prefer a buffer with both fields open */
	for (i = 0; i < c->nbuffers; i++) {
		if (c->buffers[i].lock[0]) continue;
		if (c->buffers[i].lock[1]) continue;
		alloc_buffer(c, &c->buffers[i]);
		return pullup_lock_buffer(&c->buffers[i], parity);
	}

	if (parity == 2) return 0;
	
	/* Search for any half-free buffer */
	for (i = 0; i < c->nbuffers; i++) {
		if (parity+1 & 1 && c->buffers[i].lock[0]) continue;
		if (parity+1 & 2 && c->buffers[i].lock[1]) continue;
		alloc_buffer(c, &c->buffers[i]);
		return pullup_lock_buffer(&c->buffers[i], parity);
	}
	
	return 0;
}






static void compute_metric(struct pullup_context *c,
	struct pullup_field *fa, int pa,
	struct pullup_field *fb, int pb,
	int (*func)(unsigned char *, unsigned char *, int), int *dest)
{
	unsigned char *a, *b;
	int x, y;
	int mp = c->metric_plane;
	int xstep = c->bpp[mp];
	int ystep = c->stride[mp]<<3;
	int s = c->stride[mp]<<1; /* field stride */
	int w = c->metric_w*xstep;

	if (!fa->buffer || !fb->buffer) return;

	/* Shortcut for duplicate fields (e.g. from RFF flag) */
	if (fa->buffer == fb->buffer && pa == pb) {
		memset(dest, 0, c->metric_len * sizeof(int));
		return;
	}

	a = fa->buffer->planes[mp] + pa * c->stride[mp] + c->metric_offset;
	b = fb->buffer->planes[mp] + pb * c->stride[mp] + c->metric_offset;

	for (y = c->metric_h; y; y--) {
		for (x = 0; x < w; x += xstep) {
			*dest++ = func(a + x, b + x, s);
		}
		a += ystep; b += ystep;
	}
}





static void alloc_metrics(struct pullup_context *c, struct pullup_field *f)
{
	f->diffs = calloc(c->metric_len, sizeof(int));
	f->comb = calloc(c->metric_len, sizeof(int));
	/* add more metrics here as needed */
}

static struct pullup_field *make_field_queue(struct pullup_context *c, int len)
{
	struct pullup_field *head, *f;
	f = head = calloc(1, sizeof(struct pullup_field));
	alloc_metrics(c, f);
	for (; len > 0; len--) {
		f->next = calloc(1, sizeof(struct pullup_field));
		f->next->prev = f;
		f = f->next;
		alloc_metrics(c, f);
	}
	f->next = head;
	head->prev = f;
	return head;
}

static void check_field_queue(struct pullup_context *c)
{
	if (c->head->next == c->first) {
		struct pullup_field *f = calloc(1, sizeof(struct pullup_field));
		alloc_metrics(c, f);
		f->prev = c->head;
		f->next = c->first;
		c->head->next = f;
		c->first->prev = f;
	}
}

void pullup_submit_field(struct pullup_context *c, struct pullup_buffer *b, int parity)
{
	struct pullup_field *f;
	
	/* Grow the circular list if needed */
	check_field_queue(c);
	
	/* Cannot have two fields of same parity in a row; drop the new one */
	if (c->last && c->last->parity == parity) return;

	f = c->head;
	f->parity = parity;
	f->buffer = pullup_lock_buffer(b, parity);
	f->flags = 0;
	f->breaks = 0;
	f->affinity = 0;

	compute_metric(c, f, parity, f->prev->prev, parity, c->diff, f->diffs);
	compute_metric(c, parity?f->prev:f, 0, parity?f:f->prev, 1, c->comb, f->comb);

	/* Advance the circular list */
	if (!c->first) c->first = c->head;
	c->last = c->head;
	c->head = c->head->next;
}

void pullup_flush_fields(struct pullup_context *c)
{
	struct pullup_field *f;
	
	for (f = c->first; f && f != c->head; f = f->next) {
		pullup_release_buffer(f->buffer, f->parity);
		f->buffer = 0;
	}
	c->first = c->last = 0;
}








#define F_HAVE_BREAKS 1
#define F_HAVE_AFFINITY 2


#define BREAK_LEFT 1
#define BREAK_RIGHT 2




static int queue_length(struct pullup_field *begin, struct pullup_field *end)
{
	int count = 1;
	struct pullup_field *f;
	
	if (!begin || !end) return 0;
	for (f = begin; f != end; f = f->next) count++;
	return count;
}

static int find_first_break(struct pullup_field *f, int max)
{
	int i;
	for (i = 0; i < max; i++) {
		if (f->breaks & BREAK_RIGHT || f->next->breaks & BREAK_LEFT)
			return i+1;
		f = f->next;
	}
	return 0;
}

static void compute_breaks(struct pullup_context *c, struct pullup_field *f0)
{
	int i;
	struct pullup_field *f1 = f0->next;
	struct pullup_field *f2 = f1->next;
	struct pullup_field *f3 = f2->next;
	int l, max_l=0, max_r=0;
	//struct pullup_field *ff;
	//for (i=0, ff=c->first; ff != f0; i++, ff=ff->next);

	if (f0->flags & F_HAVE_BREAKS) return;
	//printf("\n%d: ", i);
	f0->flags |= F_HAVE_BREAKS;

	/* Special case when fields are 100% identical */
	if (f0->buffer == f2->buffer && f1->buffer != f3->buffer) {
		f2->breaks |= BREAK_RIGHT;
		return;
	}
	if (f0->buffer != f2->buffer && f1->buffer == f3->buffer) {
		f1->breaks |= BREAK_LEFT;
		return;
	}

	for (i = 0; i < c->metric_len; i++) {
		l = f2->diffs[i] - f3->diffs[i];
		if (l > max_l) max_l = l;
		if (-l > max_r) max_r = -l;
	}
	/* Don't get tripped up when differences are mostly quant error */
	//printf("%d %d\n", max_l, max_r);
	if (max_l + max_r < 128) return;
	if (max_l > 4*max_r) f1->breaks |= BREAK_LEFT;
	if (max_r > 4*max_l) f2->breaks |= BREAK_RIGHT;
}

static void compute_affinity(struct pullup_context *c, struct pullup_field *f)
{
	int i;
	int max_l=0, max_r=0, l;
	if (f->flags & F_HAVE_AFFINITY) return;
	f->flags |= F_HAVE_AFFINITY;
	for (i = 0; i < c->metric_len; i++) {
		l = f->comb[i] - f->next->comb[i];
		if (l > max_l) max_l = l;
		if (-l > max_r) max_r = -l;
	}
	if (max_l + max_r < 64) return;
	if (max_r > 2*max_l) f->affinity = -1;
	else if (max_l > 2*max_r) f->affinity = 1;
}

static void foo(struct pullup_context *c)
{
	struct pullup_field *f = c->first;
	int i, n = queue_length(f, c->last);
	for (i = 0; i < n-1; i++) {
		if (i < n-3) compute_breaks(c, f);
		compute_affinity(c, f);
		f = f->next;
	}
}

static int decide_frame_length(struct pullup_context *c)
{
	struct pullup_field *f0 = c->first;
	struct pullup_field *f1 = f0->next;
	struct pullup_field *f2 = f1->next;
	struct pullup_field *f3 = f2->next;
	int l;
	
	if (queue_length(c->first, c->last) < 6) return 0;
	foo(c);

	if (f0->affinity == -1) return 1;

	l = find_first_break(f0, 3);
	if (l == 1 && c->strict_breaks < 0) l = 0;
	
	switch (l) {
	case 1:
		if (c->strict_breaks > 0 && f0->affinity == 1 && f1->affinity == -1)
			return 2;
		else return 1;
	case 2:
		/* FIXME: strictly speaking, f0->prev is no longer valid... :) */
		if (c->strict_pairs
			&& (f0->prev->breaks & BREAK_RIGHT) && (f2->breaks & BREAK_LEFT)
			&& (f0->affinity != 1 || f1->affinity != -1) )
			return 1;
		if (f1->affinity == 1) return 1;
		else return 2;
	case 3:
		if (f2->affinity == 1) return 2;
		else return 3;
	default:
		/* 9 possibilities covered before switch */
		if (f1->affinity == 1) return 1; /* covers 6 */
		else if (f1->affinity == -1) return 2; /* covers 6 */
		else if (f2->affinity == -1) { /* covers 2 */
			if (f0->affinity == 1) return 3;
			else return 1;
		}
		else return 2; /* the remaining 6 */
	}
}


static void print_aff_and_breaks(struct pullup_context *c, struct pullup_field *f)
{
	int i;
	int max_l, max_r, l;
	struct pullup_field *f0 = f;
	const char aff_l[] = "+..", aff_r[] = "..+";
	printf("\naffinity: ");
	for (i = 0; i < 6; i++) {
		printf("%c%d%c", aff_l[1+f->affinity], i, aff_r[1+f->affinity]);
		f = f->next;
	}
	f = f0;
	printf("\nbreaks:   ");
	for (i=0; i<6; i++) {
		printf("%c%d%c", f->breaks & BREAK_LEFT ? '|' : '.', i, f->breaks & BREAK_RIGHT ? '|' : '.');
		f = f->next;
	}
	printf("\n");
}





struct pullup_frame *pullup_get_frame(struct pullup_context *c)
{
	int i;
	struct pullup_frame *fr = c->frame;
	int n = decide_frame_length(c);
	int aff = c->first->next->affinity;

	if (!n) return 0;
	if (fr->lock) return 0;

	if (c->verbose) {
		print_aff_and_breaks(c, c->first);
		printf("duration: %d    \n", n);
	}

	fr->lock++;
	fr->length = n;
	fr->parity = c->first->parity;
	fr->buffer = 0;
	for (i = 0; i < n; i++) {
		/* We cheat and steal the buffer without release+relock */
		fr->ifields[i] = c->first->buffer;
		c->first->buffer = 0;
		c->first = c->first->next;
	}
	
	if (n == 1) {
		fr->ofields[fr->parity] = fr->ifields[0];
		fr->ofields[fr->parity^1] = 0;
	} else if (n == 2) {
		fr->ofields[fr->parity] = fr->ifields[0];
		fr->ofields[fr->parity^1] = fr->ifields[1];
	} else if (n == 3) {
		if (aff == 0)
			aff = (fr->ifields[0] == fr->ifields[1]) ? -1 : 1;
		/* else if (c->verbose) printf("forced aff: %d    \n", aff); */
		fr->ofields[fr->parity] = fr->ifields[1+aff];
		fr->ofields[fr->parity^1] = fr->ifields[1];
	}
	pullup_lock_buffer(fr->ofields[0], 0);
	pullup_lock_buffer(fr->ofields[1], 1);
	
	if (fr->ofields[0] == fr->ofields[1]) {
		fr->buffer = fr->ofields[0];
		pullup_lock_buffer(fr->buffer, 2);
		return fr;
	}
	return fr;
}

static void copy_field(struct pullup_context *c, struct pullup_buffer *dest,
	struct pullup_buffer *src, int parity)
{
	int i, j;
	unsigned char *d, *s;
	for (i = 0; i < c->nplanes; i++) {
		s = src->planes[i] + parity*c->stride[i];
		d = dest->planes[i] + parity*c->stride[i];
		for (j = c->h[i]>>1; j; j--) {
			memcpy(d, s, c->stride[i]);
			s += c->stride[i]<<1;
			d += c->stride[i]<<1;
		}
	}
}

void pullup_pack_frame(struct pullup_context *c, struct pullup_frame *fr)
{
	int i;
	int par = fr->parity;
	if (fr->buffer) return;
	if (fr->length < 2) return; /* FIXME: deal with this */
	for (i = 0; i < 2; i++)
	{
		if (fr->ofields[i]->lock[i^1]) continue;
		fr->buffer = fr->ofields[i];
		pullup_lock_buffer(fr->buffer, 2);
		copy_field(c, fr->buffer, fr->ofields[i^1], i^1);
		return;
	}
	fr->buffer = pullup_get_buffer(c, 2);
	copy_field(c, fr->buffer, fr->ofields[0], 0);
	copy_field(c, fr->buffer, fr->ofields[1], 1);
}

void pullup_release_frame(struct pullup_frame *fr)
{
	int i;
	for (i = 0; i < fr->length; i++)
		pullup_release_buffer(fr->ifields[i], fr->parity ^ (i&1));
	pullup_release_buffer(fr->ofields[0], 0);
	pullup_release_buffer(fr->ofields[1], 1);
	if (fr->buffer) pullup_release_buffer(fr->buffer, 2);
	fr->lock--;
}






struct pullup_context *pullup_alloc_context()
{
	struct pullup_context *c;

	c = calloc(1, sizeof(struct pullup_context));

	return c;
}

void pullup_preinit_context(struct pullup_context *c)
{
	c->bpp = calloc(c->nplanes, sizeof(int));
	c->w = calloc(c->nplanes, sizeof(int));
	c->h = calloc(c->nplanes, sizeof(int));
	c->stride = calloc(c->nplanes, sizeof(int));
	c->background = calloc(c->nplanes, sizeof(int));
}

void pullup_init_context(struct pullup_context *c)
{
	int mp = c->metric_plane;
	if (c->nbuffers < 10) c->nbuffers = 10;
	c->buffers = calloc(c->nbuffers, sizeof (struct pullup_buffer));

	c->metric_w = (c->w[mp] - (c->junk_left + c->junk_right << 3)) >> 3;
	c->metric_h = (c->h[mp] - (c->junk_top + c->junk_bottom << 1)) >> 3;
	c->metric_offset = c->junk_left*c->bpp[mp] + (c->junk_top<<1)*c->stride[mp];
	c->metric_len = c->metric_w * c->metric_h;
	
	c->head = make_field_queue(c, 8);

	c->frame = calloc(1, sizeof (struct pullup_frame));
	c->frame->ifields = calloc(3, sizeof (struct pullup_buffer *));

	switch(c->format) {
	case PULLUP_FMT_Y:
		c->diff = diff_y;
		c->comb = licomb_y;
#ifdef ARCH_X86
#ifdef HAVE_MMX
		if (c->cpu & PULLUP_CPU_MMX) {
			c->diff = diff_y_mmx;
			c->comb = licomb_y_mmx;
		}
#endif
#endif
		/* c->comb = qpcomb_y; */
		break;
#if 0
	case PULLUP_FMT_YUY2:
		c->diff = diff_yuy2;
		break;
	case PULLUP_FMT_RGB32:
		c->diff = diff_rgb32;
		break;
#endif
	}
}

void pullup_free_context(struct pullup_context *c)
{
	struct pullup_field *f;
	free(c->buffers);
	f = c->head;
	do {
		free(f->diffs);
		free(f->comb);
		f = f->next;
		free(f->prev);
	} while (f != c->head);
	free(c->frame);
	free(c);
}