Mercurial > mplayer.hg
view libmpcodecs/pullup.c @ 11073:0c633f49231d
added a few more device ids
author | faust3 |
---|---|
date | Fri, 10 Oct 2003 07:12:37 +0000 |
parents | fdcd0d571912 |
children | ec3dac7d17a0 |
line wrap: on
line source
#include <stdlib.h> #include "pullup.h" #include "config.h" #ifdef HAVE_MMX static int diff_y_mmx(unsigned char *a, unsigned char *b, int s) { int ret; asm volatile ( "movl $4, %%ecx \n\t" "pxor %%mm4, %%mm4 \n\t" "pxor %%mm7, %%mm7 \n\t" ".balign 16 \n\t" "1: \n\t" "movq (%%esi), %%mm0 \n\t" "movq (%%esi), %%mm2 \n\t" "addl %%eax, %%esi \n\t" "movq (%%edi), %%mm1 \n\t" "addl %%eax, %%edi \n\t" "psubusb %%mm1, %%mm2 \n\t" "psubusb %%mm0, %%mm1 \n\t" "movq %%mm2, %%mm0 \n\t" "movq %%mm1, %%mm3 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" "punpcklbw %%mm7, %%mm1 \n\t" "punpckhbw %%mm7, %%mm2 \n\t" "punpckhbw %%mm7, %%mm3 \n\t" "paddw %%mm0, %%mm4 \n\t" "paddw %%mm1, %%mm4 \n\t" "paddw %%mm2, %%mm4 \n\t" "paddw %%mm3, %%mm4 \n\t" "decl %%ecx \n\t" "jnz 1b \n\t" "movq %%mm4, %%mm3 \n\t" "punpcklwd %%mm7, %%mm4 \n\t" "punpckhwd %%mm7, %%mm3 \n\t" "paddd %%mm4, %%mm3 \n\t" "movd %%mm3, %%eax \n\t" "psrlq $32, %%mm3 \n\t" "movd %%mm3, %%edx \n\t" "addl %%edx, %%eax \n\t" "emms \n\t" : "=a" (ret) : "S" (a), "D" (b), "a" (s) : "%edx" ); return ret; } static int licomb_y_mmx(unsigned char *a, unsigned char *b, int s) { int ret; asm volatile ( "movl $8, %%ecx \n\t" "pxor %%mm6, %%mm6 \n\t" "pxor %%mm7, %%mm7 \n\t" "subl %%eax, %%edi \n\t" ".balign 16 \n\t" "2: \n\t" "movq (%%esi), %%mm0 \n\t" "movq (%%edi), %%mm1 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" "movq (%%edi,%%eax), %%mm2 \n\t" "punpcklbw %%mm7, %%mm1 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" "paddw %%mm0, %%mm0 \n\t" "paddw %%mm2, %%mm1 \n\t" "movq %%mm0, %%mm2 \n\t" "psubusw %%mm1, %%mm0 \n\t" "psubusw %%mm2, %%mm1 \n\t" "paddw %%mm0, %%mm6 \n\t" "paddw %%mm1, %%mm6 \n\t" "movq (%%esi), %%mm0 \n\t" "movq (%%edi), %%mm1 \n\t" "punpckhbw %%mm7, %%mm0 \n\t" "movq (%%edi,%%eax), %%mm2 \n\t" "punpckhbw %%mm7, %%mm1 \n\t" "punpckhbw %%mm7, %%mm2 \n\t" "paddw %%mm0, %%mm0 \n\t" "paddw %%mm2, %%mm1 \n\t" "movq %%mm0, %%mm2 \n\t" "psubusw %%mm1, %%mm0 \n\t" "psubusw %%mm2, %%mm1 \n\t" "paddw %%mm0, %%mm6 \n\t" "paddw %%mm1, %%mm6 \n\t" "movq (%%edi,%%eax), %%mm0 \n\t" "movq (%%esi), %%mm1 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" "movq (%%esi,%%eax), %%mm2 \n\t" "punpcklbw %%mm7, %%mm1 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" "paddw %%mm0, %%mm0 \n\t" "paddw %%mm2, %%mm1 \n\t" "movq %%mm0, %%mm2 \n\t" "psubusw %%mm1, %%mm0 \n\t" "psubusw %%mm2, %%mm1 \n\t" "paddw %%mm0, %%mm6 \n\t" "paddw %%mm1, %%mm6 \n\t" "movq (%%edi,%%eax), %%mm0 \n\t" "movq (%%esi), %%mm1 \n\t" "punpckhbw %%mm7, %%mm0 \n\t" "movq (%%esi,%%eax), %%mm2 \n\t" "punpckhbw %%mm7, %%mm1 \n\t" "punpckhbw %%mm7, %%mm2 \n\t" "paddw %%mm0, %%mm0 \n\t" "paddw %%mm2, %%mm1 \n\t" "movq %%mm0, %%mm2 \n\t" "psubusw %%mm1, %%mm0 \n\t" "psubusw %%mm2, %%mm1 \n\t" "paddw %%mm0, %%mm6 \n\t" "paddw %%mm1, %%mm6 \n\t" "addl %%eax, %%esi \n\t" "addl %%eax, %%edi \n\t" "decl %%ecx \n\t" "jnz 2b \n\t" "movq %%mm6, %%mm5 \n\t" "punpcklwd %%mm7, %%mm6 \n\t" "punpckhwd %%mm7, %%mm5 \n\t" "paddd %%mm6, %%mm5 \n\t" "movd %%mm5, %%eax \n\t" "psrlq $32, %%mm5 \n\t" "movd %%mm5, %%edx \n\t" "addl %%edx, %%eax \n\t" "emms \n\t" : "=a" (ret) : "S" (a), "D" (b), "a" (s) : "%edx" ); return ret; } #endif #define ABS(a) (((a)^((a)>>31))-((a)>>31)) static int diff_y(unsigned char *a, unsigned char *b, int s) { int i, j, diff=0; for (i=4; i; i--) { for (j=0; j<8; j++) diff += ABS(a[j]-b[j]); a+=s; b+=s; } return diff; } static int licomb_y(unsigned char *a, unsigned char *b, int s) { int i, j, diff=0; for (i=8; i; i--) { for (j=0; j<8; j++) diff += ABS((a[j]<<1) - b[j-s] - b[j]) + ABS((b[j]<<1) - a[j] - a[j+s]); a+=s; b+=s; } return diff; } #if 0 static int licomb_y_test(unsigned char *a, unsigned char *b, int s) { int c = licomb_y(a,b,s); int m = licomb_y_mmx(a,b,s); if (c != m) printf("%d != %d\n", c, m); return m; } #endif static void alloc_buffer(struct pullup_context *c, struct pullup_buffer *b) { int i; if (b->planes) return; b->planes = calloc(c->nplanes, sizeof(unsigned char *)); for (i = 0; i < c->nplanes; i++) { b->planes[i] = malloc(c->h[i]*c->stride[i]); /* Deal with idiotic 128=0 for chroma: */ memset(b->planes[i], c->background[i], c->h[i]*c->stride[i]); } } struct pullup_buffer *pullup_lock_buffer(struct pullup_buffer *b, int parity) { if (parity+1 & 1) b->lock[0]++; if (parity+1 & 2) b->lock[1]++; return b; } void pullup_release_buffer(struct pullup_buffer *b, int parity) { if (parity+1 & 1) b->lock[0]--; if (parity+1 & 2) b->lock[1]--; } struct pullup_buffer *pullup_get_buffer(struct pullup_context *c, int parity) { int i; /* Try first to get the sister buffer for the previous field */ if (parity < 2 && c->last && parity != c->last->parity && !c->last->buffer->lock[parity]) { alloc_buffer(c, c->last->buffer); return pullup_lock_buffer(c->last->buffer, parity); } /* Prefer a buffer with both fields open */ for (i = 0; i < c->nbuffers; i++) { if (c->buffers[i].lock[0]) continue; if (c->buffers[i].lock[1]) continue; alloc_buffer(c, &c->buffers[i]); return pullup_lock_buffer(&c->buffers[i], parity); } if (parity == 2) return 0; /* Search for any half-free buffer */ for (i = 0; i < c->nbuffers; i++) { if (parity+1 & 1 && c->buffers[i].lock[0]) continue; if (parity+1 & 2 && c->buffers[i].lock[1]) continue; alloc_buffer(c, &c->buffers[i]); return pullup_lock_buffer(&c->buffers[i], parity); } return 0; } static void compute_metric(struct pullup_context *c, struct pullup_field *fa, int pa, struct pullup_field *fb, int pb, int (*func)(unsigned char *, unsigned char *, int), int *dest) { unsigned char *a, *b; int x, y; int mp = c->metric_plane; int xstep = c->bpp[mp]; int ystep = c->stride[mp]<<3; int s = c->stride[mp]<<1; /* field stride */ int w = c->metric_w*xstep; if (!fa->buffer || !fb->buffer) return; /* Shortcut for duplicate fields (e.g. from RFF flag) */ if (fa->buffer == fb->buffer && pa == pb) { memset(dest, 0, c->metric_len * sizeof(int)); return; } a = fa->buffer->planes[mp] + pa * c->stride[mp] + c->metric_offset; b = fb->buffer->planes[mp] + pb * c->stride[mp] + c->metric_offset; for (y = c->metric_h; y; y--) { for (x = 0; x < w; x += xstep) { *dest++ = func(a + x, b + x, s); } a += ystep; b += ystep; } } static void alloc_metrics(struct pullup_context *c, struct pullup_field *f) { f->diffs = calloc(c->metric_len, sizeof(int)); f->licomb = calloc(c->metric_len, sizeof(int)); /* add more metrics here as needed */ } static struct pullup_field *make_field_queue(struct pullup_context *c, int len) { struct pullup_field *head, *f; f = head = calloc(1, sizeof(struct pullup_field)); alloc_metrics(c, f); for (; len > 0; len--) { f->next = calloc(1, sizeof(struct pullup_field)); f->next->prev = f; f = f->next; alloc_metrics(c, f); } f->next = head; head->prev = f; return head; } static void check_field_queue(struct pullup_context *c) { if (c->head->next == c->first) { struct pullup_field *f = calloc(1, sizeof(struct pullup_field)); alloc_metrics(c, f); f->prev = c->head; f->next = c->first; c->head->next = f; c->first->prev = f; } } int pullup_submit_field(struct pullup_context *c, struct pullup_buffer *b, int parity) { struct pullup_field *f; /* Grow the circular list if needed */ check_field_queue(c); /* Cannot have two fields of same parity in a row; drop the new one */ if (c->last && c->last->parity == parity) return 0; f = c->head; f->parity = parity; f->buffer = pullup_lock_buffer(b, parity); f->flags = 0; f->breaks = 0; f->affinity = 0; compute_metric(c, f, parity, f->prev->prev, parity, c->diff, f->diffs); compute_metric(c, parity?f->prev:f, 0, parity?f:f->prev, 1, c->licomb, f->licomb); /* Advance the circular list */ if (!c->first) c->first = c->head; c->last = c->head; c->head = c->head->next; } void pullup_flush_fields(struct pullup_context *c) { struct pullup_field *f; for (f = c->first; f && f != c->head; f = f->next) { pullup_release_buffer(f->buffer, f->parity); f->buffer = 0; } c->first = c->last = 0; } #define F_HAVE_BREAKS 1 #define F_HAVE_AFFINITY 2 #define BREAK_LEFT 1 #define BREAK_RIGHT 2 static int queue_length(struct pullup_field *begin, struct pullup_field *end) { int count = 1; struct pullup_field *f; if (!begin || !end) return 0; for (f = begin; f != end; f = f->next) count++; return count; } static int find_first_break(struct pullup_field *f, int max) { int i; for (i = 0; i < max; i++) { if (f->breaks & BREAK_RIGHT || f->next->breaks & BREAK_LEFT) return i+1; f = f->next; } return 0; } static void compute_breaks(struct pullup_context *c, struct pullup_field *f0) { int i; struct pullup_field *f1 = f0->next; struct pullup_field *f2 = f1->next; struct pullup_field *f3 = f2->next; int l, max_l=0, max_r=0; if (f0->flags & F_HAVE_BREAKS) return; f0->flags |= F_HAVE_BREAKS; /* Special case when fields are 100% identical */ if (f0->buffer == f2->buffer && f1->buffer != f3->buffer) { f0->breaks |= BREAK_LEFT; f2->breaks |= BREAK_RIGHT; return; } for (i = 0; i < c->metric_len; i++) { l = f2->diffs[i] - f3->diffs[i]; if (l > max_l) max_l = l; if (-l > max_r) max_r = -l; } /* Don't get tripped up when differences are mostly quant error */ if (max_l + max_r < 128) return; if (max_l > 4*max_r) f1->breaks |= BREAK_LEFT; if (max_r > 4*max_l) f2->breaks |= BREAK_RIGHT; } static void compute_affinity(struct pullup_context *c, struct pullup_field *f) { int i; int max_l=0, max_r=0, l, t; if (f->flags & F_HAVE_AFFINITY) return; f->flags |= F_HAVE_AFFINITY; for (i = 0; i < c->metric_len; i++) { l = f->licomb[i] - f->next->licomb[i]; if (l > max_l) max_l = l; if (-l > max_r) max_r = -l; } if (max_l + max_r < 128) return; if (max_r > 2*max_l) f->affinity = -1; else if (max_l > 2*max_r) f->affinity = 1; else if (max_l + max_r > 2048) { l = t = 0; for (i = 0; i < c->metric_len; i++) { l += f->licomb[i] - f->next->licomb[i]; t += ABS(f->licomb[i] - f->next->licomb[i]); } if (-l*4 > t) f->affinity = -1; else if (l*4 > t) f->affinity = 1; } } static void foo(struct pullup_context *c) { struct pullup_field *f = c->first; int i, n = queue_length(f, c->last); for (i = 0; i < n-1; i++) { if (i < n-3) compute_breaks(c, f); compute_affinity(c, f); f = f->next; } } static int decide_frame_length(struct pullup_context *c) { int n; struct pullup_field *f0 = c->first; struct pullup_field *f1 = f0->next; struct pullup_field *f2 = f1->next; struct pullup_field *f3 = f2->next; struct pullup_field *f4 = f3->next; struct pullup_field *f5 = f4->next; if (queue_length(c->first, c->last) < 6) return 0; foo(c); n = find_first_break(f0, 3); if (f0->affinity == -1) return 1; switch (n) { case 1: return 1; case 2: if (f1->affinity == 1) return 1; else return 2; case 3: if (f1->affinity == -1) return 2; else if (f1->affinity == 1) return 1; else return 3; default: if (f1->affinity == 1) return 1; else if (f1->affinity == -1) return 2; else if (f2->affinity == 1) return 2; else if (f0->affinity == 1 && f2->affinity == -1) return 3; else if (f2->affinity == -1) return 1; else if (f2->affinity == 0 && f3->affinity == 1) return 3; else return 2; } } static void print_aff_and_breaks(struct pullup_context *c, struct pullup_field *f) { int i; int max_l, max_r, l; struct pullup_field *f0 = f; const char aff_l[] = "+..", aff_r[] = "..+"; printf("\naffinity: "); for (i = 0; i < 6; i++) { printf("%c%d%c", aff_l[1+f->affinity], i, aff_r[1+f->affinity]); f = f->next; } f = f0; printf("\nbreaks: "); for (i=0; i<6; i++) { printf("%c%d%c", f->breaks & BREAK_LEFT ? '|' : '.', i, f->breaks & BREAK_RIGHT ? '|' : '.'); f = f->next; } printf("\n"); } struct pullup_frame *pullup_get_frame(struct pullup_context *c) { int i; struct pullup_frame *fr = c->frame; int n = decide_frame_length(c); if (!n) return 0; if (fr->lock) return 0; if (c->verbose) { print_aff_and_breaks(c, c->first); printf("duration: %d \n", n); } fr->lock++; fr->length = n; fr->parity = c->first->parity; fr->buffer = 0; for (i = 0; i < n; i++) { /* We cheat and steal the buffer without release+relock */ fr->fields[i] = c->first->buffer; c->first->buffer = 0; c->first = c->first->next; } /* Export the entire frame as one buffer, if possible! */ if (n == 2 && fr->fields[0] == fr->fields[1]) { fr->buffer = fr->fields[0]; pullup_lock_buffer(fr->buffer, 2); return fr; } /* (loop is in case we ever support frames longer than 3 fields) */ for (i = 1; i < n-1; i++) { if (fr->fields[i] == fr->fields[i-1] || fr->fields[i] == fr->fields[i+1]) { fr->buffer = fr->fields[i]; pullup_lock_buffer(fr->buffer, 2); break; } } return fr; } static void copy_field(struct pullup_context *c, struct pullup_buffer *dest, struct pullup_buffer *src, int parity) { int i, j; unsigned char *d, *s; for (i = 0; i < c->nplanes; i++) { s = src->planes[i] + parity*c->stride[i]; d = dest->planes[i] + parity*c->stride[i]; for (j = c->h[i]>>1; j; j--) { memcpy(d, s, c->stride[i]); s += c->stride[i]<<1; d += c->stride[i]<<1; } } } void pullup_pack_frame(struct pullup_context *c, struct pullup_frame *fr) { int i; int par = fr->parity; if (fr->buffer) return; if (fr->length < 2) return; /* FIXME: deal with this */ for (i = 0; i < fr->length; i++) { if (fr->fields[i]->lock[par ^ (i&1) ^ 1]) continue; fr->buffer = fr->fields[i]; pullup_lock_buffer(fr->buffer, 2); copy_field(c, fr->buffer, fr->fields[i+(i>0?-1:1)], par^(i&1)^1); return; } fr->buffer = pullup_get_buffer(c, 2); copy_field(c, fr->buffer, fr->fields[0], par); copy_field(c, fr->buffer, fr->fields[1], par^1); } void pullup_release_frame(struct pullup_frame *fr) { int i; for (i = 0; i < fr->length; i++) pullup_release_buffer(fr->fields[i], fr->parity ^ (i&1)); if (fr->buffer) pullup_release_buffer(fr->buffer, 2); fr->lock--; } struct pullup_context *pullup_alloc_context() { struct pullup_context *c; c = calloc(1, sizeof(struct pullup_context)); return c; } void pullup_preinit_context(struct pullup_context *c) { c->bpp = calloc(c->nplanes, sizeof(int)); c->w = calloc(c->nplanes, sizeof(int)); c->h = calloc(c->nplanes, sizeof(int)); c->stride = calloc(c->nplanes, sizeof(int)); c->background = calloc(c->nplanes, sizeof(int)); } void pullup_init_context(struct pullup_context *c) { int mp = c->metric_plane; if (c->nbuffers < 10) c->nbuffers = 10; c->buffers = calloc(c->nbuffers, sizeof (struct pullup_buffer)); c->metric_w = (c->w[mp] - (c->junk_left + c->junk_right << 3)) >> 3; c->metric_h = (c->h[mp] - (c->junk_top + c->junk_bottom << 1)) >> 3; c->metric_offset = c->junk_left*c->bpp[mp] + (c->junk_top<<1)*c->stride[mp]; c->metric_len = c->metric_w * c->metric_h; c->head = make_field_queue(c, 8); c->frame = calloc(1, sizeof (struct pullup_frame)); c->frame->fields = calloc(3, sizeof (struct pullup_buffer *)); switch(c->format) { case PULLUP_FMT_Y: c->diff = diff_y; c->licomb = licomb_y; #ifdef HAVE_MMX if (c->cpu & PULLUP_CPU_MMX) { c->diff = diff_y_mmx; c->licomb = licomb_y_mmx; } #endif break; #if 0 case PULLUP_FMT_YUY2: c->diff = diff_yuy2; break; case PULLUP_FMT_RGB32: c->diff = diff_rgb32; break; #endif } } void pullup_free_context(struct pullup_context *c) { struct pullup_field *f; free(c->buffers); f = c->head; do { free(f->diffs); free(f->licomb); f = f->next; free(f->prev); } while (f != c->head); free(c->frame); free(c); }