Mercurial > libavcodec.hg
changeset 1527:8ffd0c00e6df libavcodec
mmx2 optimization of huffyuv median encoding
author | michael |
---|---|
date | Mon, 13 Oct 2003 17:27:30 +0000 |
parents | fcfa169fdbf8 |
children | ca634e47065c |
files | dsputil.c dsputil.h huffyuv.c i386/dsputil_mmx.c |
diffstat | 4 files changed, 70 insertions(+), 26 deletions(-) [+] |
line wrap: on
line diff
--- a/dsputil.c Mon Oct 13 14:37:04 2003 +0000 +++ b/dsputil.c Mon Oct 13 17:27:30 2003 +0000 @@ -2526,6 +2526,24 @@ dst[i+0] = src1[i+0]-src2[i+0]; } +static void sub_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){ + int i; + uint8_t l, lt; + + l= *left; + lt= *left_top; + + for(i=0; i<w; i++){ + const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF); + lt= src1[i]; + l= src2[i]; + dst[i]= l - pred; + } + + *left= l; + *left_top= lt; +} + #define BUTTERFLY2(o1,o2,i1,i2) \ o1= (i1)+(i2);\ o2= (i1)-(i2); @@ -3007,6 +3025,7 @@ c->add_bytes= add_bytes_c; c->diff_bytes= diff_bytes_c; + c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c; c->bswap_buf= bswap_buf; #ifdef HAVE_MMX
--- a/dsputil.h Mon Oct 13 14:37:04 2003 +0000 +++ b/dsputil.h Mon Oct 13 17:27:30 2003 +0000 @@ -234,6 +234,11 @@ /* huffyuv specific */ void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w); void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w); + /** + * subtract huffyuv's variant of median prediction + * note, this might read from src1[-1], src2[-1] + */ + void (*sub_hfyu_median_prediction)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top); void (*bswap_buf)(uint32_t *dst, uint32_t *src, int w); /* (I)DCT */
--- a/huffyuv.c Mon Oct 13 14:37:04 2003 +0000 +++ b/huffyuv.c Mon Oct 13 17:27:30 2003 +0000 @@ -153,25 +153,6 @@ *left_top= lt; } -//FIXME optimize -static inline void sub_median_prediction(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){ - int i; - uint8_t l, lt; - - l= *left; - lt= *left_top; - - for(i=0; i<w; i++){ - const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF); - lt= src1[i]; - l= src2[i]; - dst[i]= l - pred; - } - - *left= l; - *left_top= lt; -} - static inline void add_left_prediction_bgr32(uint8_t *dst, uint8_t *src, int w, int *red, int *green, int *blue){ int i; int r,g,b; @@ -999,9 +980,9 @@ lefttopy= p->data[0][3]; lefttopu= p->data[1][1]; lefttopv= p->data[2][1]; - sub_median_prediction(s->temp[0], p->data[0]+4, p->data[0] + fake_ystride+4, width-4 , &lefty, &lefttopy); - sub_median_prediction(s->temp[1], p->data[1]+2, p->data[1] + fake_ustride+2, width2-2, &leftu, &lefttopu); - sub_median_prediction(s->temp[2], p->data[2]+2, p->data[2] + fake_vstride+2, width2-2, &leftv, &lefttopv); + s->dsp.sub_hfyu_median_prediction(s->temp[0], p->data[0]+4, p->data[0] + fake_ystride+4, width-4 , &lefty, &lefttopy); + s->dsp.sub_hfyu_median_prediction(s->temp[1], p->data[1]+2, p->data[1] + fake_ustride+2, width2-2, &leftu, &lefttopu); + s->dsp.sub_hfyu_median_prediction(s->temp[2], p->data[2]+2, p->data[2] + fake_vstride+2, width2-2, &leftv, &lefttopv); encode_422_bitstream(s, width-4); y++; cy++; @@ -1011,7 +992,7 @@ if(s->bitstream_bpp==12){ while(2*cy > y){ ydst= p->data[0] + p->linesize[0]*y; - sub_median_prediction(s->temp[0], ydst - fake_ystride, ydst, width , &lefty, &lefttopy); + s->dsp.sub_hfyu_median_prediction(s->temp[0], ydst - fake_ystride, ydst, width , &lefty, &lefttopy); encode_gray_bitstream(s, width); y++; } @@ -1021,9 +1002,9 @@ udst= p->data[1] + p->linesize[1]*cy; vdst= p->data[2] + p->linesize[2]*cy; - sub_median_prediction(s->temp[0], ydst - fake_ystride, ydst, width , &lefty, &lefttopy); - sub_median_prediction(s->temp[1], udst - fake_ustride, udst, width2, &leftu, &lefttopu); - sub_median_prediction(s->temp[2], vdst - fake_vstride, vdst, width2, &leftv, &lefttopv); + s->dsp.sub_hfyu_median_prediction(s->temp[0], ydst - fake_ystride, ydst, width , &lefty, &lefttopy); + s->dsp.sub_hfyu_median_prediction(s->temp[1], udst - fake_ustride, udst, width2, &leftu, &lefttopu); + s->dsp.sub_hfyu_median_prediction(s->temp[2], vdst - fake_vstride, vdst, width2, &leftv, &lefttopv); encode_422_bitstream(s, width); }
--- a/i386/dsputil_mmx.c Mon Oct 13 14:37:04 2003 +0000 +++ b/i386/dsputil_mmx.c Mon Oct 13 17:27:30 2003 +0000 @@ -583,6 +583,43 @@ for(; i<w; i++) dst[i+0] = src1[i+0]-src2[i+0]; } + +static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){ + int i=0; + uint8_t l, lt; + + asm volatile( + "1: \n\t" + "movq -1(%1, %0), %%mm0 \n\t" // LT + "movq (%1, %0), %%mm1 \n\t" // T + "movq -1(%2, %0), %%mm2 \n\t" // L + "movq (%2, %0), %%mm3 \n\t" // X + "movq %%mm2, %%mm4 \n\t" // L + "psubb %%mm0, %%mm2 \n\t" + "paddb %%mm1, %%mm2 \n\t" // L + T - LT + "movq %%mm4, %%mm5 \n\t" // L + "pmaxub %%mm1, %%mm4 \n\t" // max(T, L) + "pminub %%mm5, %%mm1 \n\t" // min(T, L) + "pminub %%mm2, %%mm4 \n\t" + "pmaxub %%mm1, %%mm4 \n\t" + "psubb %%mm4, %%mm3 \n\t" // dst - pred + "movq %%mm3, (%3, %0) \n\t" + "addl $8, %0 \n\t" + "cmpl %4, %0 \n\t" + " jb 1b \n\t" + : "+r" (i) + : "r"(src1), "r"(src2), "r"(dst), "r"(w) + ); + + l= *left; + lt= *left_top; + + dst[0]= src2[0] - mid_pred(l, src1[0], (l + src1[0] - lt)&0xFF); + + *left_top= src1[w-1]; + *left = src2[w-1]; +} + #define LBUTTERFLY2(a1,b1,a2,b2)\ "paddw " #b1 ", " #a1 " \n\t"\ "paddw " #b2 ", " #a2 " \n\t"\ @@ -1699,6 +1736,8 @@ SET_QPEL_FUNC(qpel_pixels_tab[1][14], qpel8_mc23_mmx2) SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_mmx2) #endif + + c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2; } else if (mm_flags & MM_3DNOW) { c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;