Mercurial > libavcodec.hg
diff i386/mpegvideo_mmx.c @ 1689:1a2db2073848 libavcodec
split intra / inter dequantization
author | michael |
---|---|
date | Sun, 14 Dec 2003 02:57:00 +0000 |
parents | 4c9fd29f1606 |
children | 4e72fb256b25 |
line wrap: on
line diff
--- a/i386/mpegvideo_mmx.c Sun Dec 14 01:42:00 2003 +0000 +++ b/i386/mpegvideo_mmx.c Sun Dec 14 02:57:00 2003 +0000 @@ -31,31 +31,29 @@ static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL; -static void dct_unquantize_h263_mmx(MpegEncContext *s, +static void dct_unquantize_h263_intra_mmx(MpegEncContext *s, DCTELEM *block, int n, int qscale) { int level, qmul, qadd, nCoeffs; qmul = qscale << 1; - qadd = (qscale - 1) | 1; assert(s->block_last_index[n]>=0 || s->h263_aic); - if (s->mb_intra) { - if (!s->h263_aic) { - if (n < 4) - level = block[0] * s->y_dc_scale; - else - level = block[0] * s->c_dc_scale; - }else{ - qadd = 0; - level= block[0]; - } + if (!s->h263_aic) { + if (n < 4) + level = block[0] * s->y_dc_scale; + else + level = block[0] * s->c_dc_scale; + qadd = (qscale - 1) | 1; + }else{ + qadd = 0; + level= block[0]; + } + if(s->ac_pred) nCoeffs=63; - } else { - nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; - level = 0;/* keep gcc quiet */ - } + else + nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; //printf("%d %d ", qmul, qadd); asm volatile( "movd %1, %%mm6 \n\t" //qmul @@ -104,8 +102,69 @@ ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs)) : "memory" ); - if(s->mb_intra) - block[0]= level; + block[0]= level; +} + + +static void dct_unquantize_h263_inter_mmx(MpegEncContext *s, + DCTELEM *block, int n, int qscale) +{ + int level, qmul, qadd, nCoeffs; + + qmul = qscale << 1; + qadd = (qscale - 1) | 1; + + assert(s->block_last_index[n]>=0 || s->h263_aic); + + nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; +//printf("%d %d ", qmul, qadd); +asm volatile( + "movd %1, %%mm6 \n\t" //qmul + "packssdw %%mm6, %%mm6 \n\t" + "packssdw %%mm6, %%mm6 \n\t" + "movd %2, %%mm5 \n\t" //qadd + "pxor %%mm7, %%mm7 \n\t" + "packssdw %%mm5, %%mm5 \n\t" + "packssdw %%mm5, %%mm5 \n\t" + "psubw %%mm5, %%mm7 \n\t" + "pxor %%mm4, %%mm4 \n\t" + ".balign 16\n\t" + "1: \n\t" + "movq (%0, %3), %%mm0 \n\t" + "movq 8(%0, %3), %%mm1 \n\t" + + "pmullw %%mm6, %%mm0 \n\t" + "pmullw %%mm6, %%mm1 \n\t" + + "movq (%0, %3), %%mm2 \n\t" + "movq 8(%0, %3), %%mm3 \n\t" + + "pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 + "pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 + + "pxor %%mm2, %%mm0 \n\t" + "pxor %%mm3, %%mm1 \n\t" + + "paddw %%mm7, %%mm0 \n\t" + "paddw %%mm7, %%mm1 \n\t" + + "pxor %%mm0, %%mm2 \n\t" + "pxor %%mm1, %%mm3 \n\t" + + "pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0 + "pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0 + + "pandn %%mm2, %%mm0 \n\t" + "pandn %%mm3, %%mm1 \n\t" + + "movq %%mm0, (%0, %3) \n\t" + "movq %%mm1, 8(%0, %3) \n\t" + + "addl $16, %3 \n\t" + "jng 1b \n\t" + ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs)) + : "memory" + ); } @@ -138,24 +197,23 @@ high3:low3 = low1*low2 high3 += tlow1 */ -static void dct_unquantize_mpeg1_mmx(MpegEncContext *s, +static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s, DCTELEM *block, int n, int qscale) { int nCoeffs; const uint16_t *quant_matrix; + int block0; assert(s->block_last_index[n]>=0); nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1; - if (s->mb_intra) { - int block0; - if (n < 4) - block0 = block[0] * s->y_dc_scale; - else - block0 = block[0] * s->c_dc_scale; - /* XXX: only mpeg1 */ - quant_matrix = s->intra_matrix; + if (n < 4) + block0 = block[0] * s->y_dc_scale; + else + block0 = block[0] * s->c_dc_scale; + /* XXX: only mpeg1 */ + quant_matrix = s->intra_matrix; asm volatile( "pcmpeqw %%mm7, %%mm7 \n\t" "psrlw $15, %%mm7 \n\t" @@ -205,9 +263,19 @@ ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) : "%eax", "memory" ); - block[0]= block0; + block[0]= block0; +} - } else { +static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s, + DCTELEM *block, int n, int qscale) +{ + int nCoeffs; + const uint16_t *quant_matrix; + + assert(s->block_last_index[n]>=0); + + nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1; + quant_matrix = s->inter_matrix; asm volatile( "pcmpeqw %%mm7, %%mm7 \n\t" @@ -262,28 +330,25 @@ ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) : "%eax", "memory" ); - } - } -static void dct_unquantize_mpeg2_mmx(MpegEncContext *s, +static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s, DCTELEM *block, int n, int qscale) { int nCoeffs; const uint16_t *quant_matrix; + int block0; assert(s->block_last_index[n]>=0); if(s->alternate_scan) nCoeffs= 63; //FIXME else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; - if (s->mb_intra) { - int block0; - if (n < 4) - block0 = block[0] * s->y_dc_scale; - else - block0 = block[0] * s->c_dc_scale; - quant_matrix = s->intra_matrix; + if (n < 4) + block0 = block[0] * s->y_dc_scale; + else + block0 = block[0] * s->c_dc_scale; + quant_matrix = s->intra_matrix; asm volatile( "pcmpeqw %%mm7, %%mm7 \n\t" "psrlw $15, %%mm7 \n\t" @@ -329,10 +394,21 @@ ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) : "%eax", "memory" ); - block[0]= block0; + block[0]= block0; //Note, we dont do mismatch control for intra as errors cannot accumulate +} - } else { +static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s, + DCTELEM *block, int n, int qscale) +{ + int nCoeffs; + const uint16_t *quant_matrix; + + assert(s->block_last_index[n]>=0); + + if(s->alternate_scan) nCoeffs= 63; //FIXME + else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; + quant_matrix = s->inter_matrix; asm volatile( "pcmpeqw %%mm7, %%mm7 \n\t" @@ -397,7 +473,6 @@ ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs) : "%eax", "memory" ); - } } /* draw the edges of width 'w' of an image of size width, height @@ -505,9 +580,12 @@ if (mm_flags & MM_MMX) { const int dct_algo = s->avctx->dct_algo; - s->dct_unquantize_h263 = dct_unquantize_h263_mmx; - s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_mmx; - s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_mmx; + s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_mmx; + s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_mmx; + s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_mmx; + s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_mmx; + s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_mmx; + s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx; draw_edges = draw_edges_mmx;