# HG changeset patch # User michaelni # Date 1019910626 0 # Node ID 9f6071a87e172b20614d2b339deab9e8add88745 # Parent 9211a0c9466a2828f5f186d559884e6aab62658a fixed msmpeg4 infinite loop if buggy stream rewrote quantizer fixed bias (+10% compression/quality for h263 like codecs) qscale=1 support mpeg1 intra frames looks far less blocky added codec_id field diff -r 9211a0c9466a -r 9f6071a87e17 common.h --- a/common.h Fri Apr 26 07:18:57 2002 +0000 +++ b/common.h Sat Apr 27 12:30:26 2002 +0000 @@ -157,6 +157,9 @@ #endif /* HAVE_AV_CONFIG_H */ +/* assume b>0 */ +#define ROUNDED_DIV(a,b) (((a)>0 ? (a) + ((b)>>1) : (a) - ((b)>>1))/(b)) + /* bit output */ struct PutBitContext; diff -r 9211a0c9466a -r 9f6071a87e17 h263.c --- a/h263.c Fri Apr 26 07:18:57 2002 +0000 +++ b/h263.c Sat Apr 27 12:30:26 2002 +0000 @@ -904,8 +904,26 @@ s->mv_penalty= mv_penalty; //FIXME exact table for msmpeg4 & h263p // use fcodes >1 only for mpeg4 & h263 & h263p FIXME - if(s->h263_plus) s->fcode_tab= umv_fcode_tab; - else if(s->h263_pred && !s->h263_msmpeg4) s->fcode_tab= fcode_tab; + switch(s->codec_id){ + case CODEC_ID_MPEG4: + s->fcode_tab= fcode_tab; + s->min_qcoeff= -2048; + s->max_qcoeff= 2047; + break; + case CODEC_ID_H263P: + s->fcode_tab= umv_fcode_tab; + s->min_qcoeff= -128; + s->max_qcoeff= 127; + break; + default: //nothing needed default table allready set in mpegvideo.c + s->min_qcoeff= -128; + s->max_qcoeff= 127; + } + + /* h263 type bias */ + //FIXME mpeg4 mpeg quantizer + s->intra_quant_bias=0; + s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x } static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n) @@ -2702,8 +2720,8 @@ s->chroma_intra_matrix[i]= v; v= ff_mpeg4_default_non_intra_matrix[i]; - s->non_intra_matrix[i]= v; - s->chroma_non_intra_matrix[i]= v; + s->inter_matrix[i]= v; + s->chroma_inter_matrix[i]= v; } /* load custom intra matrix */ @@ -2725,15 +2743,15 @@ if(v==0) break; j= zigzag_direct[i]; - s->non_intra_matrix[j]= v; - s->chroma_non_intra_matrix[j]= v; + s->inter_matrix[j]= v; + s->chroma_inter_matrix[j]= v; } /* replicate last value */ for(; i<64; i++){ j= zigzag_direct[i]; - s->non_intra_matrix[j]= v; - s->chroma_non_intra_matrix[j]= v; + s->inter_matrix[j]= v; + s->chroma_inter_matrix[j]= v; } } diff -r 9211a0c9466a -r 9f6071a87e17 h263dec.c --- a/h263dec.c Fri Apr 26 07:18:57 2002 +0000 +++ b/h263dec.c Sat Apr 27 12:30:26 2002 +0000 @@ -73,17 +73,13 @@ default: return -1; } - + s->codec_id= avctx->codec->id; + /* for h263, we allocate the images after having read the header */ if (avctx->codec->id != CODEC_ID_H263 && avctx->codec->id != CODEC_ID_MPEG4) if (MPV_common_init(s) < 0) return -1; - /* XXX: suppress this matrix init, only needed because using mpeg1 - dequantize in mmx case */ - for(i=0;i<64;i++) - s->non_intra_matrix[i] = default_non_intra_matrix[i]; - if (s->h263_msmpeg4) msmpeg4_decode_init_vlc(s); else @@ -251,7 +247,7 @@ if(msmpeg4_decode_ext_header(s, buf_size) < 0) return -1; /* divx 5.01+ bistream reorder stuff */ - if(s->h263_pred && s->bitstream_buffer_size==0){ + if(s->codec_id==CODEC_ID_MPEG4 && s->bitstream_buffer_size==0){ int current_pos= get_bits_count(&s->gb)/8; if( buf_size - current_pos > 5 && buf_size - current_pos < BITSTREAM_BUFFER_SIZE){ diff -r 9211a0c9466a -r 9f6071a87e17 i386/mpegvideo_mmx.c --- a/i386/mpegvideo_mmx.c Fri Apr 26 07:18:57 2002 +0000 +++ b/i386/mpegvideo_mmx.c Sat Apr 27 12:30:26 2002 +0000 @@ -26,8 +26,6 @@ #include "../mangle.h" extern UINT8 zigzag_end[64]; -extern void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w); -extern int (*dct_quantize)(MpegEncContext *s, DCTELEM *block, int n, int qscale); extern UINT8 zigzag_direct_noperm[64]; extern UINT16 inv_zigzag_direct16[64]; @@ -260,7 +258,7 @@ block[0]= block0; } else { - quant_matrix = s->non_intra_matrix; + quant_matrix = s->inter_matrix; asm volatile( "pcmpeqw %%mm7, %%mm7 \n\t" "psrlw $15, %%mm7 \n\t" @@ -382,7 +380,7 @@ //Note, we dont do mismatch control for intra as errors cannot accumulate } else { - quant_matrix = s->non_intra_matrix; + quant_matrix = s->inter_matrix; asm volatile( "pcmpeqw %%mm7, %%mm7 \n\t" "psrlq $48, %%mm7 \n\t" diff -r 9211a0c9466a -r 9f6071a87e17 i386/mpegvideo_mmx_template.c --- a/i386/mpegvideo_mmx_template.c Fri Apr 26 07:18:57 2002 +0000 +++ b/i386/mpegvideo_mmx_template.c Sat Apr 27 12:30:26 2002 +0000 @@ -33,149 +33,160 @@ static int RENAME(dct_quantize)(MpegEncContext *s, DCTELEM *block, int n, - int qscale) + int qscale, int *overflow) { - int i, level, last_non_zero_p1, q; - const UINT16 *qmat; + int level=0, last_non_zero_p1, q; //=0 is cuz gcc says uninitalized ... + const UINT16 *qmat, *bias; static __align8 INT16 temp_block[64]; - int minLevel, maxLevel; - - if(s->avctx!=NULL && s->avctx->codec->id==CODEC_ID_MPEG4){ - /* mpeg4 */ - minLevel= -2048; - maxLevel= 2047; - }else if(s->out_format==FMT_MPEG1){ - /* mpeg1 */ - minLevel= -255; - maxLevel= 255; - }else if(s->out_format==FMT_MJPEG){ - /* (m)jpeg */ - minLevel= -1023; - maxLevel= 1023; - }else{ - /* h263 / msmpeg4 */ - minLevel= -128; - maxLevel= 127; - } av_fdct (block); - + if (s->mb_intra) { int dummy; if (n < 4) q = s->y_dc_scale; else q = s->c_dc_scale; - /* note: block[0] is assumed to be positive */ #if 1 - asm volatile ( - "xorl %%edx, %%edx \n\t" - "mul %%ecx \n\t" - : "=d" (temp_block[0]), "=a"(dummy) - : "a" (block[0] + (q >> 1)), "c" (inverse[q]) - ); + asm volatile ( + "xorl %%edx, %%edx \n\t" + "mul %%ecx \n\t" + : "=d" (level), "=a"(dummy) + : "a" (block[0] + (q >> 1)), "c" (inverse[q]) + ); #else - asm volatile ( - "xorl %%edx, %%edx \n\t" - "divw %%cx \n\t" - "movzwl %%ax, %%eax \n\t" - : "=a" (temp_block[0]) - : "a" (block[0] + (q >> 1)), "c" (q) - : "%edx" - ); + asm volatile ( + "xorl %%edx, %%edx \n\t" + "divw %%cx \n\t" + "movzwl %%ax, %%eax \n\t" + : "=a" (level) + : "a" (block[0] + (q >> 1)), "c" (q) + : "%edx" + ); #endif + block[0]=0; //avoid fake overflow // temp_block[0] = (block[0] + (q >> 1)) / q; - i = 1; last_non_zero_p1 = 1; - if (s->out_format == FMT_H263) { - qmat = s->q_non_intra_matrix16; - } else { - qmat = s->q_intra_matrix16; - } - for(i=1;i<4;i++) { - level = block[i] * qmat[i]; - level = level / (1 << (QMAT_SHIFT_MMX - 3)); - /* XXX: currently, this code is not optimal. the range should be: - mpeg1: -255..255 - mpeg2: -2048..2047 - h263: -128..127 - mpeg4: -2048..2047 - */ - if (level > maxLevel) - level = maxLevel; - else if (level < minLevel) - level = minLevel; - temp_block[i] = level; - - if(level) - if(last_non_zero_p1 < inv_zigzag_direct16[i]) last_non_zero_p1= inv_zigzag_direct16[i]; - block[i]=0; - } + bias = s->q_intra_matrix16_bias[qscale]; + qmat = s->q_intra_matrix16[qscale]; } else { - i = 0; last_non_zero_p1 = 0; - qmat = s->q_non_intra_matrix16; + bias = s->q_inter_matrix16_bias[qscale]; + qmat = s->q_inter_matrix16[qscale]; } - asm volatile( /* XXX: small rounding bug, but it shouldnt matter */ - "movd %3, %%mm3 \n\t" - SPREADW(%%mm3) - "movd %4, %%mm4 \n\t" - SPREADW(%%mm4) -#ifndef HAVE_MMX2 - "movd %5, %%mm5 \n\t" - SPREADW(%%mm5) -#endif - "pxor %%mm7, %%mm7 \n\t" - "movd %%eax, %%mm2 \n\t" - SPREADW(%%mm2) - "movl %6, %%eax \n\t" - ".balign 16 \n\t" - "1: \n\t" - "movq (%1, %%eax), %%mm0 \n\t" - "movq (%2, %%eax), %%mm1 \n\t" - "movq %%mm0, %%mm6 \n\t" - "psraw $15, %%mm6 \n\t" - "pmulhw %%mm0, %%mm1 \n\t" - "psubsw %%mm6, %%mm1 \n\t" -#ifdef HAVE_MMX2 - "pminsw %%mm3, %%mm1 \n\t" - "pmaxsw %%mm4, %%mm1 \n\t" -#else - "paddsw %%mm3, %%mm1 \n\t" - "psubusw %%mm4, %%mm1 \n\t" - "paddsw %%mm5, %%mm1 \n\t" -#endif - "movq %%mm1, (%8, %%eax) \n\t" - "pcmpeqw %%mm7, %%mm1 \n\t" - "movq (%7, %%eax), %%mm0 \n\t" - "movq %%mm7, (%1, %%eax) \n\t" - "pandn %%mm0, %%mm1 \n\t" - PMAXW(%%mm1, %%mm2) - "addl $8, %%eax \n\t" - " js 1b \n\t" - "movq %%mm2, %%mm0 \n\t" - "psrlq $32, %%mm2 \n\t" - PMAXW(%%mm0, %%mm2) - "movq %%mm2, %%mm0 \n\t" - "psrlq $16, %%mm2 \n\t" - PMAXW(%%mm0, %%mm2) - "movd %%mm2, %%eax \n\t" - "movzbl %%al, %%eax \n\t" - : "+a" (last_non_zero_p1) - : "r" (block+64), "r" (qmat+64), -#ifdef HAVE_MMX2 - "m" (maxLevel), "m" (minLevel), "m" (minLevel /* dummy */), "g" (2*i - 128), -#else - "m" (0x7FFF - maxLevel), "m" (0x7FFF -maxLevel + minLevel), "m" (minLevel), "g" (2*i - 128), -#endif - "r" (inv_zigzag_direct16+64), "r" (temp_block+64) - ); + if(s->out_format == FMT_H263){ + + asm volatile( + "movd %%eax, %%mm3 \n\t" // last_non_zero_p1 + SPREADW(%%mm3) + "pxor %%mm7, %%mm7 \n\t" // 0 + "pxor %%mm4, %%mm4 \n\t" // 0 + "movq (%2), %%mm5 \n\t" // qmat[0] + "pxor %%mm6, %%mm6 \n\t" + "psubw (%3), %%mm6 \n\t" // -bias[0] + "movl $-128, %%eax \n\t" + ".balign 16 \n\t" + "1: \n\t" + "pxor %%mm1, %%mm1 \n\t" // 0 + "movq (%1, %%eax), %%mm0 \n\t" // block[i] + "pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00 + "pxor %%mm1, %%mm0 \n\t" + "psubw %%mm1, %%mm0 \n\t" // ABS(block[i]) + "psubusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0] + "pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16 + "por %%mm0, %%mm4 \n\t" + "pxor %%mm1, %%mm0 \n\t" + "psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) + "movq %%mm0, (%5, %%eax) \n\t" + "pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00 + "movq (%4, %%eax), %%mm1 \n\t" + "movq %%mm7, (%1, %%eax) \n\t" // 0 + "pandn %%mm1, %%mm0 \n\t" + PMAXW(%%mm0, %%mm3) + "addl $8, %%eax \n\t" + " js 1b \n\t" + "movq %%mm3, %%mm0 \n\t" + "psrlq $32, %%mm3 \n\t" + PMAXW(%%mm0, %%mm3) + "movq %%mm3, %%mm0 \n\t" + "psrlq $16, %%mm3 \n\t" + PMAXW(%%mm0, %%mm3) + "movd %%mm3, %%eax \n\t" + "movzbl %%al, %%eax \n\t" // last_non_zero_p1 + : "+a" (last_non_zero_p1) + : "r" (block+64), "r" (qmat), "r" (bias), + "r" (inv_zigzag_direct16+64), "r" (temp_block+64) + ); + // note the asm is split cuz gcc doesnt like that many operands ... + asm volatile( + "movd %1, %%mm1 \n\t" // max_qcoeff + SPREADW(%%mm1) + "psubusw %%mm1, %%mm4 \n\t" + "packuswb %%mm4, %%mm4 \n\t" + "movd %%mm4, %0 \n\t" // *overflow + : "=g" (*overflow) + : "g" (s->max_qcoeff) + ); + }else{ // FMT_H263 + asm volatile( + "movd %%eax, %%mm3 \n\t" // last_non_zero_p1 + SPREADW(%%mm3) + "pxor %%mm7, %%mm7 \n\t" // 0 + "pxor %%mm4, %%mm4 \n\t" // 0 + "movl $-128, %%eax \n\t" + ".balign 16 \n\t" + "1: \n\t" + "pxor %%mm1, %%mm1 \n\t" // 0 + "movq (%1, %%eax), %%mm0 \n\t" // block[i] + "pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00 + "pxor %%mm1, %%mm0 \n\t" + "psubw %%mm1, %%mm0 \n\t" // ABS(block[i]) + "movq (%3, %%eax), %%mm6 \n\t" // bias[0] + "paddusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0] + "movq (%2, %%eax), %%mm5 \n\t" // qmat[i] + "pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16 + "por %%mm0, %%mm4 \n\t" + "pxor %%mm1, %%mm0 \n\t" + "psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) + "movq %%mm0, (%5, %%eax) \n\t" + "pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00 + "movq (%4, %%eax), %%mm1 \n\t" + "movq %%mm7, (%1, %%eax) \n\t" // 0 + "pandn %%mm1, %%mm0 \n\t" + PMAXW(%%mm0, %%mm3) + "addl $8, %%eax \n\t" + " js 1b \n\t" + "movq %%mm3, %%mm0 \n\t" + "psrlq $32, %%mm3 \n\t" + PMAXW(%%mm0, %%mm3) + "movq %%mm3, %%mm0 \n\t" + "psrlq $16, %%mm3 \n\t" + PMAXW(%%mm0, %%mm3) + "movd %%mm3, %%eax \n\t" + "movzbl %%al, %%eax \n\t" // last_non_zero_p1 + : "+a" (last_non_zero_p1) + : "r" (block+64), "r" (qmat+64), "r" (bias+64), + "r" (inv_zigzag_direct16+64), "r" (temp_block+64) + ); + // note the asm is split cuz gcc doesnt like that many operands ... + asm volatile( + "movd %1, %%mm1 \n\t" // max_qcoeff + SPREADW(%%mm1) + "psubusw %%mm1, %%mm4 \n\t" + "packuswb %%mm4, %%mm4 \n\t" + "movd %%mm4, %0 \n\t" // *overflow + : "=g" (*overflow) + : "g" (s->max_qcoeff) + ); + } + + if(s->mb_intra) temp_block[0]= level; //FIXME move afer permute // last_non_zero_p1=64; /* permute for IDCT */ asm volatile( - "movl %0, %%eax \n\t" + "movl %0, %%eax \n\t" "pushl %%ebp \n\t" "movl %%esp, " MANGLE(esp_temp) "\n\t" "1: \n\t" @@ -203,5 +214,6 @@ } */ //block_permute(block); + return last_non_zero_p1 - 1; } diff -r 9211a0c9466a -r 9f6071a87e17 mjpeg.c --- a/mjpeg.c Fri Apr 26 07:18:57 2002 +0000 +++ b/mjpeg.c Sat Apr 27 12:30:26 2002 +0000 @@ -160,6 +160,9 @@ m = malloc(sizeof(MJpegContext)); if (!m) return -1; + + s->min_qcoeff=-1023; + s->max_qcoeff= 1023; /* build all the huffman tables */ build_huffman_codes(m->huff_size_dc_luminance, diff -r 9211a0c9466a -r 9f6071a87e17 mpeg12.c --- a/mpeg12.c Fri Apr 26 07:18:57 2002 +0000 +++ b/mpeg12.c Sat Apr 27 12:30:26 2002 +0000 @@ -399,8 +399,11 @@ } } s->mv_penalty= mv_penalty; - s->fcode_tab= fcode_tab; + s->min_qcoeff=-255; + s->max_qcoeff= 255; + s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x + s->inter_quant_bias= 0; } static inline void encode_dc(MpegEncContext *s, int diff, int component) @@ -1027,9 +1030,9 @@ UINT8 *buf_ptr; i = 0; if (n < 4) - matrix = s->non_intra_matrix; + matrix = s->inter_matrix; else - matrix = s->chroma_non_intra_matrix; + matrix = s->chroma_inter_matrix; /* special case for the first coef. no need to add a second vlc table */ SAVE_BITS(&s->gb); @@ -1183,6 +1186,7 @@ s->buf_ptr = s->buffer; s->mpeg_enc_ctx.picture_number = 0; s->repeat_field = 0; + s->mpeg_enc_ctx.codec_id= avctx->codec->id; return 0; } @@ -1292,8 +1296,8 @@ for(i=0;i<64;i++) { v = get_bits(&s->gb, 8); j = zigzag_direct[i]; - s->non_intra_matrix[j] = v; - s->chroma_non_intra_matrix[j] = v; + s->inter_matrix[j] = v; + s->chroma_inter_matrix[j] = v; } } if (get_bits1(&s->gb)) { @@ -1307,7 +1311,7 @@ for(i=0;i<64;i++) { v = get_bits(&s->gb, 8); j = zigzag_direct[i]; - s->chroma_non_intra_matrix[j] = v; + s->chroma_inter_matrix[j] = v; } } } @@ -1386,7 +1390,6 @@ s->mb_x = -1; s->mb_y = start_code; s->mb_incr = 0; - /* start frame decoding */ if (s->first_slice) { s->first_slice = 0; @@ -1526,20 +1529,20 @@ for(i=0;i<64;i++) { v = get_bits(&s->gb, 8); j = zigzag_direct[i]; - s->non_intra_matrix[j] = v; - s->chroma_non_intra_matrix[j] = v; + s->inter_matrix[j] = v; + s->chroma_inter_matrix[j] = v; } #ifdef DEBUG dprintf("non intra matrix present\n"); for(i=0;i<64;i++) - dprintf(" %d", s->non_intra_matrix[zigzag_direct[i]]); + dprintf(" %d", s->inter_matrix[zigzag_direct[i]]); printf("\n"); #endif } else { for(i=0;i<64;i++) { v = default_non_intra_matrix[i]; - s->non_intra_matrix[i] = v; - s->chroma_non_intra_matrix[i] = v; + s->inter_matrix[i] = v; + s->chroma_inter_matrix[i] = v; } } @@ -1566,7 +1569,7 @@ dprintf("fill_buffer\n"); *data_size = 0; - + /* special case for last picture */ if (buf_size == 0) { if (s2->picture_number > 0) { @@ -1591,7 +1594,7 @@ *data_size = sizeof(AVPicture); goto the_end; } - + while (buf_ptr < buf_end) { buf_start = buf_ptr; /* find start next code */ diff -r 9211a0c9466a -r 9f6071a87e17 mpegvideo.c --- a/mpegvideo.c Fri Apr 26 07:18:57 2002 +0000 +++ b/mpegvideo.c Sat Apr 27 12:30:26 2002 +0000 @@ -38,9 +38,9 @@ static void dct_unquantize_h263_c(MpegEncContext *s, DCTELEM *block, int n, int qscale); static void draw_edges_c(UINT8 *buf, int wrap, int width, int height, int w); -static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale); +static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow); -int (*dct_quantize)(MpegEncContext *s, DCTELEM *block, int n, int qscale)= dct_quantize_c; +int (*dct_quantize)(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow)= dct_quantize_c; void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w)= draw_edges_c; #define EDGE_WIDTH 16 @@ -78,29 +78,38 @@ /* default motion estimation */ int motion_estimation_method = ME_EPZS; -static void convert_matrix(int *qmat, UINT16 *qmat16, const UINT16 *quant_matrix, int qscale) +static void convert_matrix(int (*qmat)[64], uint16_t (*qmat16)[64], uint16_t (*qmat16_bias)[64], + const UINT16 *quant_matrix, int bias) { - int i; + int qscale; - if (av_fdct == jpeg_fdct_ifast) { - for(i=0;i<64;i++) { - /* 16 <= qscale * quant_matrix[i] <= 7905 */ - /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */ - /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */ - /* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */ - - qmat[block_permute_op(i)] = (int)((UINT64_C(1) << (QMAT_SHIFT + 11)) / - (aanscales[i] * qscale * quant_matrix[block_permute_op(i)])); - } - } else { - for(i=0;i<64;i++) { - /* We can safely suppose that 16 <= quant_matrix[i] <= 255 - So 16 <= qscale * quant_matrix[i] <= 7905 - so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905 - so 32768 >= (1<<19) / (qscale * quant_matrix[i]) >= 67 - */ - qmat[i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]); - qmat16[i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[block_permute_op(i)]); + for(qscale=1; qscale<32; qscale++){ + int i; + if (av_fdct == jpeg_fdct_ifast) { + for(i=0;i<64;i++) { + const int j= block_permute_op(i); + /* 16 <= qscale * quant_matrix[i] <= 7905 */ + /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */ + /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */ + /* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */ + + qmat[qscale][j] = (int)((UINT64_C(1) << (QMAT_SHIFT + 11)) / + (aanscales[i] * qscale * quant_matrix[j])); + } + } else { + for(i=0;i<64;i++) { + /* We can safely suppose that 16 <= quant_matrix[i] <= 255 + So 16 <= qscale * quant_matrix[i] <= 7905 + so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905 + so 32768 >= (1<<19) / (qscale * quant_matrix[i]) >= 67 + */ + qmat [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]); + qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[block_permute_op(i)]); + + if(qmat16[qscale][i]==0 || qmat16[qscale][i]==128*256) qmat16[qscale][i]=128*256-1; + + qmat16_bias[qscale][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][i]); + } } } } @@ -388,7 +397,8 @@ s->max_b_frames= avctx->max_b_frames; s->rc_strategy= avctx->rc_strategy; s->b_frame_strategy= avctx->b_frame_strategy; - + s->codec_id= avctx->codec->id; + if (s->gop_size <= 1) { s->intra_only = 1; s->gop_size = 12; @@ -523,8 +533,21 @@ /* init default q matrix */ for(i=0;i<64;i++) { - s->intra_matrix[i] = default_intra_matrix[i]; - s->non_intra_matrix[i] = default_non_intra_matrix[i]; + if(s->out_format == FMT_H263) + s->intra_matrix[i] = default_non_intra_matrix[i]; + else + s->intra_matrix[i] = default_intra_matrix[i]; + + s->inter_matrix[i] = default_non_intra_matrix[i]; + } + + /* precompute matrix */ + /* for mjpeg, we do include qscale in the matrix */ + if (s->out_format != FMT_MJPEG) { + convert_matrix(s->q_intra_matrix, s->q_intra_matrix16, s->q_intra_matrix16_bias, + s->intra_matrix, s->intra_quant_bias); + convert_matrix(s->q_inter_matrix, s->q_inter_matrix16, s->q_inter_matrix16_bias, + s->inter_matrix, s->inter_quant_bias); } if(ff_rate_control_init(s) < 0) @@ -1307,6 +1330,21 @@ emms_c(); //FIXME remove } +static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index) +{ + int i; + const int maxlevel= s->max_qcoeff; + const int minlevel= s->min_qcoeff; + + for(i=0; i<=last_index; i++){ + const int j = zigzag_direct[i]; + int level = block[j]; + + if (level>maxlevel) level=maxlevel; + else if(levely_dc_scale = 8; s->c_dc_scale = 8; } - for(i=0;i<6;i++) { - s->block_last_index[i] = dct_quantize(s, s->block[i], i, s->qscale); + if(s->out_format==FMT_MJPEG){ + for(i=0;i<6;i++) { + int overflow; + s->block_last_index[i] = dct_quantize(s, s->block[i], i, 8, &overflow); + if(overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]); + } + }else{ + for(i=0;i<6;i++) { + int overflow; + s->block_last_index[i] = dct_quantize(s, s->block[i], i, s->qscale, &overflow); + // FIXME we could decide to change to quantizer instead of clipping + if(overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]); + } } /* huffman encode */ @@ -1596,17 +1645,13 @@ else if (!s->fixed_qscale) s->qscale = ff_rate_estimate_qscale(s); - - /* precompute matrix */ if (s->out_format == FMT_MJPEG) { /* for mjpeg, we do include qscale in the matrix */ s->intra_matrix[0] = default_intra_matrix[0]; for(i=1;i<64;i++) s->intra_matrix[i] = (default_intra_matrix[i] * s->qscale) >> 3; - convert_matrix(s->q_intra_matrix, s->q_intra_matrix16, s->intra_matrix, 8); - } else { - convert_matrix(s->q_intra_matrix, s->q_intra_matrix16, s->intra_matrix, s->qscale); - convert_matrix(s->q_non_intra_matrix, s->q_non_intra_matrix16, s->non_intra_matrix, s->qscale); + convert_matrix(s->q_intra_matrix, s->q_intra_matrix16, + s->q_intra_matrix16_bias, s->intra_matrix, s->intra_quant_bias); } s->last_bits= get_bit_count(&s->pb); @@ -1957,29 +2002,13 @@ static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, - int qscale) + int qscale, int *overflow) { int i, j, level, last_non_zero, q; const int *qmat; - int minLevel, maxLevel; - - if(s->avctx!=NULL && s->avctx->codec->id==CODEC_ID_MPEG4){ - /* mpeg4 */ - minLevel= -2048; - maxLevel= 2047; - }else if(s->out_format==FMT_MPEG1){ - /* mpeg1 */ - minLevel= -255; - maxLevel= 255; - }else if(s->out_format==FMT_MJPEG){ - /* (m)jpeg */ - minLevel= -1023; - maxLevel= 1023; - }else{ - /* h263 / msmpeg4 */ - minLevel= -128; - maxLevel= 127; - } + int bias; + int max=0; + unsigned int threshold1, threshold2; av_fdct (block); @@ -1998,71 +2027,40 @@ block[0] = (block[0] + (q >> 1)) / q; i = 1; last_non_zero = 0; - if (s->out_format == FMT_H263) { - qmat = s->q_non_intra_matrix; - } else { - qmat = s->q_intra_matrix; - } + qmat = s->q_intra_matrix[qscale]; + bias= s->intra_quant_bias<<(QMAT_SHIFT - 3 - QUANT_BIAS_SHIFT); } else { i = 0; last_non_zero = -1; - qmat = s->q_non_intra_matrix; + qmat = s->q_inter_matrix[qscale]; + bias= s->inter_quant_bias<<(QMAT_SHIFT - 3 - QUANT_BIAS_SHIFT); } + threshold1= (1<<(QMAT_SHIFT - 3)) - bias - 1; + threshold2= threshold1<<1; for(;i<64;i++) { j = zigzag_direct[i]; level = block[j]; level = level * qmat[j]; -#ifdef PARANOID - { - static int count = 0; - int level1, level2, qmat1; - double val; - if (qmat == s->q_non_intra_matrix) { - qmat1 = default_non_intra_matrix[j] * s->qscale; - } else { - qmat1 = default_intra_matrix[j] * s->qscale; - } - if (av_fdct != jpeg_fdct_ifast) - val = ((double)block[j] * 8.0) / (double)qmat1; - else - val = ((double)block[j] * 8.0 * 2048.0) / - ((double)qmat1 * aanscales[j]); - level1 = (int)val; - level2 = level / (1 << (QMAT_SHIFT - 3)); - if (level1 != level2) { - fprintf(stderr, "%d: quant error qlevel=%d wanted=%d level=%d qmat1=%d qmat=%d wantedf=%0.6f\n", - count, level2, level1, block[j], qmat1, qmat[j], - val); - count++; + +// if( bias+level >= (1<<(QMAT_SHIFT - 3)) +// || bias-level >= (1<<(QMAT_SHIFT - 3))){ + if(((unsigned)(level+threshold1))>threshold2){ + if(level>0){ + level= (bias + level)>>(QMAT_SHIFT - 3); + block[j]= level; + }else{ + level= (bias - level)>>(QMAT_SHIFT - 3); + block[j]= -level; } - - } -#endif - /* XXX: slight error for the low range. Test should be equivalent to - (level <= -(1 << (QMAT_SHIFT - 3)) || level >= (1 << - (QMAT_SHIFT - 3))) - */ - if (((level << (31 - (QMAT_SHIFT - 3))) >> (31 - (QMAT_SHIFT - 3))) != - level) { - level = level / (1 << (QMAT_SHIFT - 3)); - /* XXX: currently, this code is not optimal. the range should be: - mpeg1: -255..255 - mpeg2: -2048..2047 - h263: -128..127 - mpeg4: -2048..2047 - */ - if (level > maxLevel) - level = maxLevel; - else if (level < minLevel) - level = minLevel; - - block[j] = level; + max |=level; last_non_zero = i; - } else { - block[j] = 0; + }else{ + block[j]=0; } } + *overflow= s->max_qcoeff < max; //overflow might have happend + return last_non_zero; } @@ -2104,7 +2102,7 @@ } } else { i = 0; - quant_matrix = s->non_intra_matrix; + quant_matrix = s->inter_matrix; for(;inon_intra_matrix; + quant_matrix = s->inter_matrix; for(;i0)*/ enum OutputFormat out_format; /* output format */ + int h263_pred; /* use mpeg4/h263 ac/dc predictions */ + +/* the following codec id fields are deprecated in favor of codec_id */ int h263_plus; /* h263 plus headers */ int h263_rv10; /* use RV10 variation for H263 */ - int h263_pred; /* use mpeg4/h263 ac/dc predictions */ - int h263_msmpeg4; /* generate MSMPEG4 compatible stream */ + int h263_msmpeg4; /* generate MSMPEG4 compatible stream (deprecated, use msmpeg4_version instead)*/ int h263_intel; /* use I263 intel h263 header */ + + int codec_id; /* see CODEC_ID_xxx */ int fixed_qscale; /* fixed qscale if non zero */ float qcompress; /* amount of qscale change between easy & hard scenes (0.0-1.0) */ float qblur; /* amount of qscale smoothing over time (0.0-1.0) */ @@ -213,14 +217,21 @@ /* matrix transmitted in the bitstream */ UINT16 intra_matrix[64]; UINT16 chroma_intra_matrix[64]; - UINT16 non_intra_matrix[64]; - UINT16 chroma_non_intra_matrix[64]; + UINT16 inter_matrix[64]; + UINT16 chroma_inter_matrix[64]; +#define QUANT_BIAS_SHIFT 4 + int intra_quant_bias; /* bias for the quantizer */ + int inter_quant_bias; /* bias for the quantizer */ + int min_qcoeff; /* minimum encodable coefficient */ + int max_qcoeff; /* maximum encodable coefficient */ /* precomputed matrix (combine qscale and DCT renorm) */ - int q_intra_matrix[64]; - int q_non_intra_matrix[64]; + int q_intra_matrix[32][64]; + int q_inter_matrix[32][64]; /* identical to the above but for MMX & these are not permutated */ - UINT16 __align8 q_intra_matrix16[64]; - UINT16 __align8 q_non_intra_matrix16[64]; + UINT16 __align8 q_intra_matrix16[32][64]; + UINT16 __align8 q_inter_matrix16[32][64]; + UINT16 __align8 q_intra_matrix16_bias[32][64]; + UINT16 __align8 q_inter_matrix16_bias[32][64]; int block_last_index[6]; /* last non zero coefficient in block */ void *opaque; /* private data for the user */ @@ -328,7 +339,7 @@ int first_slice_line; /* used in mpeg4 too to handle resync markers */ int flipflop_rounding; int bitrate; - int msmpeg4_version; /* 1=mp41, 2=mp42, 3=mp43/divx3 */ + int msmpeg4_version; /* 0=not msmpeg4, 1=mp41, 2=mp42, 3=mp43/divx3 */ /* decompression specific */ GetBitContext gb; @@ -386,6 +397,8 @@ #ifdef HAVE_MMX void MPV_common_init_mmx(MpegEncContext *s); #endif +int (*dct_quantize)(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow); +void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w); /* motion_est.c */ void ff_estimate_p_frame_motion(MpegEncContext * s, diff -r 9211a0c9466a -r 9f6071a87e17 rv10.c --- a/rv10.c Fri Apr 26 07:18:57 2002 +0000 +++ b/rv10.c Sat Apr 27 12:30:26 2002 +0000 @@ -340,6 +340,7 @@ int i; static int done; +// s->avctx= avctx; s->out_format = FMT_H263; s->width = avctx->width; @@ -351,11 +352,6 @@ if (MPV_common_init(s) < 0) return -1; - /* XXX: suppress this matrix init, only needed because using mpeg1 - dequantize in mmx case */ - for(i=0;i<64;i++) - s->non_intra_matrix[i] = default_non_intra_matrix[i]; - h263_decode_init_vlc(s); /* init rv vlc */