libavcodec.hg: vc1.c comparison

comparison vc1.c @ 3367:8c7b8ffc2485 libavcodec

Some optimization and fixes - mostly reworked MC and bitplanes.

author	kostya
date	Thu, 29 Jun 2006 11:05:33 +0000
parents	c59aa4cdf042
children	ad7215e207f8

comparison

equal deleted inserted replaced

-:c59aa4cdf042
+:8c7b8ffc2485
 CS_MID_RATE_INTRA,
 CS_MID_RATE_INTER,
 CS_HIGH_RATE_INTRA,
 CS_HIGH_RATE_INTER
 };
-/** Bitplane struct
-* We mainly need data and is_raw, so this struct could be avoided
-* to save a level of indirection; feel free to modify
-* @fixme For now, stride=width
-* @warning Data are bits, either 1 or 0
-*/
-typedef struct BitPlane {
-uint8_t *data;      ///< Data buffer
-int width;          ///< Width of the buffer
-int stride;         ///< Stride of the buffer
-int height;         ///< Plane height
-uint8_t is_raw;     ///< Bit values must be read at MB level
-} BitPlane;
-/** Block data for DC/AC prediction
-*/
-typedef struct Block {
-uint16_t dc;
-int16_t hor_ac[7];
-int16_t vert_ac[7];
-int16_t dcstep, step;
-} Block;
 /** The VC1 Context
 * @fixme Change size wherever another size is more efficient
 * Many members are only used for Advanced Profile
 */
 uint8_t mvrange;
 uint8_t pquantizer;           ///< Uniform (over sequence) quantizer in use
 uint8_t *previous_line_cbpcy; ///< To use for predicted CBPCY
 VLC *cbpcy_vlc;               ///< CBPCY VLC table
 int tt_index;                 ///< Index for Transform Type tables
-BitPlane mv_type_mb_plane;    ///< bitplane for mv_type == (4MV)
+uint8_t* mv_type_mb_plane;    ///< bitplane for mv_type == (4MV)
-BitPlane skip_mb_plane;       ///< bitplane for skipped MBs
+uint8_t* skip_mb_plane;       ///< bitplane for skipped MBs
-BitPlane direct_mb_plane;     ///< bitplane for "direct" MBs
+//    BitPlane direct_mb_plane;     ///< bitplane for "direct" MBs
+int mv_type_is_raw;           ///< mv type mb plane is not coded
+int skip_is_raw;              ///< skip mb plane is not coded
 /** Frame decoding info for S/M profiles only */
 //@{
 uint8_t rangeredfrm; ///< out_sample = CLIP((in_sample-128)*2+128)
 uint8_t interpfrm;
 uint8_t uvsamp;
 uint8_t postproc;
 int hrd_num_leaky_buckets;
 uint8_t bit_rate_exponent;
 uint8_t buffer_size_exponent;
-BitPlane ac_pred_plane;       ///< AC prediction flags bitplane
+//    BitPlane ac_pred_plane;       ///< AC prediction flags bitplane
-BitPlane over_flags_plane;    ///< Overflags bitplane
+//    BitPlane over_flags_plane;    ///< Overflags bitplane
 uint8_t condover;
 uint16_t *hrd_rate, *hrd_buffer;
 uint8_t *hrd_fullness;
 uint8_t range_mapy_flag;
 uint8_t range_mapuv_flag;
 static int vc1_init_common(VC1Context *v)
 {
 static int done = 0;
 int i = 0;
-/* Set the bit planes */
-v->mv_type_mb_plane = (struct BitPlane) { NULL, 0, 0, 0 };
-v->direct_mb_plane = (struct BitPlane) { NULL, 0, 0, 0 };
-v->skip_mb_plane = (struct BitPlane) { NULL, 0, 0, 0 };
-v->ac_pred_plane = v->over_flags_plane = (struct BitPlane) { NULL, 0, 0, 0 };
 v->hrd_rate = v->hrd_buffer = NULL;
 /* VLC tables */
 if(!done)
 {
 IMODE_ROWSKIP,
 IMODE_COLSKIP
 };
 /** @} */ //imode defines
-/** Allocate the buffer from a bitplane, given its dimensions
-* @param bp Bitplane which buffer is to allocate
-* @param[in] width Width of the buffer
-* @param[in] height Height of the buffer
-* @return Status
-* @todo TODO: Take into account stride
-* @todo TODO: Allow use of external buffers ?
-*/
-static int alloc_bitplane(BitPlane *bp, int width, int height)
-{
-if (!bp || bp->width<0 || bp->height<0) return -1;
-bp->data = (uint8_t*)av_malloc(width*height);
-if (!bp->data) return -1;
-bp->width = bp->stride = width;
-bp->height = height;
-return 0;
-}
-/** Free the bitplane's buffer
-* @param bp Bitplane which buffer is to free
-*/
-static void free_bitplane(BitPlane *bp)
-{
-bp->width = bp->stride = bp->height = 0;
-if (bp->data) av_freep(&bp->data);
-}
 /** Decode one bitplane: a map holding one flag per macroblock
 * @param data Buffer to store decoded bits, one byte (0 or 1) per macroblock; rows are v->s.mb_stride bytes apart
 * @param[out] raw_flag Set to 1 when the plane is in raw mode (bits are read at MB level and data is left untouched), else 0
 * @param v VC-1 context for bit reading and logging; also supplies the plane width, height and stride
 * @return -1 on an invalid NORM-6 VLC, the invert bit in raw mode, (imode<<1) + invert otherwise
 * @fixme FIXME: Optimize
 * @todo TODO: Decide if a struct is needed
 */
-static int bitplane_decoding(BitPlane *bp, VC1Context *v)
+static int bitplane_decoding(uint8_t* data, int *raw_flag, VC1Context *v)
 {
 GetBitContext *gb = &v->s.gb;
 int imode, x, y, code, offset;
-uint8_t invert, *planep = bp->data;
+uint8_t invert, *planep = data;
+int width, height, stride;
+width = v->s.mb_width; // plane dimensions are counted in macroblocks
+height = v->s.mb_height;
+stride = v->s.mb_stride; // distance between the starts of two consecutive rows of data
 invert = get_bits(gb, 1);
 imode = get_vlc2(gb, vc1_imode_vlc.table, VC1_IMODE_VLC_BITS, 1);
-bp->is_raw = 0;
+*raw_flag = 0;
 switch (imode)
 {
 case IMODE_RAW:
 //Data is actually read in the MB layer (same for all tests == "raw")
-bp->is_raw = 1; //invert ignored
+*raw_flag = 1; //invert ignored
 return invert;
 case IMODE_DIFF2:
 case IMODE_NORM2:
-if ((bp->height * bp->width) & 1)
+if ((height * width) & 1) // odd number of flags: the first one is coded as a single bit
 {
 *planep++ = get_bits(gb, 1);
 offset = 1;
 }
 else offset = 0;
 // decode bitplane as one long line
-for (y = offset; y < bp->height * bp->width; y += 2) {
+for (y = offset; y < height * width; y += 2) { // each VLC code carries two flags
 code = get_vlc2(gb, vc1_norm2_vlc.table, VC1_NORM2_VLC_BITS, 1);
 *planep++ = code & 1;
 offset++;
-if(offset == bp->width) {
+if(offset == width) { // end of row reached: skip the padding between width and stride
 offset = 0;
-planep += bp->stride - bp->width;
+planep += stride - width;
 }
 *planep++ = code >> 1;
 offset++;
-if(offset == bp->width) {
+if(offset == width) {
 offset = 0;
-planep += bp->stride - bp->width;
+planep += stride - width;
 }
 }
 break;
 case IMODE_DIFF6:
 case IMODE_NORM6:
-if(!(bp->height % 3) && (bp->width % 3)) { // use 2x3 decoding
+if(!(height % 3) && (width % 3)) { // use 2x3 decoding
-for(y = 0; y < bp->height; y+= 3) {
+for(y = 0; y < height; y+= 3) {
-for(x = bp->width & 1; x < bp->width; x += 2) {
+for(x = width & 1; x < width; x += 2) { // column 0 is skipped here when width is odd
 code = get_vlc2(gb, vc1_norm6_vlc.table, VC1_NORM6_VLC_BITS, 2);
 if(code < 0){
 av_log(v->s.avctx, AV_LOG_DEBUG, "invalid NORM-6 VLC\n");
 return -1;
 }
 planep[x + 0] = (code >> 0) & 1;
 planep[x + 1] = (code >> 1) & 1;
-planep[x + 0 + bp->stride] = (code >> 2) & 1;
+planep[x + 0 + stride] = (code >> 2) & 1;
-planep[x + 1 + bp->stride] = (code >> 3) & 1;
+planep[x + 1 + stride] = (code >> 3) & 1;
-planep[x + 0 + bp->stride * 2] = (code >> 4) & 1;
+planep[x + 0 + stride * 2] = (code >> 4) & 1;
-planep[x + 1 + bp->stride * 2] = (code >> 5) & 1;
+planep[x + 1 + stride * 2] = (code >> 5) & 1;
 }
-planep += bp->stride * 3;
+planep += stride * 3;
 }
-if(bp->width & 1) decode_colskip(bp->data, 1, bp->height, bp->stride, &v->s.gb);
+if(width & 1) decode_colskip(data, 1, height, stride, &v->s.gb); // the skipped first column is coded separately
 } else { // 3x2
-for(y = bp->height & 1; y < bp->height; y += 2) {
+for(y = height & 1; y < height; y += 2) { // NOTE(review): planep still points at row 0 even when y starts at 1 - confirm the tiles land on the intended rows
-for(x = bp->width % 3; x < bp->width; x += 3) {
+for(x = width % 3; x < width; x += 3) {
 code = get_vlc2(gb, vc1_norm6_vlc.table, VC1_NORM6_VLC_BITS, 2);
 if(code < 0){
 av_log(v->s.avctx, AV_LOG_DEBUG, "invalid NORM-6 VLC\n");
 return -1;
 }
 planep[x + 0] = (code >> 0) & 1;
 planep[x + 1] = (code >> 1) & 1;
 planep[x + 2] = (code >> 2) & 1;
-planep[x + 0 + bp->stride] = (code >> 3) & 1;
+planep[x + 0 + stride] = (code >> 3) & 1;
-planep[x + 1 + bp->stride] = (code >> 4) & 1;
+planep[x + 1 + stride] = (code >> 4) & 1;
-planep[x + 2 + bp->stride] = (code >> 5) & 1;
+planep[x + 2 + stride] = (code >> 5) & 1;
 }
-planep += bp->stride * 2;
+planep += stride * 2;
 }
-x = bp->width % 3;
+x = width % 3;
-if(x) decode_colskip(bp->data  ,             x, bp->height    , bp->stride, &v->s.gb);
+if(x) decode_colskip(data  ,             x, height    , stride, &v->s.gb); // the x leftmost columns not covered by the tiles
-if(bp->height & 1) decode_rowskip(bp->data+x, bp->width - x, bp->height & 1, bp->stride, &v->s.gb);
+if(height & 1) decode_rowskip(data+x, width - x, 1, stride, &v->s.gb); // one extra row, written at row 0 of data, when height is odd
 }
 break;
 case IMODE_ROWSKIP:
-decode_rowskip(bp->data, bp->width, bp->height, bp->stride, &v->s.gb);
+decode_rowskip(data, width, height, stride, &v->s.gb);
 break;
 case IMODE_COLSKIP:
-decode_colskip(bp->data, bp->width, bp->height, bp->stride, &v->s.gb);
+decode_colskip(data, width, height, stride, &v->s.gb);
 break;
 default: break;
 }
 /* Applying diff operator */
 if (imode == IMODE_DIFF2 || imode == IMODE_DIFF6)
 {
-planep = bp->data;
+planep = data;
 planep[0] ^= invert;
-for (x=1; x<bp->width; x++)
+for (x=1; x<width; x++) // first row: each flag is predicted from its left neighbour
 planep[x] ^= planep[x-1];
-for (y=1; y<bp->height; y++)
+for (y=1; y<height; y++)
 {
-planep += bp->stride;
+planep += stride;
-planep[0] ^= planep[-bp->stride];
+planep[0] ^= planep[-stride]; // first column: predicted from the flag above
-for (x=1; x<bp->width; x++)
+for (x=1; x<width; x++)
 {
-if (planep[x-1] != planep[x-bp->stride]) planep[x] ^= invert;
+if (planep[x-1] != planep[x-stride]) planep[x] ^= invert; // left and top neighbours disagree: predict with the invert bit
-else                                     planep[x] ^= planep[x-1];
+else                                 planep[x] ^= planep[x-1];
 }
 }
 }
 else if (invert)
 {
-planep = bp->data;
+planep = data;
-for (x=0; x<bp->width*bp->height; x++) planep[x] = !planep[x]; //FIXME stride
+for (x=0; x<stride*height; x++) planep[x] = !planep[x]; //FIXME stride
 }
 return (imode<<1) + invert;
 }
 /** @} */ //Bitplane group
 /***********************************************************************/
 /** VOP Dquant decoding
 * @param v VC-1 Context
 for(i = 0; i < 64; i++)
 block[i] += 128;
 }
+static void vc1_v_overlap(uint8_t* src, int stride) // blends, in place, the samples at offsets -2*stride, -stride, 0 and +stride for 8 consecutive positions starting at src
+{
+int i;
+int a, b, c, d;
+for(i = 0; i < 8; i++) { // one position per iteration, src advances by 1 each time
+a = src[-2*stride]; // all four inputs are read before any output is written
+b = src[-stride];
+c = src[0];
+d = src[stride];
+src[-2*stride] = (7*a + d) >> 3; // outer taps: weighted average of a and d, always within 0..255
+src[-stride] = (-a + 7*b + c + d) >> 3; // NOTE(review): not clamped - the value can fall outside 0..255 and is then truncated by the uint8_t store
+src[0] = (a + b + 7*c - d) >> 3; // NOTE(review): same unclamped store as the line above
+src[stride] = (a + 7*d) >> 3;
+src++;
+}
+}
+static void vc1_h_overlap(uint8_t* src, int stride) // blends, in place, the samples at offsets -2, -1, 0 and +1 on 8 consecutive lines; stride is the step from one line to the next
+{
+int i;
+int a, b, c, d;
+for(i = 0; i < 8; i++) { // one line per iteration, src advances by stride each time
+a = src[-2]; // all four inputs are read before any output is written
+b = src[-1];
+c = src[0];
+d = src[1];
+src[-2] = (7*a + d) >> 3; // outer taps: weighted average of a and d, always within 0..255
+src[-1] = (-a + 7*b + c + d) >> 3; // NOTE(review): not clamped - the value can fall outside 0..255 and is then truncated by the uint8_t store
+src[0] = (a + b + 7*c - d) >> 3; // NOTE(review): same unclamped store as the line above
+src[1] = (a + 7*d) >> 3;
+src += stride;
+}
+}
 /** Put block onto picture
 * @todo move to DSPContext
 */
 static void vc1_put_block(VC1Context *v, DCTELEM block[6][64])
 {
 dsp->put_pixels_clamped(block[4], v->s.dest[1], us);
 dsp->put_pixels_clamped(block[5], v->s.dest[2], vs);
 }
+/* clip motion vector as specified in 8.3.6.5
+*  mv is modified in place: below -bs it becomes -bs - src * bs, above lim it becomes lim - src * bs.
+*  The macro expands to two bare if statements and its arguments are not parenthesized,
+*  so pass simple expressions and never use it as the body of an unbraced if/else.
+*  NOTE(review): the callers in this file pass an absolute sample position as mv and a
+*  macroblock index as src - confirm that subtracting src * bs is intended there. */
+#define CLIP_RANGE(mv, src, lim, bs)      \
+if(mv < -bs) mv = -bs - src * bs; \
+if(mv > lim) mv = lim - src * bs;
 /** Do motion compensation over 1 macroblock
 * Mostly adapted hpel_motion and qpel_motion from mpegvideo.c
 * Predicts the luma block into s->dest[0] and the two chroma blocks into
 * s->dest[1] and s->dest[2] from s->last_picture, using the vector stored in
 * s->mv[0][0]. Does nothing when there is no last picture.
 */
 static void vc1_mc_1mv(VC1Context *v)
 {
 MpegEncContext *s = &v->s;
 DSPContext *dsp = &v->s.dsp;
 uint8_t *srcY, *srcU, *srcV;
-int dxy, mx, my, src_x, src_y;
+int dxy, uvdxy, mx, my, uvmx, uvmy, src_x, src_y, uvsrc_x, uvsrc_y;
-int width = s->mb_width * 16, height = s->mb_height * 16;
 if(!v->s.last_picture.data[0])return;
-mx = s->mv[0][0][0] >> s->mspel;
+mx = s->mv[0][0][0]; // treated below as quarter-sample units: the integer part is mx >> 2
-my = s->mv[0][0][1] >> s->mspel;
+my = s->mv[0][0][1];
+uvmx = (mx + ((mx & 3) == 3)) >> 1; // chroma vector derived from the luma one
+uvmy = (my + ((my & 3) == 3)) >> 1;
 srcY = s->last_picture.data[0];
 srcU = s->last_picture.data[1];
 srcV = s->last_picture.data[2];
-if(s->mspel) { // hpel mc
+if(v->fastuvmc) { // XXX: 8.3.5.4.5 specifies something different
+uvmx = (uvmx + 1) >> 1;
+uvmy = (uvmy + 1) >> 1;
+}
+src_x = s->mb_x * 16 + (mx >> 2); // top-left corner of the reference block, in whole samples
+src_y = s->mb_y * 16 + (my >> 2);
+uvsrc_x = s->mb_x * 8 + (uvmx >> 2);
+uvsrc_y = s->mb_y * 8 + (uvmy >> 2);
+CLIP_RANGE(  src_x, s->mb_x, s->mb_width  * 16, 16);
+CLIP_RANGE(  src_y, s->mb_y, s->mb_height * 16, 16);
+CLIP_RANGE(uvsrc_x, s->mb_x, s->mb_width  *  8,  8);
+CLIP_RANGE(uvsrc_y, s->mb_y, s->mb_height *  8,  8);
+srcY += src_y * s->linesize + src_x;
+srcU += uvsrc_y * s->uvlinesize + uvsrc_x;
+srcV += uvsrc_y * s->uvlinesize + uvsrc_x;
+if((unsigned)src_x > s->h_edge_pos - (mx&3) - 16 // reference block reaches past the edge positions: read through the edge emulation buffer instead
+|| (unsigned)src_y > s->v_edge_pos - (my&3) - 16){
+uint8_t *uvbuf= s->edge_emu_buffer + 18 * s->linesize; // chroma copies are placed after the luma area of the same buffer
+ff_emulated_edge_mc(s->edge_emu_buffer, srcY, s->linesize, 16+1, 16+1,
+src_x, src_y, s->h_edge_pos, s->v_edge_pos);
+srcY = s->edge_emu_buffer;
+ff_emulated_edge_mc(uvbuf     , srcU, s->uvlinesize, 8+1, 8+1,
+uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, s->v_edge_pos >> 1);
+ff_emulated_edge_mc(uvbuf + 16, srcV, s->uvlinesize, 8+1, 8+1,
+uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, s->v_edge_pos >> 1);
+srcU = uvbuf;
+srcV = uvbuf + 16;
+}
+if(!s->quarter_sample) { // hpel mc
+mx >>= 1; // drop to half-sample units for the dxy index below
+my >>= 1;
+uvmx >>= 1;
+uvmy >>= 1;
 dxy = ((my & 1) << 1) | (mx & 1);
-src_x = s->mb_x * 16 + (mx >> 1);
+uvdxy = 0;
-src_y = s->mb_y * 16 + (my >> 1);
-/*        src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
-if (src_x == width)
-dxy &= ~1;
-src_y = clip(src_y, -16, height);
-if (src_y == height)
-dxy &= ~2;*/
-srcY += src_y * s->linesize + src_x;
-srcU += (src_y >> 1) * s->uvlinesize + (src_x >> 1);
-srcV += (src_y >> 1) * s->uvlinesize + (src_x >> 1);
-if((unsigned)src_x > s->h_edge_pos - (mx&1) - 16
-|| (unsigned)src_y > s->v_edge_pos - (my&1) - 16){
-uint8_t *uvbuf= s->edge_emu_buffer + 18 * s->linesize;
-ff_emulated_edge_mc(s->edge_emu_buffer, srcY, s->linesize, 16+1, 16+1,
-src_x, src_y, s->h_edge_pos, s->v_edge_pos);
-srcY = s->edge_emu_buffer;
-ff_emulated_edge_mc(uvbuf, srcU, s->uvlinesize, 8+1, 8+1,
-src_x >> 1, src_y >> 1, s->h_edge_pos >> 1, s->v_edge_pos >> 1);
-ff_emulated_edge_mc(uvbuf + 16, srcV, s->uvlinesize, 8+1, 8+1,
-src_x >> 1, src_y >> 1, s->h_edge_pos >> 1, s->v_edge_pos >> 1);
-srcU = uvbuf;
-srcV = uvbuf + 16;
-}
 dsp->put_no_rnd_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize, 16);
-dsp->put_no_rnd_pixels_tab[1][0](s->dest[1], srcU, s->uvlinesize, 8);
-dsp->put_no_rnd_pixels_tab[1][0](s->dest[2], srcV, s->uvlinesize, 8);
 } else {
-int motion_x = mx, motion_y = my, uvdxy, uvsrc_x, uvsrc_y;
+dxy = ((my & 3) << 2) | (mx & 3); // 4-bit index built from the quarter-sample fractions of both components
-dxy = ((motion_y & 3) << 2) | (motion_x & 3);
+uvdxy = ((uvmy & 1) << 1) | (uvmx & 1);
-src_x = s->mb_x * 16 + (mx >> 2);
-src_y = s->mb_y * 16 + (my >> 2);
-mx= motion_x/2;
-my= motion_y/2;
-mx= (mx>>1)|(mx&1);
-my= (my>>1)|(my&1);
-uvdxy= (mx&1) | ((my&1)<<1);
-mx>>=1;
-my>>=1;
-uvsrc_x = s->mb_x * 8 + mx;
-uvsrc_y = s->mb_y * 8 + my;
-srcY = s->last_picture.data[0] +   src_y *   s->linesize +   src_x;
-srcU = s->last_picture.data[1] + uvsrc_y * s->uvlinesize + uvsrc_x;
-srcV = s->last_picture.data[2] + uvsrc_y * s->uvlinesize + uvsrc_x;
-if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16
-|| (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 16  ){
-uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
-ff_emulated_edge_mc(s->edge_emu_buffer, srcY, s->linesize, 17, 17,
-src_x, src_y, s->h_edge_pos, s->v_edge_pos);
-srcY = s->edge_emu_buffer;
-ff_emulated_edge_mc(uvbuf, srcU, s->uvlinesize, 9, 9,
-uvsrc_x, uvsrc_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
-ff_emulated_edge_mc(uvbuf + 16, srcV, s->uvlinesize, 9, 9,
-uvsrc_x, uvsrc_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
-srcU = uvbuf;
-srcV = uvbuf + 16;
-}
 dsp->put_no_rnd_qpel_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize);
-dsp->put_no_rnd_pixels_tab[1][uvdxy](s->dest[1], srcU, s->uvlinesize, 8);
+}
-dsp->put_no_rnd_pixels_tab[1][uvdxy](s->dest[2], srcV, s->uvlinesize, 8);
+dsp->put_mspel_pixels_tab[uvdxy](s->dest[1], srcU, s->uvlinesize); // chroma is now written once, after both branches, for either precision
-}
+dsp->put_mspel_pixels_tab[uvdxy](s->dest[2], srcV, s->uvlinesize);
 }
 /**
 * Decode Simple/Main Profiles sequence header
 * @see Figure 7-8, p16-17
 v->mv_mode2 = mv_pmode_table[lowquant][get_prefix(gb, 1, 3)];
 v->lumscale = get_bits(gb, 6);
 v->lumshift = get_bits(gb, 6);
 }
 if(v->mv_mode == MV_PMODE_1MV_HPEL || v->mv_mode == MV_PMODE_1MV_HPEL_BILIN)
-v->s.mspel = 1;
+v->s.quarter_sample = 0;
 else
-v->s.mspel = 0;
+v->s.quarter_sample = 1;
 if(v->mv_mode != MV_PMODE_1MV && v->mv_mode != MV_PMODE_1MV_HPEL && v->mv_mode != MV_PMODE_1MV_HPEL_BILIN) {
 av_log(v->s.avctx, AV_LOG_ERROR, "Only 1MV P-frames are supported by now\n");
 return -1;
 }
 if ((v->mv_mode == MV_PMODE_INTENSITY_COMP &&
 v->mv_mode2 == MV_PMODE_MIXED_MV)
 || v->mv_mode == MV_PMODE_MIXED_MV)
 {
-status = bitplane_decoding(&v->mv_type_mb_plane, v);
+status = bitplane_decoding(v->mv_type_mb_plane, &v->mv_type_is_raw, v);
 if (status < 0) return -1;
 av_log(v->s.avctx, AV_LOG_DEBUG, "MB MV Type plane encoding: "
 "Imode: %i, Invert: %i\n", status>>1, status&1);
-}
+} else {
-status = bitplane_decoding(&v->skip_mb_plane, v);
+v->mv_type_is_raw = 0;
+memset(v->mv_type_mb_plane, 0, v->s.mb_stride * v->s.mb_height);
+}
+status = bitplane_decoding(v->skip_mb_plane, &v->skip_is_raw, v);
 if (status < 0) return -1;
 av_log(v->s.avctx, AV_LOG_DEBUG, "MB Skip plane encoding: "
 "Imode: %i, Invert: %i\n", status>>1, status&1);
 /* Hopefully this is correct for P frames */
 else mb_has_coeffs = 0;                                           \
 s->mb_intra = 0;                                                  \
 if (!index) { _dmv_x = _dmv_y = 0; }                              \
 else if (index == 35)                                             \
 {                                                                 \
-_dmv_x = get_bits(gb, v->k_x - s->mspel);                       \
+_dmv_x = get_bits(gb, v->k_x - 1 + s->quarter_sample);          \
-_dmv_y = get_bits(gb, v->k_y - s->mspel);                       \
+_dmv_y = get_bits(gb, v->k_y - 1 + s->quarter_sample);          \
 }                                                                 \
 else if (index == 36)                                             \
 {                                                                 \
 _dmv_x = 0;                                                     \
 _dmv_y = 0;                                                     \
 s->mb_intra = 1;                                                \
 }                                                                 \
 else                                                              \
 {                                                                 \
 index1 = index%6;                                               \
-if (s->mspel && index1 == 5) val = 1;                           \
+if (!s->quarter_sample && index1 == 5) val = 1;                 \
-else                         val = 0;                           \
+else                                   val = 0;                 \
 if(size_table[index1] - val > 0)                                \
 val = get_bits(gb, size_table[index1] - val);               \
 else                                   val = 0;                 \
 sign = 0 - (val&1);                                             \
 _dmv_x = (sign ^ ((val>>1) + offset_table[index1])) - sign;     \
 \
 index1 = index/6;                                               \
-if (s->mspel && index1 == 5) val = 1;                           \
+if (!s->quarter_sample && index1 == 5) val = 1;                 \
-else                         val = 0;                           \
+else                                   val = 0;                 \
 if(size_table[index1] - val > 0)                                \
 val = get_bits(gb, size_table[index1] - val);               \
 else                                   val = 0;                 \
 sign = 0 - (val&1);                                             \
 _dmv_y = (sign ^ ((val>>1) + offset_table[index1])) - sign;     \
 int px, py;
 int sum;
 int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
 /* scale MV difference to be quad-pel */
-dmv_x <<= s->mspel;
+dmv_x <<= 1 - s->quarter_sample;
-dmv_y <<= s->mspel;
+dmv_y <<= 1 - s->quarter_sample;
 wrap = s->b8_stride;
 xy = s->block_index[0];
 C = s->current_picture.motion_val[0][xy - (1 << mv1)];
 lst = index >= vc1_last_decode_table[codingset];
 if(get_bits(gb, 1))
 level = -level;
 } else {
 escape = decode210(gb);
-if (escape == 0) {
+if (escape != 2) {
 index = get_vlc2(gb, vc1_ac_coeff_table[codingset].table, AC_VLC_BITS, 3);
 run = vc1_index_decode_table[codingset][index][0];
 level = vc1_index_decode_table[codingset][index][1];
 lst = index >= vc1_last_decode_table[codingset];
-if(lst)
+if(escape == 0) {
-level += vc1_last_delta_level_table[codingset][run];
+if(lst)
-else
+level += vc1_last_delta_level_table[codingset][run];
-level += vc1_delta_level_table[codingset][run];
+else
-if(get_bits(gb, 1))
+level += vc1_delta_level_table[codingset][run];
-level = -level;
+} else {
-} else if (escape == 1) {
+if(lst)
-index = get_vlc2(gb, vc1_ac_coeff_table[codingset].table, AC_VLC_BITS, 3);
+run += vc1_last_delta_run_table[codingset][level] + 1;
-run = vc1_index_decode_table[codingset][index][0];
+else
-level = vc1_index_decode_table[codingset][index][1];
+run += vc1_delta_run_table[codingset][level] + 1;
-lst = index >= vc1_last_decode_table[codingset];
+}
-if(lst)
-run += vc1_last_delta_run_table[codingset][level] + 1;
-else
-run += vc1_delta_run_table[codingset][level] + 1;
 if(get_bits(gb, 1))
 level = -level;
 } else {
 int sign;
 lst = get_bits(gb, 1);
 break;
 block[zz_table[i++]] = value;
 }
 /* apply AC prediction if needed */
-if(s->ac_pred) {
+if(s->ac_pred && (v->a_avail || v->c_avail)) {
 /* scale predictors if needed*/
 int mb_pos2, q1, q2;
 mb_pos2 = mb_pos - dc_pred_dir - (1 - dc_pred_dir) * s->mb_stride;
 q1 = s->current_picture.qscale_table[mb_pos];
 }
 if(!a_avail) {
 memset(ac_val + 8, 0, 8 * sizeof(ac_val[0]));
 dc_pred_dir = 1;
 }
-if(!q1 && q1 && q2 && q1 != q2) {
+if(q2 && q1 != q2) {
 q1 = q1 * 2 - 1;
 q2 = q2 * 2 - 1;
 if(dc_pred_dir) { //left
 for(k = 1; k < 8; k++)
 scale = mquant * 2;
 memset(ac_val2, 0, 16 * 2);
 if(dc_pred_dir) {//left
 ac_val -= 16;
-if(s->ac_pred)
+if(s->ac_pred && (v->a_avail || v->c_avail))
 memcpy(ac_val2, ac_val, 8 * 2);
 } else {//top
 ac_val -= 16 * s->block_wrap[n];
-if(s->ac_pred)
+if(s->ac_pred && (v->a_avail || v->c_avail))
 memcpy(ac_val2 + 8, ac_val + 8, 8 * 2);
 }
 /* apply AC prediction if needed */
-if(s->ac_pred) {
+if(s->ac_pred && (v->a_avail || v->c_avail)) {
 if(dc_pred_dir) { //left
 for(k = 1; k < 8; k++) {
 block[k << 3] = ac_val[k] * scale;
 if(!v->pquantizer)
 block[k << 3] += (block[k << 3] < 0) ? -mquant : mquant;
 }
 if(ttblk == TT_4X8_RIGHT || ttblk == TT_4X8_LEFT) {
 ttblk = TT_4X8;
 subblkpat = 2 - (ttblk == TT_4X8_LEFT);
 }
 switch(ttblk) {
 case TT_8X8:
 i = 0;
 last = 0;
 while (!last) {
 break;
 case TT_4X4:
 for(j = 0; j < 4; j++) {
 last = subblkpat & (1 << (3 - j));
 i = 0;
-off = (j & 1) * 4 + (j & 2) * 32;
+off = (j & 1) * 4 + (j & 2) * 16;
 while (!last) {
 vc1_decode_ac_coeff(v, &last, &skip, &value, v->codingset2);
 i += skip;
 if(i > 15)
 break;
 idx = vc1_simple_progressive_4x4_zz[i++];
 block[idx + off] = value * scale;
 }
-vc1_inv_trans(block + off, 4, 4);
+if(!(subblkpat & (1 << (3 - j))))
+vc1_inv_trans(block + off, 4, 4);
 }
 break;
 case TT_8X4:
 for(j = 0; j < 2; j++) {
 last = subblkpat & (1 << (1 - j));
 if(i > 31)
 break;
 idx = vc1_simple_progressive_8x4_zz[i++];
 block[idx + off] = value * scale;
 }
-if(!(subblkpat & (1 << (1 - j)))) vc1_inv_trans(block + off, 8, 4);
+if(!(subblkpat & (1 << (1 - j))))
+vc1_inv_trans(block + off, 8, 4);
 }
 break;
 case TT_4X8:
 for(j = 0; j < 2; j++) {
 last = subblkpat & (1 << (1 - j));
 if(i > 31)
 break;
 idx = vc1_simple_progressive_8x4_zz[i++];
 block[idx + off] = value * scale;
 }
-vc1_inv_trans(block + off, 4, 8);
+if(!(subblkpat & (1 << (1 - j))))
+vc1_inv_trans(block + off, 4, 8);
 }
 break;
 }
 return 0;
 }
 */
 static int vc1_decode_p_mb(VC1Context *v, DCTELEM block[6][64])
 {
 MpegEncContext *s = &v->s;
 GetBitContext *gb = &s->gb;
-int i, j, mb_offset = s->mb_x + s->mb_y*s->mb_width; /* XXX: mb_stride */
+int i, j;
 int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
 int cbp; /* cbp decoding stuff */
 int hybrid_pred; /* Prediction types */
 int mqdiff, mquant; /* MB quantization */
 int ttmb = v->ttmb; /* MB Transform type */
 int dmv_x, dmv_y; /* Differential MV components */
 int index, index1; /* LUT indices */
 int val, sign; /* temp values */
 int first_block = 1;
 int dst_idx, off;
+int skipped, fourmv;
 mquant = v->pq; /* Loosy initialization */
-if (v->mv_type_mb_plane.is_raw)
+if (v->mv_type_is_raw)
-v->mv_type_mb_plane.data[mb_offset] = get_bits(gb, 1);
+fourmv = get_bits1(gb);
-if (v->skip_mb_plane.is_raw)
+else
-v->skip_mb_plane.data[mb_offset] = get_bits(gb, 1);
+fourmv = v->mv_type_mb_plane[mb_pos];
-s->current_picture.mbskip_table[mb_pos] = v->skip_mb_plane.data[mb_offset];
+if (v->skip_is_raw)
-if (!v->mv_type_mb_plane.data[mb_offset]) /* 1MV mode */
+skipped = get_bits1(gb);
+else
+skipped = v->skip_mb_plane[mb_pos];
+if (!fourmv) /* 1MV mode */
 {
-if (!v->skip_mb_plane.data[mb_offset])
+if (!skipped)
 {
 GET_MVDATA(dmv_x, dmv_y);
 s->current_picture.mb_type[mb_pos] = s->mb_intra ? MB_TYPE_INTRA : MB_TYPE_16x16;
 vc1_pred_mv(s, dmv_x, dmv_y, 1, v->range_x, v->range_y);
 v->a_avail = 1;
 if((i == 1 || i == 3) || (s->mb_x && IS_INTRA(s->current_picture.mb_type[mb_pos - 1])))
 v->c_avail = 1;
 vc1_decode_intra_block(v, block[i], i, val, mquant, (i&4)?v->codingset2:v->codingset);
-vc1_inv_trans(s->block[i], 8, 8);
+vc1_inv_trans(block[i], 8, 8);
-for(j = 0; j < 64; j++) s->block[i][j] += 128;
+for(j = 0; j < 64; j++) block[i][j] += 128;
-s->dsp.put_pixels_clamped(s->block[i], s->dest[dst_idx] + off, s->linesize >> ((i & 4) >> 2));
+s->dsp.put_pixels_clamped(block[i], s->dest[dst_idx] + off, s->linesize >> ((i & 4) >> 2));
 /* TODO: proper loop filtering */
-if(v->a_avail)
+if(v->pq >= 9 && v->overlap) {
-s->dsp.h263_v_loop_filter(s->dest[dst_idx] + off, s->linesize >> ((i & 4) >> 2), s->y_dc_scale);
+if(v->a_avail)
-if(v->c_avail)
+s->dsp.h263_v_loop_filter(s->dest[dst_idx] + off, s->linesize >> ((i & 4) >> 2), s->y_dc_scale);
-s->dsp.h263_h_loop_filter(s->dest[dst_idx] + off, s->linesize >> ((i & 4) >> 2), s->y_dc_scale);
+if(v->c_avail)
+s->dsp.h263_h_loop_filter(s->dest[dst_idx] + off, s->linesize >> ((i & 4) >> 2), s->y_dc_scale);
+}
 } else if(val) {
 vc1_decode_p_block(v, block[i], i, mquant, ttmb, first_block);
 if(!v->ttmbf && ttmb < 8) ttmb = -1;
 first_block = 0;
-s->dsp.add_pixels_clamped(s->block[i], s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize);
+s->dsp.add_pixels_clamped(block[i], s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize);
 }
 }
 }
 else //Skipped
 {
 return 0;
 }
 } //1MV mode
 else //4MV mode
 {//FIXME: looks not conforming to standard and is not even theoretically complete
-if (!v->skip_mb_plane.data[mb_offset] /* unskipped MB */)
+if (!skipped /* unskipped MB */)
 {
 int blk_intra[4], blk_coded[4];
 /* Get CBPCY */
 cbp = get_vlc2(&v->s.gb, v->cbpcy_vlc->table, VC1_CBPCY_P_VLC_BITS, 2);
 for (i=0; i<4; i++)
 GetBitContext gb;
 if (!avctx->extradata_size || !avctx->extradata) return -1;
 avctx->pix_fmt = PIX_FMT_YUV420P;
 v->s.avctx = avctx;
+avctx->flags |= CODEC_FLAG_EMU_EDGE;
+v->s.flags |= CODEC_FLAG_EMU_EDGE;
 if(ff_h263_decode_init(avctx) < 0)
 return -1;
 if (vc1_init_common(v) < 0) return -1;
 av_log(avctx, AV_LOG_INFO, "This decoder is not supposed to produce picture. Dont report this as a bug!\n");
 av_log(avctx, AV_LOG_INFO, "If you see a picture, don't believe your eyes.\n");
-avctx->flags |= CODEC_FLAG_EMU_EDGE;
 avctx->coded_width = avctx->width;
 avctx->coded_height = avctx->height;
 if (avctx->codec_id == CODEC_ID_WMV3)
 {
 int count = 0;
 s->mb_width = (avctx->coded_width+15)>>4;
 s->mb_height = (avctx->coded_height+15)>>4;
 /* Allocate mb bitplanes */
-if (alloc_bitplane(&v->mv_type_mb_plane, s->mb_width, s->mb_height) < 0)
+v->mv_type_mb_plane = av_malloc(s->mb_stride * s->mb_height);
-return -1;
+v->skip_mb_plane = av_malloc(s->mb_stride * s->mb_height);
-if (alloc_bitplane(&v->mv_type_mb_plane, s->mb_width, s->mb_height) < 0)
-return -1;
-if (alloc_bitplane(&v->skip_mb_plane, s->mb_width, s->mb_height) < 0)
-return -1;
-if (alloc_bitplane(&v->direct_mb_plane, s->mb_width, s->mb_height) < 0)
-return -1;
 /* For predictors */
 v->previous_line_cbpcy = (uint8_t *)av_malloc(s->mb_stride*4);
 if (!v->previous_line_cbpcy) return -1;
 /* Init coded blocks info */
 if (v->profile == PROFILE_ADVANCED)
 {
-if (alloc_bitplane(&v->over_flags_plane, s->mb_width, s->mb_height) < 0)
+//        if (alloc_bitplane(&v->over_flags_plane, s->mb_width, s->mb_height) < 0)
-return -1;
+//            return -1;
-if (alloc_bitplane(&v->ac_pred_plane, s->mb_width, s->mb_height) < 0)
+//        if (alloc_bitplane(&v->ac_pred_plane, s->mb_width, s->mb_height) < 0)
-return -1;
+//            return -1;
 }
 return 0;
 }
 VC1Context *v = avctx->priv_data;
 av_freep(&v->hrd_rate);
 av_freep(&v->hrd_buffer);
 MPV_common_end(&v->s);
-free_bitplane(&v->mv_type_mb_plane);
+av_freep(&v->mv_type_mb_plane);
-free_bitplane(&v->skip_mb_plane);
+av_freep(&v->skip_mb_plane);
-free_bitplane(&v->direct_mb_plane);
 return 0;
 }
 AVCodec vc1_decoder = {

Mercurial > libavcodec.hg

comparison vc1.c @ 3367:8c7b8ffc2485 libavcodec