# HG changeset patch # User michael # Date 1096154338 0 # Node ID 7e0b2e86afa9a955d716e3391fe49b96d41fd4ab # Parent 507690ff49a25880a7799a35f016bf1cd32efdd6 1/2 resolution decoding diff -r 507690ff49a2 -r 7e0b2e86afa9 avcodec.h --- a/avcodec.h Sat Sep 25 10:06:31 2004 +0000 +++ b/avcodec.h Sat Sep 25 23:18:58 2004 +0000 @@ -17,7 +17,7 @@ #define FFMPEG_VERSION_INT 0x000409 #define FFMPEG_VERSION "0.4.9-pre1" -#define LIBAVCODEC_BUILD 4721 +#define LIBAVCODEC_BUILD 4722 #define LIBAVCODEC_VERSION_INT FFMPEG_VERSION_INT #define LIBAVCODEC_VERSION FFMPEG_VERSION @@ -1111,6 +1111,7 @@ #define FF_IDCT_ALTIVEC 8 #define FF_IDCT_SH4 9 #define FF_IDCT_SIMPLEARM 10 +#define FF_IDCT_INT4 11 /** * slice count. @@ -1656,6 +1657,13 @@ */ int level; #define FF_LEVEL_UNKNOWN -99 + + /** + * low resolution decoding. 1-> 1/2 size, 2->1/4 size + * - encoding: unused + * - decoding: set by user + */ + int lowres; } AVCodecContext; diff -r 507690ff49a2 -r 7e0b2e86afa9 dsputil.c --- a/dsputil.c Sat Sep 25 10:06:31 2004 +0000 +++ b/dsputil.c Sat Sep 25 23:18:58 2004 +0000 @@ -446,6 +446,24 @@ } } +static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, + int line_size) +{ + int i; + uint8_t *cm = cropTbl + MAX_NEG_CROP; + + /* read the pixels */ + for(i=0;i<4;i++) { + pixels[0] = cm[block[0]]; + pixels[1] = cm[block[1]]; + pixels[2] = cm[block[2]]; + pixels[3] = cm[block[3]]; + + pixels += line_size; + block += 8; + } +} + static void put_signed_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, int line_size) @@ -487,6 +505,23 @@ block += 8; } } + +static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, + int line_size) +{ + int i; + uint8_t *cm = cropTbl + MAX_NEG_CROP; + + /* read the pixels */ + for(i=0;i<4;i++) { + pixels[0] = cm[pixels[0] + block[0]]; + pixels[1] = cm[pixels[1] + block[1]]; + pixels[2] = cm[pixels[2] + block[2]]; + pixels[3] = cm[pixels[3] + block[3]]; + pixels += line_size; + block += 8; + } +} #if 0 #define PIXOP2(OPNAME, OP) \ @@ -3294,6 +3329,17 @@ add_pixels_clamped_c(block, dest, line_size); } +static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block) +{ + j_rev_dct4 (block); + put_pixels_clamped4_c(block, dest, line_size); +} +static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block) +{ + j_rev_dct4 (block); + add_pixels_clamped4_c(block, dest, line_size); +} + /* init static data */ void dsputil_static_init(void) { @@ -3332,16 +3378,23 @@ } #endif //CONFIG_ENCODERS - if(avctx->idct_algo==FF_IDCT_INT){ - c->idct_put= ff_jref_idct_put; - c->idct_add= ff_jref_idct_add; - c->idct = j_rev_dct; - c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; - }else{ //accurate/default - c->idct_put= simple_idct_put; - c->idct_add= simple_idct_add; - c->idct = simple_idct; + if(avctx->lowres==1){ + c->idct_put= ff_jref_idct4_put; + c->idct_add= ff_jref_idct4_add; + c->idct = j_rev_dct4; c->idct_permutation_type= FF_NO_IDCT_PERM; + }else{ + if(avctx->idct_algo==FF_IDCT_INT){ + c->idct_put= ff_jref_idct_put; + c->idct_add= ff_jref_idct_add; + c->idct = j_rev_dct; + c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; + }else{ //accurate/default + c->idct_put= simple_idct_put; + c->idct_add= simple_idct_add; + c->idct = simple_idct; + c->idct_permutation_type= FF_NO_IDCT_PERM; + } } /* VP3 DSP support */ diff -r 507690ff49a2 -r 7e0b2e86afa9 dsputil.h --- a/dsputil.h Sat Sep 25 10:06:31 2004 +0000 +++ b/dsputil.h Sat Sep 25 23:18:58 2004 +0000 @@ -42,6 +42,7 @@ void ff_fdct248_islow (DCTELEM *data); void j_rev_dct (DCTELEM *data); +void j_rev_dct4 (DCTELEM *data); void ff_fdct_mmx(DCTELEM *block); void ff_fdct_mmx2(DCTELEM *block); diff -r 507690ff49a2 -r 7e0b2e86afa9 i386/dsputil_mmx.c --- a/i386/dsputil_mmx.c Sat Sep 25 10:06:31 2004 +0000 +++ b/i386/dsputil_mmx.c Sat Sep 25 23:18:58 2004 +0000 @@ -2984,23 +2984,24 @@ } } #endif //CONFIG_ENCODERS - - if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){ - c->idct_put= ff_simple_idct_put_mmx; - c->idct_add= ff_simple_idct_add_mmx; - c->idct = ff_simple_idct_mmx; - c->idct_permutation_type= FF_SIMPLE_IDCT_PERM; - }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){ - if(mm_flags & MM_MMXEXT){ - c->idct_put= ff_libmpeg2mmx2_idct_put; - c->idct_add= ff_libmpeg2mmx2_idct_add; - c->idct = ff_mmxext_idct; - }else{ - c->idct_put= ff_libmpeg2mmx_idct_put; - c->idct_add= ff_libmpeg2mmx_idct_add; - c->idct = ff_mmx_idct; + if(avctx->lowres==0){ + if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){ + c->idct_put= ff_simple_idct_put_mmx; + c->idct_add= ff_simple_idct_add_mmx; + c->idct = ff_simple_idct_mmx; + c->idct_permutation_type= FF_SIMPLE_IDCT_PERM; + }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){ + if(mm_flags & MM_MMXEXT){ + c->idct_put= ff_libmpeg2mmx2_idct_put; + c->idct_add= ff_libmpeg2mmx2_idct_add; + c->idct = ff_mmxext_idct; + }else{ + c->idct_put= ff_libmpeg2mmx_idct_put; + c->idct_add= ff_libmpeg2mmx_idct_add; + c->idct = ff_mmx_idct; + } + c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; } - c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; } /* VP3 optimized DSP functions */ diff -r 507690ff49a2 -r 7e0b2e86afa9 jrevdct.c --- a/jrevdct.c Sat Sep 25 10:06:31 2004 +0000 +++ b/jrevdct.c Sat Sep 25 23:18:58 2004 +0000 @@ -1172,5 +1172,461 @@ } } +#undef DCTSIZE +#define DCTSIZE 4 +#define DCTSTRIDE 8 + +void j_rev_dct4(DCTBLOCK data) +{ + int32_t tmp0, tmp1, tmp2, tmp3; + int32_t tmp10, tmp11, tmp12, tmp13; + int32_t z1; + int32_t d0, d2, d4, d6; + register DCTELEM *dataptr; + int rowctr; + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. */ + + dataptr = data; + + for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { + /* Due to quantization, we will usually find that many of the input + * coefficients are zero, especially the AC terms. We can exploit this + * by short-circuiting the IDCT calculation for any row in which all + * the AC terms are zero. In that case each output is equal to the + * DC coefficient (with scale factor as needed). + * With typical images and quantization tables, half or more of the + * row DCT calculations can be simplified this way. + */ + + register int *idataptr = (int*)dataptr; + + /* WARNING: we do the same permutation as MMX idct to simplify the + video core */ + d0 = dataptr[0]; + d2 = dataptr[1]; + d4 = dataptr[2]; + d6 = dataptr[3]; + + if ((d2 | d4 | d6) == 0) { + /* AC terms all zero */ + if (d0) { + /* Compute a 32 bit value to assign. */ + DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS); + register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000); + + idataptr[0] = v; + idataptr[1] = v; + } + + dataptr += DCTSTRIDE; /* advance pointer to next row */ + continue; + } + + /* Even part: reverse the even part of the forward DCT. */ + /* The rotator is sqrt(2)*c(-6). */ + if (d6) { + if (d4) { + if (d2) { + if (d0) { + /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ + z1 = MULTIPLY(d2 + d6, FIX_0_541196100); + tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); + tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */ + z1 = MULTIPLY(d2 + d6, FIX_0_541196100); + tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); + tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); + + tmp0 = d4 << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp2 - tmp0; + tmp12 = -(tmp0 + tmp2); + } + } else { + if (d0) { + /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ + tmp2 = MULTIPLY(-d6, FIX_1_306562965); + tmp3 = MULTIPLY(d6, FIX_0_541196100); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */ + tmp2 = MULTIPLY(-d6, FIX_1_306562965); + tmp3 = MULTIPLY(d6, FIX_0_541196100); + + tmp0 = d4 << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp2 - tmp0; + tmp12 = -(tmp0 + tmp2); + } + } + } else { + if (d2) { + if (d0) { + /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */ + z1 = MULTIPLY(d2 + d6, FIX_0_541196100); + tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); + tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); + + tmp0 = d0 << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp0 + tmp2; + tmp12 = tmp0 - tmp2; + } else { + /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */ + z1 = MULTIPLY(d2 + d6, FIX_0_541196100); + tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); + tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); + + tmp10 = tmp3; + tmp13 = -tmp3; + tmp11 = tmp2; + tmp12 = -tmp2; + } + } else { + if (d0) { + /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */ + tmp2 = MULTIPLY(-d6, FIX_1_306562965); + tmp3 = MULTIPLY(d6, FIX_0_541196100); + + tmp0 = d0 << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp0 + tmp2; + tmp12 = tmp0 - tmp2; + } else { + /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */ + tmp2 = MULTIPLY(-d6, FIX_1_306562965); + tmp3 = MULTIPLY(d6, FIX_0_541196100); + + tmp10 = tmp3; + tmp13 = -tmp3; + tmp11 = tmp2; + tmp12 = -tmp2; + } + } + } + } else { + if (d4) { + if (d2) { + if (d0) { + /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ + tmp2 = MULTIPLY(d2, FIX_0_541196100); + tmp3 = MULTIPLY(d2, FIX_1_306562965); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */ + tmp2 = MULTIPLY(d2, FIX_0_541196100); + tmp3 = MULTIPLY(d2, FIX_1_306562965); + + tmp0 = d4 << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp2 - tmp0; + tmp12 = -(tmp0 + tmp2); + } + } else { + if (d0) { + /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ + tmp10 = tmp13 = (d0 + d4) << CONST_BITS; + tmp11 = tmp12 = (d0 - d4) << CONST_BITS; + } else { + /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */ + tmp10 = tmp13 = d4 << CONST_BITS; + tmp11 = tmp12 = -tmp10; + } + } + } else { + if (d2) { + if (d0) { + /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */ + tmp2 = MULTIPLY(d2, FIX_0_541196100); + tmp3 = MULTIPLY(d2, FIX_1_306562965); + + tmp0 = d0 << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp0 + tmp2; + tmp12 = tmp0 - tmp2; + } else { + /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */ + tmp2 = MULTIPLY(d2, FIX_0_541196100); + tmp3 = MULTIPLY(d2, FIX_1_306562965); + + tmp10 = tmp3; + tmp13 = -tmp3; + tmp11 = tmp2; + tmp12 = -tmp2; + } + } else { + if (d0) { + /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */ + tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS; + } else { + /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */ + tmp10 = tmp13 = tmp11 = tmp12 = 0; + } + } + } + } + + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ + + dataptr[0] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS); + dataptr[1] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS); + dataptr[2] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS); + dataptr[3] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS); + + dataptr += DCTSTRIDE; /* advance pointer to next row */ + } + + /* Pass 2: process columns. */ + /* Note that we must descale the results by a factor of 8 == 2**3, */ + /* and also undo the PASS1_BITS scaling. */ + + dataptr = data; + for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { + /* Columns of zeroes can be exploited in the same way as we did with rows. + * However, the row calculation has created many nonzero AC terms, so the + * simplification applies less often (typically 5% to 10% of the time). + * On machines with very fast multiplication, it's possible that the + * test takes more time than it's worth. In that case this section + * may be commented out. + */ + + d0 = dataptr[DCTSTRIDE*0]; + d2 = dataptr[DCTSTRIDE*1]; + d4 = dataptr[DCTSTRIDE*2]; + d6 = dataptr[DCTSTRIDE*3]; + + /* Even part: reverse the even part of the forward DCT. */ + /* The rotator is sqrt(2)*c(-6). */ + if (d6) { + if (d4) { + if (d2) { + if (d0) { + /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ + z1 = MULTIPLY(d2 + d6, FIX_0_541196100); + tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); + tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */ + z1 = MULTIPLY(d2 + d6, FIX_0_541196100); + tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); + tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); + + tmp0 = d4 << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp2 - tmp0; + tmp12 = -(tmp0 + tmp2); + } + } else { + if (d0) { + /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ + tmp2 = MULTIPLY(-d6, FIX_1_306562965); + tmp3 = MULTIPLY(d6, FIX_0_541196100); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */ + tmp2 = MULTIPLY(-d6, FIX_1_306562965); + tmp3 = MULTIPLY(d6, FIX_0_541196100); + + tmp0 = d4 << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp2 - tmp0; + tmp12 = -(tmp0 + tmp2); + } + } + } else { + if (d2) { + if (d0) { + /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */ + z1 = MULTIPLY(d2 + d6, FIX_0_541196100); + tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); + tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); + + tmp0 = d0 << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp0 + tmp2; + tmp12 = tmp0 - tmp2; + } else { + /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */ + z1 = MULTIPLY(d2 + d6, FIX_0_541196100); + tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); + tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); + + tmp10 = tmp3; + tmp13 = -tmp3; + tmp11 = tmp2; + tmp12 = -tmp2; + } + } else { + if (d0) { + /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */ + tmp2 = MULTIPLY(-d6, FIX_1_306562965); + tmp3 = MULTIPLY(d6, FIX_0_541196100); + + tmp0 = d0 << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp0 + tmp2; + tmp12 = tmp0 - tmp2; + } else { + /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */ + tmp2 = MULTIPLY(-d6, FIX_1_306562965); + tmp3 = MULTIPLY(d6, FIX_0_541196100); + + tmp10 = tmp3; + tmp13 = -tmp3; + tmp11 = tmp2; + tmp12 = -tmp2; + } + } + } + } else { + if (d4) { + if (d2) { + if (d0) { + /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ + tmp2 = MULTIPLY(d2, FIX_0_541196100); + tmp3 = MULTIPLY(d2, FIX_1_306562965); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */ + tmp2 = MULTIPLY(d2, FIX_0_541196100); + tmp3 = MULTIPLY(d2, FIX_1_306562965); + + tmp0 = d4 << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp2 - tmp0; + tmp12 = -(tmp0 + tmp2); + } + } else { + if (d0) { + /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ + tmp10 = tmp13 = (d0 + d4) << CONST_BITS; + tmp11 = tmp12 = (d0 - d4) << CONST_BITS; + } else { + /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */ + tmp10 = tmp13 = d4 << CONST_BITS; + tmp11 = tmp12 = -tmp10; + } + } + } else { + if (d2) { + if (d0) { + /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */ + tmp2 = MULTIPLY(d2, FIX_0_541196100); + tmp3 = MULTIPLY(d2, FIX_1_306562965); + + tmp0 = d0 << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp0 + tmp2; + tmp12 = tmp0 - tmp2; + } else { + /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */ + tmp2 = MULTIPLY(d2, FIX_0_541196100); + tmp3 = MULTIPLY(d2, FIX_1_306562965); + + tmp10 = tmp3; + tmp13 = -tmp3; + tmp11 = tmp2; + tmp12 = -tmp2; + } + } else { + if (d0) { + /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */ + tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS; + } else { + /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */ + tmp10 = tmp13 = tmp11 = tmp12 = 0; + } + } + } + } + + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ + + dataptr[DCTSTRIDE*0] = (DCTELEM) DESCALE(tmp10, + CONST_BITS+PASS1_BITS+3); + dataptr[DCTSTRIDE*1] = (DCTELEM) DESCALE(tmp11, + CONST_BITS+PASS1_BITS+3); + dataptr[DCTSTRIDE*2] = (DCTELEM) DESCALE(tmp12, + CONST_BITS+PASS1_BITS+3); + dataptr[DCTSTRIDE*3] = (DCTELEM) DESCALE(tmp13, + CONST_BITS+PASS1_BITS+3); + + dataptr++; /* advance pointer to next column */ + } +} + + #undef FIX #undef CONST_BITS diff -r 507690ff49a2 -r 7e0b2e86afa9 mpeg12.c --- a/mpeg12.c Sat Sep 25 10:06:31 2004 +0000 +++ b/mpeg12.c Sat Sep 25 23:18:58 2004 +0000 @@ -2409,6 +2409,7 @@ AVCodecContext *avctx= s->avctx; int ret; const int field_pic= s->picture_structure != PICT_FRAME; + const int lowres= s->avctx->lowres; s->resync_mb_x= s->resync_mb_y= -1; @@ -2518,9 +2519,9 @@ } } - s->dest[0] += 16; - s->dest[1] += 16 >> s->chroma_x_shift; - s->dest[2] += 16 >> s->chroma_x_shift; + s->dest[0] += 16 >> lowres; + s->dest[1] += 16 >> (s->chroma_x_shift + lowres); + s->dest[2] += 16 >> (s->chroma_x_shift + lowres); MPV_decode_mb(s, s->block); diff -r 507690ff49a2 -r 7e0b2e86afa9 mpegvideo.c --- a/mpegvideo.c Sat Sep 25 10:06:31 2004 +0000 +++ b/mpegvideo.c Sat Sep 25 23:18:58 2004 +0000 @@ -2607,6 +2607,82 @@ pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift); } } + +/* apply one mpeg motion vector to the three components */ +static always_inline void mpeg_motion_lowres(MpegEncContext *s, + uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, + int field_based, int bottom_field, int field_select, + uint8_t **ref_picture, h264_chroma_mc_func *pix_op, + int motion_x, int motion_y, int h) +{ + uint8_t *ptr_y, *ptr_cb, *ptr_cr; + int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy; + const int lowres= s->avctx->lowres; + const int block_s= 8>>lowres; + const int s_mask= (2<h_edge_pos >> lowres; + const int v_edge_pos = s->v_edge_pos >> lowres; + linesize = s->current_picture.linesize[0] << field_based; + uvlinesize = s->current_picture.linesize[1] << field_based; + + sx= motion_x & s_mask; + sy= motion_y & s_mask; + src_x = s->mb_x*2*block_s + (motion_x >> (lowres+1)); + src_y = s->mb_y*2*block_s + (motion_y >> (lowres+1)); + + if (s->out_format == FMT_H263) { + uvsx = sx | ((motion_x & 2)>>1); + uvsy = sy | ((motion_y & 2)>>1); + uvsrc_x = src_x>>1; + uvsrc_y = src_y>>1; + }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261 + mx = motion_x / 4; + my = motion_y / 4; + uvsx = (2*mx) & s_mask; + uvsy = (2*my) & s_mask; + uvsrc_x = s->mb_x*block_s + (mx >> lowres); + uvsrc_y = s->mb_y*block_s + (my >> lowres); + } else { + mx = motion_x / 2; + my = motion_y / 2; + uvsx = mx & s_mask; + uvsy = my & s_mask; + uvsrc_x = s->mb_x*block_s + (mx >> (lowres+1)); + uvsrc_y = s->mb_y*block_s + (my >> (lowres+1)); + } + + ptr_y = ref_picture[0] + src_y * linesize + src_x; + ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x; + ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x; + + if( (unsigned)src_x > h_edge_pos - (!!sx) - 2*block_s + || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){ + ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based, + src_x, src_y<edge_emu_buffer; + if(!(s->flags&CODEC_FLAG_GRAY)){ + uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize; + ff_emulated_edge_mc(uvbuf , ptr_cb, s->uvlinesize, 9, 9+field_based, + uvsrc_x, uvsrc_y<>1, v_edge_pos>>1); + ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based, + uvsrc_x, uvsrc_y<>1, v_edge_pos>>1); + ptr_cb= uvbuf; + ptr_cr= uvbuf+16; + } + } + + sx <<= 2 - lowres; + sy <<= 2 - lowres; + pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy); + + if(!(s->flags&CODEC_FLAG_GRAY)){ + uvsx <<= 2 - lowres; + uvsy <<= 2 - lowres; + pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy); + pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy); + } +} + //FIXME move to dsputil, avg variant, 16x16 version static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){ int x; @@ -3085,6 +3161,28 @@ } } +/** + * motion compesation of a single macroblock + * @param s context + * @param dest_y luma destination pointer + * @param dest_cb chroma cb/u destination pointer + * @param dest_cr chroma cr/v destination pointer + * @param dir direction (0->forward, 1->backward) + * @param ref_picture array[3] of pointers to the 3 planes of the reference picture + * @param pic_op halfpel motion compensation function (average or put normally) + * the motion vectors are taken from s->mv and the MV type from s->mv_type + */ +static inline void MPV_motion_lowres(MpegEncContext *s, + uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, + int dir, uint8_t **ref_picture, + h264_chroma_mc_func *pix_op) +{ + assert(s->mv_type == MV_TYPE_16X16); + mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr, + 0, 0, 0, + ref_picture, pix_op, + s->mv[dir][0][0], s->mv[dir][0][1], 16>>s->avctx->lowres); +} /* put block[] to dest[] */ static inline void put_dct(MpegEncContext *s, @@ -3156,7 +3254,7 @@ s->mv : motion vector s->interlaced_dct : true if interlaced dct used (mpeg2) */ -void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]) +static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag) { int mb_x, mb_y; const int mb_xy = s->mb_y * s->mb_stride + s->mb_x; @@ -3202,7 +3300,8 @@ qpel_mc_func (*op_qpix)[16]; const int linesize= s->current_picture.linesize[0]; //not s->linesize as this woulnd be wrong for field pics const int uvlinesize= s->current_picture.linesize[1]; - const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band; + const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag; + const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8; /* avoid copy if macroblock skipped in last frame too */ /* skip only during decoding as we might trash the buffers during encoding a bit */ @@ -3230,10 +3329,10 @@ *mbskip_ptr = 0; /* not skipped */ } } - + dct_linesize = linesize << s->interlaced_dct; - dct_offset =(s->interlaced_dct)? linesize : linesize*8; - + dct_offset =(s->interlaced_dct)? linesize : linesize*block_size; + if(readable){ dest_y= s->dest[0]; dest_cb= s->dest[1]; @@ -3243,25 +3342,37 @@ dest_cb= s->b_scratchpad+16*linesize; dest_cr= s->b_scratchpad+32*linesize; } + if (!s->mb_intra) { /* motion handling */ /* decoding or more than one mb_type (MC was allready done otherwise) */ if(!s->encoding){ - if ((!s->no_rounding) || s->pict_type==B_TYPE){ - op_pix = s->dsp.put_pixels_tab; - op_qpix= s->dsp.put_qpel_pixels_tab; + if(lowres_flag){ + h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab; + + if (s->mv_dir & MV_DIR_FORWARD) { + MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix); + op_pix = s->dsp.avg_h264_chroma_pixels_tab; + } + if (s->mv_dir & MV_DIR_BACKWARD) { + MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix); + } }else{ - op_pix = s->dsp.put_no_rnd_pixels_tab; - op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab; - } - - if (s->mv_dir & MV_DIR_FORWARD) { - MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix); - op_pix = s->dsp.avg_pixels_tab; - op_qpix= s->dsp.avg_qpel_pixels_tab; - } - if (s->mv_dir & MV_DIR_BACKWARD) { - MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix); + if ((!s->no_rounding) || s->pict_type==B_TYPE){ + op_pix = s->dsp.put_pixels_tab; + op_qpix= s->dsp.put_qpel_pixels_tab; + }else{ + op_pix = s->dsp.put_no_rnd_pixels_tab; + op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab; + } + if (s->mv_dir & MV_DIR_FORWARD) { + MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix); + op_pix = s->dsp.avg_pixels_tab; + op_qpix= s->dsp.avg_qpel_pixels_tab; + } + if (s->mv_dir & MV_DIR_BACKWARD) { + MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix); + } } } @@ -3271,20 +3382,20 @@ /* add dct residue */ if(s->encoding || !( s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){ - add_dequant_dct(s, block[0], 0, dest_y, dct_linesize, s->qscale); - add_dequant_dct(s, block[1], 1, dest_y + 8, dct_linesize, s->qscale); - add_dequant_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize, s->qscale); - add_dequant_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize, s->qscale); + add_dequant_dct(s, block[0], 0, dest_y , dct_linesize, s->qscale); + add_dequant_dct(s, block[1], 1, dest_y + block_size, dct_linesize, s->qscale); + add_dequant_dct(s, block[2], 2, dest_y + dct_offset , dct_linesize, s->qscale); + add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale); if(!(s->flags&CODEC_FLAG_GRAY)){ add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale); add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale); } } else if(s->codec_id != CODEC_ID_WMV2){ - add_dct(s, block[0], 0, dest_y, dct_linesize); - add_dct(s, block[1], 1, dest_y + 8, dct_linesize); - add_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize); - add_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize); + add_dct(s, block[0], 0, dest_y , dct_linesize); + add_dct(s, block[1], 1, dest_y + block_size, dct_linesize); + add_dct(s, block[2], 2, dest_y + dct_offset , dct_linesize); + add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize); if(!(s->flags&CODEC_FLAG_GRAY)){ if(s->chroma_y_shift){//Chroma420 @@ -3316,20 +3427,20 @@ } else { /* dct only in intra block */ if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){ - put_dct(s, block[0], 0, dest_y, dct_linesize, s->qscale); - put_dct(s, block[1], 1, dest_y + 8, dct_linesize, s->qscale); - put_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize, s->qscale); - put_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize, s->qscale); + put_dct(s, block[0], 0, dest_y , dct_linesize, s->qscale); + put_dct(s, block[1], 1, dest_y + block_size, dct_linesize, s->qscale); + put_dct(s, block[2], 2, dest_y + dct_offset , dct_linesize, s->qscale); + put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale); if(!(s->flags&CODEC_FLAG_GRAY)){ put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale); put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale); } }else{ - s->dsp.idct_put(dest_y , dct_linesize, block[0]); - s->dsp.idct_put(dest_y + 8, dct_linesize, block[1]); - s->dsp.idct_put(dest_y + dct_offset , dct_linesize, block[2]); - s->dsp.idct_put(dest_y + dct_offset + 8, dct_linesize, block[3]); + s->dsp.idct_put(dest_y , dct_linesize, block[0]); + s->dsp.idct_put(dest_y + block_size, dct_linesize, block[1]); + s->dsp.idct_put(dest_y + dct_offset , dct_linesize, block[2]); + s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]); if(!(s->flags&CODEC_FLAG_GRAY)){ if(s->chroma_y_shift){ @@ -3362,6 +3473,11 @@ } } +void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){ + if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1); + else MPV_decode_mb_internal(s, block, 0); +} + #ifdef CONFIG_ENCODERS static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold) @@ -3492,6 +3608,7 @@ void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename const int linesize= s->current_picture.linesize[0]; //not s->linesize as this woulnd be wrong for field pics const int uvlinesize= s->current_picture.linesize[1]; + const int mb_size= 4 - s->avctx->lowres; s->block_index[0]= s->b8_stride*(s->mb_y*2 ) - 2 + s->mb_x*2; s->block_index[1]= s->b8_stride*(s->mb_y*2 ) - 1 + s->mb_x*2; @@ -3501,15 +3618,15 @@ s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1; //block_index is not used by mpeg2, so it is not affected by chroma_format - s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << 4); - s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (4 - s->chroma_x_shift)); - s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (4 - s->chroma_x_shift)); + s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size); + s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift)); + s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift)); if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME)) { - s->dest[0] += s->mb_y * linesize << 4; - s->dest[1] += s->mb_y * uvlinesize << (4 - s->chroma_y_shift); - s->dest[2] += s->mb_y * uvlinesize << (4 - s->chroma_y_shift); + s->dest[0] += s->mb_y * linesize << mb_size; + s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift); + s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift); } } diff -r 507690ff49a2 -r 7e0b2e86afa9 mpegvideo.h --- a/mpegvideo.h Sat Sep 25 10:06:31 2004 +0000 +++ b/mpegvideo.h Sat Sep 25 23:18:58 2004 +0000 @@ -773,15 +773,17 @@ void ff_init_block_index(MpegEncContext *s); static inline void ff_update_block_index(MpegEncContext *s){ + const int block_size= 8>>s->avctx->lowres; + s->block_index[0]+=2; s->block_index[1]+=2; s->block_index[2]+=2; s->block_index[3]+=2; s->block_index[4]++; s->block_index[5]++; - s->dest[0]+= 16; - s->dest[1]+= 8; - s->dest[2]+= 8; + s->dest[0]+= 2*block_size; + s->dest[1]+= block_size; + s->dest[2]+= block_size; } static inline int get_bits_diff(MpegEncContext *s){