Mercurial > libavcodec.hg
changeset 19:82d4c9be9873 libavcodec
MMX/MMXEXT iDCT support, using external functions currently defined in libmpeg2
Gives an average 13-20% MPEG decoding speedup on x86 systems.
author | arpi_esp |
---|---|
date | Fri, 03 Aug 2001 18:33:03 +0000 |
parents | 1d2077091e88 |
children | 907b67420d84 |
files | dsputil.c dsputil.h i386/dsputil_mmx.c mpeg12data.h mpegvideo.c mpegvideo.h |
diffstat | 6 files changed, 37 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
--- a/dsputil.c Thu Aug 02 18:46:26 2001 +0000 +++ b/dsputil.c Fri Aug 03 18:33:03 2001 +0000 @@ -21,6 +21,7 @@ #include "avcodec.h" #include "dsputil.h" +void (*ff_idct)(DCTELEM *block); void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); @@ -363,6 +364,7 @@ squareTbl[i] = (i - 256) * (i - 256); } + ff_idct = j_rev_dct; get_pixels = get_pixels_c; put_pixels_clamped = put_pixels_clamped_c; add_pixels_clamped = add_pixels_clamped_c;
--- a/dsputil.h Thu Aug 02 18:46:26 2001 +0000 +++ b/dsputil.h Fri Aug 03 18:33:03 2001 +0000 @@ -25,6 +25,7 @@ /* pixel ops : interface with DCT */ +extern void (*ff_idct)(DCTELEM *block); extern void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); extern void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); extern void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
--- a/i386/dsputil_mmx.c Thu Aug 02 18:46:26 2001 +0000 +++ b/i386/dsputil_mmx.c Fri Aug 03 18:33:03 2001 +0000 @@ -29,6 +29,16 @@ int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); +#ifdef USE_MMX_IDCT +/* external functions, defined in libmpeg2 */ +void mmx_idct(DCTELEM *block); +void mmxext_idct(DCTELEM *block); +/* this should be in dsputil.h? -- A'rpi */ +extern UINT8 ff_alternate_horizontal_scan[64]; +extern UINT8 ff_alternate_vertical_scan[64]; +extern UINT8 zigzag_direct[64]; +#endif + /* pixel operations */ static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001; static const unsigned long long int mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002; @@ -1039,5 +1049,23 @@ sub_pixels_tab[1] = sub_pixels_x2_3dnow; sub_pixels_tab[2] = sub_pixels_y2_3dnow; } + +#ifdef USE_MMX_IDCT + /* use MMX / MMXEXT iDCT code from libmpeg2 */ + //printf("LIBAVCODEC: Using MMX%s iDCT code\n",(mm_flags & MM_MMXEXT)?"EXT":""); + ff_idct = (mm_flags & MM_MMXEXT) ? mmxext_idct : mmx_idct; + /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */ + { int i,j; + for (i = 0; i < 64; i++) { + j = zigzag_direct[i]; + zigzag_direct[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2); + j = ff_alternate_horizontal_scan[i]; + ff_alternate_horizontal_scan[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2); + j = ff_alternate_vertical_scan[i]; + ff_alternate_vertical_scan[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2); + } + } +#endif + } }
--- a/mpeg12data.h Thu Aug 02 18:46:26 2001 +0000 +++ b/mpeg12data.h Fri Aug 03 18:33:03 2001 +0000 @@ -331,7 +331,8 @@ { 0xc, 10 }, }; -const UINT8 zigzag_direct[64] = { +//const +UINT8 zigzag_direct[64] = { 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, 41, 34,
--- a/mpegvideo.c Thu Aug 02 18:46:26 2001 +0000 +++ b/mpegvideo.c Fri Aug 03 18:33:03 2001 +0000 @@ -634,7 +634,7 @@ { if (!s->mpeg2) s->dct_unquantize(s, block, i, s->qscale); - j_rev_dct (block); + ff_idct (block); put_pixels_clamped(block, dest, line_size); } @@ -645,7 +645,7 @@ if (s->block_last_index[i] >= 0) { if (!s->mpeg2) s->dct_unquantize(s, block, i, s->qscale); - j_rev_dct (block); + ff_idct (block); add_pixels_clamped(block, dest, line_size); } }
--- a/mpegvideo.h Thu Aug 02 18:46:26 2001 +0000 +++ b/mpegvideo.h Fri Aug 03 18:33:03 2001 +0000 @@ -179,7 +179,8 @@ DCTELEM *block, int n, int qscale); } MpegEncContext; -extern const UINT8 zigzag_direct[64]; +//const +extern UINT8 zigzag_direct[64]; int MPV_common_init(MpegEncContext *s); void MPV_common_end(MpegEncContext *s);