# HG changeset patch # User michaelni # Date 1033678639 0 # Node ID 71f669e9f633c4bc7edae689daabd363d72ec1f9 # Parent 409bdaa0b9647daa951a6bfea2c4f1ab1747e8f5 ps2 optimizations update patch by (Leon van Stuivenberg ) diff -r 409bdaa0b964 -r 71f669e9f633 Makefile --- a/Makefile Thu Oct 03 19:49:23 2002 +0000 +++ b/Makefile Thu Oct 03 20:57:19 2002 +0000 @@ -77,7 +77,7 @@ endif ifeq ($(TARGET_MMI),yes) -OBJS += ps2/dsputil_mmi.o ps2/idct_mmi.o +OBJS += ps2/dsputil_mmi.o ps2/idct_mmi.o ps2/mpegvideo_mmi.o endif ifeq ($(TARGET_ALTIVEC),yes) diff -r 409bdaa0b964 -r 71f669e9f633 mpegvideo.c --- a/mpegvideo.c Thu Oct 03 19:49:23 2002 +0000 +++ b/mpegvideo.c Thu Oct 03 20:57:19 2002 +0000 @@ -216,6 +216,9 @@ #ifdef HAVE_MLIB MPV_common_init_mlib(s); #endif +#ifdef HAVE_MMI + MPV_common_init_mmi(s); +#endif /* load & permutate scantables diff -r 409bdaa0b964 -r 71f669e9f633 mpegvideo.h --- a/mpegvideo.h Thu Oct 03 19:49:23 2002 +0000 +++ b/mpegvideo.h Thu Oct 03 20:57:19 2002 +0000 @@ -503,6 +503,9 @@ #ifdef HAVE_MLIB void MPV_common_init_mlib(MpegEncContext *s); #endif +#ifdef HAVE_MMI +void MPV_common_init_mmi(MpegEncContext *s); +#endif extern void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w); void ff_conceal_past_errors(MpegEncContext *s, int conceal_all); void ff_copy_bits(PutBitContext *pb, UINT8 *src, int length); diff -r 409bdaa0b964 -r 71f669e9f633 ps2/dsputil_mmi.c --- a/ps2/dsputil_mmi.c Thu Oct 03 19:49:23 2002 +0000 +++ b/ps2/dsputil_mmi.c Thu Oct 03 20:57:19 2002 +0000 @@ -20,96 +20,113 @@ */ #include "../dsputil.h" +#include "mmi.h" -void ff_mmi_idct(DCTELEM * block); - -#include "mmi.h" +/* the provided 'as' in binutils 2.9EE doesn't support +the EE's mips3 instructions properly */ +#define AS_BUGGY static void clear_blocks_mmi(DCTELEM * blocks) { - /* $4 = blocks */ int i; for (i = 0; i < 6; i++) { - sq($0, 0, $4); - sq($0, 16, $4); - sq($0, 32, $4); - sq($0, 48, $4); - sq($0, 64, $4); - sq($0, 80, $4); - sq($0, 96, $4); - sq($0, 112, 
$4); - __asm__ __volatile__("addi $4, $4, 128"); + asm volatile( + "sq $0, 0(%0) \n\t" + "sq $0, 16(%0) \n\t" + "sq $0, 32(%0) \n\t" + "sq $0, 48(%0) \n\t" + "sq $0, 64(%0) \n\t" + "sq $0, 80(%0) \n\t" + "sq $0, 96(%0) \n\t" + "sq $0, 112(%0) \n\t" :: "r" (blocks) : "memory" ); + blocks += 64; + } +} + + +static void get_pixels_mmi(DCTELEM *block, const UINT8 *pixels, int line_size) +{ + int i; + for(i=0;i<8;i++) { +#ifdef AS_BUGGY + ld3(5, 0, 8); + asm volatile( + "add %1, %1, %2 \n\t" + "pextlb $8, $0, $8 \n\t" + "sq $8, 0(%0) \n\t" :: "r" (block), "r" (pixels), "r" (line_size) : "$8", "memory" ); +#else + asm volatile( + "ld $8, 0(%1) \n\t" + "add %1, %1, %2 \n\t" + "pextlb $8, $0, $8 \n\t" + "sq $8, 0(%0) \n\t" :: "r" (block), "r" (pixels), "r" (line_size) : "$8", "memory" ); +#endif + block += 8; } } -static void put_pixels_clamped_mmi(const DCTELEM * block, UINT8 * pixels, - int line_size) +static void put_pixels8_mmi(uint8_t *block, const uint8_t *pixels, int line_size, int h) { - /* $4 = block, $5 = pixels, $6 = line_size */ - __asm__ __volatile__("li $11, 255":::"$11"); - lq($4, 0, $12); - pcpyld($11, $11, $11); - pcpyh($11, $11); - -#define PUT(rs) \ - ppacb($0, $##rs, $##rs); \ - sd3(rs, 0, 5); \ - __asm__ __volatile__ ("add $5, $5, $6"); - - pminh($12, $11, $12); - pmaxh($12, $0, $12); - lq($4, 16, $13); - PUT(12); - - pminh($13, $11, $13); - pmaxh($13, $0, $13); - lq($4, 32, $12); - PUT(13); - - pminh($12, $11, $12); - pmaxh($12, $0, $12); - lq($4, 48, $13); - PUT(12); - - pminh($13, $11, $13); - pmaxh($13, $0, $13); - lq($4, 64, $12); - PUT(13); - - pminh($12, $11, $12); - pmaxh($12, $0, $12); - lq($4, 80, $13); - PUT(12); - - pminh($13, $11, $13); - pmaxh($13, $0, $13); - lq($4, 96, $12); - PUT(13); - - pminh($12, $11, $12); - pmaxh($12, $0, $12); - lq($4, 112, $13); - PUT(12); - - pminh($13, $11, $13); - pmaxh($13, $0, $13); - PUT(13); + int i; + for(i=0; i + */ + +#include "../dsputil.h" +#include "../mpegvideo.h" +#include "../avcodec.h" + +void 
ff_mmi_idct_put(UINT8 *dest, int line_size, DCTELEM *block);
+void ff_mmi_idct_add(UINT8 *dest, int line_size, DCTELEM *block);
+
+
+/**
+ * In-place H.263 dequantization of one block using EE multimedia (MMI)
+ * instructions.
+ *
+ * Each coefficient becomes level * qmul + qadd for positive levels,
+ * level * qmul - qadd for negative levels, and stays 0 when it is 0,
+ * with qmul = 2 * qscale and qadd = (qscale - 1) | 1 (qadd is forced to 0
+ * for intra blocks when s->h263_aic is set).
+ * For intra blocks the DC value is computed separately in C and written
+ * back to block[0] after the vector loop has run over it.
+ *
+ * @param s      codec context; mb_intra, h263_aic, y_dc_scale, c_dc_scale,
+ *               block_last_index and intra_scantable.raster_end are read
+ * @param block  coefficients, rewritten in place; accessed with 128-bit
+ *               lq/sq, 8 coefficients per loop iteration
+ * @param n      block index; n < 4 selects y_dc_scale, otherwise c_dc_scale
+ * @param qscale quantizer scale
+ */
+static void dct_unquantize_h263_mmi(MpegEncContext *s,
+                                    DCTELEM *block, int n, int qscale)
+{
+    int level=0, qmul, qadd;
+    int nCoeffs;
+
+    assert(s->block_last_index[n]>=0);
+
+    qadd = (qscale - 1) | 1;
+    qmul = qscale << 1;
+
+    if (s->mb_intra) {
+        if (!s->h263_aic) {
+            if (n < 4)
+                level = block[0] * s->y_dc_scale;
+            else
+                level = block[0] * s->c_dc_scale;
+        }else {
+            qadd = 0;
+            level = block[0];
+        }
+        nCoeffs= 63; //does not always use zigzag table
+    } else {
+        nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
+    }
+
+    /* nCoeffs is decremented inside the asm, so it must not be an input-only
+       operand: it is declared as output %0 and tied to input %4 through the
+       matching constraint "0". %1 = qmul, %2 = qadd, %3 = block. */
+    asm volatile(
+        "add    $14, $0, %3     \n\t"
+        "pcpyld $8, %1, %1      \n\t"
+        "pcpyh  $8, $8          \n\t"   //r8 = qmul
+        "pcpyld $9, %2, %2      \n\t"
+        "pcpyh  $9, $9          \n\t"   //r9 = qadd
+        ".p2align 2             \n\t"
+        "1:                     \n\t"
+        "lq     $10, 0($14)     \n\t"   //r10 = level
+        "addi   $14, $14, 16    \n\t"   //block+=8
+        "addi   %0, %0, -8      \n\t"
+        "pcgth  $11, $0, $10    \n\t"   //r11 = level < 0 ? -1 : 0
+        "pcgth  $12, $10, $0    \n\t"   //r12 = level > 0 ? -1 : 0
+        "por    $12, $11, $12   \n\t"   //r12 = level != 0 ? -1 : 0
+        "pmulth $10, $10, $8    \n\t"
+        "paddh  $13, $9, $11    \n\t"
+        "pxor   $13, $13, $11   \n\t"   //r13 = level < 0 ? -qadd : qadd
+        "pmfhl.uw $11           \n\t"
+        "pinteh $10, $11, $10   \n\t"   //r10 = level * qmul
+        "paddh  $10, $10, $13   \n\t"
+        "pand   $10, $10, $12   \n\t"   //zero coefficients stay zero
+        "sq     $10, -16($14)   \n\t"
+        "bgez   %0, 1b          \n\t"
+        : "=r" (nCoeffs)
+        : "r" (qmul), "r" (qadd), "r" (block), "0" (nCoeffs)
+        : "$8", "$9", "$10", "$11", "$12", "$13", "$14", "memory" );
+
+    /* the loop above also rescaled block[0]; restore the intra DC value */
+    if(s->mb_intra)
+        block[0]= level;
+}
+
+
+/**
+ * Install the MMI-optimized routines into the codec context.
+ *
+ * When s->avctx->idct_algo is FF_IDCT_AUTO, idct_put/idct_add are pointed at
+ * the MMI IDCT and idct_permutation is filled to match: bits 3-5 of the index
+ * are kept and the low three bits are rotated right by one (bit 0 moves to
+ * bit 2). dct_unquantize_h263 is replaced unconditionally.
+ *
+ * @param s codec context to update
+ */
+void MPV_common_init_mmi(MpegEncContext *s)
+{
+    int i;
+//    const int dct_algo = s->avctx->dct_algo;
+    const int idct_algo= s->avctx->idct_algo;
+
+    if(idct_algo==FF_IDCT_AUTO){
+        s->idct_put= ff_mmi_idct_put;
+        s->idct_add= ff_mmi_idct_add;
+        for(i=0; i<64; i++)
+            s->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
+    }
+    s->dct_unquantize_h263 = dct_unquantize_h263_mmi;
+}
+
+