# HG changeset patch # User nickols_k # Date 996483874 0 # Node ID 1b4461b5a7fb122c666f5868933a9915a27567da # Parent 1d3ac96541785fefb15205796750daa77b9ac8e0 Sync with mplayer's stuff diff -r 1d3ac9654178 -r 1b4461b5a7fb Makefile --- a/Makefile Tue Jul 24 20:43:41 2001 +0000 +++ b/Makefile Mon Jul 30 09:04:34 2001 +0000 @@ -1,6 +1,6 @@ include ../config.mak -CFLAGS= $(OPTFLAGS) -Wall -g +CFLAGS= $(OPTFLAGS) -Wall -g -DHAVE_CONFIG_H LDFLAGS= -g OBJS= common.o utils.o mpegvideo.o h263.o jrevdct.o jfdctfst.o \ diff -r 1d3ac9654178 -r 1b4461b5a7fb common.c --- a/common.c Tue Jul 24 20:43:41 2001 +0000 +++ b/common.c Mon Jul 30 09:04:34 2001 +0000 @@ -29,6 +29,8 @@ #define NDEBUG #include +#include "../bswap.h" + void init_put_bits(PutBitContext *s, UINT8 *buffer, int buffer_size, void *opaque, @@ -222,10 +224,14 @@ buf_ptr += 4; /* handle common case: we can read everything */ if (buf_ptr <= s->buf_end) { - bit_buf = (buf_ptr[-4] << 24) | - (buf_ptr[-3] << 16) | +#if ARCH_X86 + bit_buf = bswap_32(*((unsigned long*)(&buf_ptr[-4]))); +#else + bit_buf = (buf_ptr[-4] << 24) | + (buf_ptr[-3] << 16) | (buf_ptr[-2] << 8) | - (buf_ptr[-1]); + (buf_ptr[-1]); +#endif } else { buf_ptr -= 4; bit_buf = 0; diff -r 1d3ac9654178 -r 1b4461b5a7fb i386/dsputil_mmx.c --- a/i386/dsputil_mmx.c Tue Jul 24 20:43:41 2001 +0000 +++ b/i386/dsputil_mmx.c Mon Jul 30 09:04:34 2001 +0000 @@ -30,8 +30,10 @@ int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); /* pixel operations */ -static const unsigned short mm_wone[4] __attribute__ ((aligned(8))) = { 0x1, 0x1, 0x1, 0x1 }; -static const unsigned short mm_wtwo[4] __attribute__ ((aligned(8))) = { 0x2, 0x2, 0x2, 0x2 }; +static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001; +static const unsigned long long int mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002; +//static const unsigned short mm_wone[4] __attribute__ ((aligned(8))) = { 0x1, 0x1, 0x1, 0x1 }; +//static const unsigned short 
mm_wtwo[4] __attribute__ ((aligned(8))) = { 0x2, 0x2, 0x2, 0x2 }; /***********************************/ /* 3Dnow specific */ @@ -215,7 +217,7 @@ __asm __volatile( "pxor %%mm7, %%mm7\n\t" "movq %0, %%mm4\n\t" - ::"m"(mm_wone[0]):"memory"); + ::"m"(mm_wone):"memory"); do { __asm __volatile( "movq %1, %%mm0\n\t" @@ -250,7 +252,7 @@ __asm __volatile( "pxor %%mm7, %%mm7\n\t" "movq %0, %%mm4\n\t" - ::"m"(mm_wone[0]):"memory"); + ::"m"(mm_wone):"memory"); do { __asm __volatile( "movq %1, %%mm0\n\t" @@ -287,7 +289,7 @@ __asm __volatile( "pxor %%mm7, %%mm7\n\t" "movq %0, %%mm6\n\t" - ::"m"(mm_wtwo[0]):"memory"); + ::"m"(mm_wtwo):"memory"); do { __asm __volatile( "movq %1, %%mm0\n\t" @@ -399,7 +401,7 @@ __asm __volatile( "pxor %%mm7, %%mm7\n\t" "movq %0, %%mm6\n\t" - ::"m"(mm_wone[0]):"memory"); + ::"m"(mm_wone):"memory"); do { __asm __volatile( "movq %1, %%mm0\n\t" @@ -448,7 +450,7 @@ __asm __volatile( "pxor %%mm7, %%mm7\n\t" "movq %0, %%mm6\n\t" - ::"m"(mm_wone[0]):"memory"); + ::"m"(mm_wone):"memory"); do { __asm __volatile( "movq %0, %%mm0\n\t" @@ -485,7 +487,7 @@ __asm __volatile( "pxor %%mm7, %%mm7\n\t" "movq %0, %%mm6\n\t" - ::"m"(mm_wone[0]):"memory"); + ::"m"(mm_wone):"memory"); do { __asm __volatile( "movq %1, %%mm1\n\t" @@ -531,7 +533,7 @@ __asm __volatile( "pxor %%mm7, %%mm7\n\t" "movq %0, %%mm6\n\t" - ::"m"(mm_wone[0]):"memory"); + ::"m"(mm_wone):"memory"); do { __asm __volatile( "movq %1, %%mm1\n\t" @@ -577,7 +579,7 @@ __asm __volatile( "pxor %%mm7, %%mm7\n\t" "movq %0, %%mm6\n\t" - ::"m"(mm_wtwo[0]):"memory"); + ::"m"(mm_wtwo):"memory"); do { __asm __volatile( "movq %1, %%mm0\n\t" @@ -621,7 +623,7 @@ "movq %%mm0, %0\n\t" :"=m"(*p) :"m"(*pix), - "m"(*(pix+line_size)), "m"(mm_wone[0]) + "m"(*(pix+line_size)), "m"(mm_wone) :"memory"); pix += line_size; p += line_size ; @@ -748,7 +750,7 @@ __asm __volatile( "pxor %%mm7, %%mm7\n\t" "movq %0, %%mm6\n\t" - ::"m"(mm_wone[0]):"memory"); + ::"m"(mm_wone):"memory"); do { __asm __volatile( "movq %1, %%mm0\n\t" @@ -832,7 
+834,7 @@ __asm __volatile( "pxor %%mm7, %%mm7\n\t" "movq %0, %%mm6" - ::"m"(mm_wone[0]):"memory"); + ::"m"(mm_wone):"memory"); do { __asm __volatile( "movq %0, %%mm0\n\t" @@ -872,7 +874,7 @@ __asm __volatile( "pxor %%mm7, %%mm7\n\t" "movq %0, %%mm6" - ::"m"(mm_wone[0]):"memory"); + ::"m"(mm_wone):"memory"); do { __asm __volatile( "movq %0, %%mm0\n\t" @@ -912,7 +914,7 @@ __asm __volatile( "pxor %%mm7, %%mm7\n\t" "movq %0, %%mm6\n\t" - ::"m"(mm_wtwo[0]):"memory"); + ::"m"(mm_wtwo):"memory"); do { __asm __volatile( "movq %1, %%mm0\n\t" diff -r 1d3ac9654178 -r 1b4461b5a7fb i386/dsputil_mmx_avg.h --- a/i386/dsputil_mmx_avg.h Tue Jul 24 20:43:41 2001 +0000 +++ b/i386/dsputil_mmx_avg.h Mon Jul 30 09:04:34 2001 +0000 @@ -243,7 +243,7 @@ __asm __volatile( "pxor %%mm7, %%mm7\n\t" "movq %0, %%mm6\n\t" - ::"m"(mm_wtwo[0]):"memory"); + ::"m"(mm_wtwo):"memory"); do { __asm __volatile( "movq %1, %%mm0\n\t" diff -r 1d3ac9654178 -r 1b4461b5a7fb i386/mpegvideo.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/i386/mpegvideo.c Mon Jul 30 09:04:34 2001 +0000 @@ -0,0 +1,239 @@ +/* + * The simplest mpeg encoder (well, it was the simplest!) + * Copyright (c) 2000,2001 Gerard Lantau. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + * Optimized for ia32 cpus by Nick Kurshev + */ + +void MPV_frame_start(MpegEncContext *s) +{ + if (s->pict_type == B_TYPE) { + __asm __volatile( + "movl (%1), %%eax\n\t" + "movl 4(%1), %%edx\n\t" + "movl 8(%1), %%ecx\n\t" + "movl %%eax, (%0)\n\t" + "movl %%edx, 4(%0)\n\t" + "movl %%ecx, 8(%0)\n\t" + : + :"r"(s->current_picture), "r"(s->aux_picture) + :"eax","edx","ecx","memory"); + } else { + /* swap next and last */ + __asm __volatile( + "movl (%1), %%eax\n\t" + "movl 4(%1), %%edx\n\t" + "movl 8(%1), %%ecx\n\t" + "xchgl (%0), %%eax\n\t" + "xchgl 4(%0), %%edx\n\t" + "xchgl 8(%0), %%ecx\n\t" + "movl %%eax, (%1)\n\t" + "movl %%edx, 4(%1)\n\t" + "movl %%ecx, 8(%1)\n\t" + "movl %%eax, (%2)\n\t" + "movl %%edx, 4(%2)\n\t" + "movl %%ecx, 8(%2)\n\t" + : + :"r"(s->last_picture), "r"(s->next_picture), "r"(s->current_picture) + :"eax","edx","ecx","memory"); + } +} + +static void dct_unquantize(MpegEncContext *s, DCTELEM *block, int n, int qscale); + +#ifdef HAVE_MMX +static const unsigned long long int mm_wabs __attribute__ ((aligned(8))) = 0xffffffffffffffffULL; +static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL; + +/* + NK: + Note: looking at PARANOID: + "enable all paranoid tests for rounding, overflows, etc..." + +#ifdef PARANOID + if (level < -2048 || level > 2047) + fprintf(stderr, "unquant error %d %d\n", i, level); +#endif + We can assume that the result of the two multiplications cannot exceed 0xFFFF, + i.e. it fits in 16 bits, so only the PMULLW instruction is used here and + a more complex multiplication can be avoided. 
+===================================================== + Full formula for the multiplication of two integer numbers + which are represented as high:low words: + input: value1 = high1:low1 + value2 = high2:low2 + output: value3 = value1*value2 + value3=high3:low3 (on overflow: modulo 2^32 wrap-around) + this means that for 0x123456 * 0x123456 the correct result is 0x14b66cb0ce4 + but this algorithm will compute only 0x66cb0ce4; + this is limited by the 16-bit size of the operand words + --------------------------------- + tlow1 = high1*low2 + tlow2 = high2*low1 + tlow1 = tlow1 + tlow2 + high3:low3 = low1*low2 + high3 += tlow1 +*/ +#ifdef BIN_PORTABILITY +static void dct_unquantize_mmx +#else +#define HAVE_DCT_UNQUANTIZE 1 +static void dct_unquantize +#endif +(MpegEncContext *s,DCTELEM *block, int n, int qscale) +{ + int i, level; + const UINT16 *quant_matrix; + if (s->mb_intra) { + if (n < 4) + block[0] = block[0] * s->y_dc_scale; + else + block[0] = block[0] * s->c_dc_scale; + if (s->out_format == FMT_H263) { + i = 1; + goto unquant_even; + } + /* XXX: only mpeg1 */ + quant_matrix = s->intra_matrix; + i=1; + /* Align on a 4-element boundary */ + while(i&3) + { + level = block[i]; + if (level) { + if (level < 0) level = -level; + level = (int)(level * qscale * quant_matrix[i]) >> 3; + level = (level - 1) | 1; + if (block[i] < 0) level = -level; + block[i] = level; + } + i++; + } + __asm __volatile( + "movd %0, %%mm6\n\t" /* mm6 = qscale | 0 */ + "punpckldq %%mm6, %%mm6\n\t" /* mm6 = qscale | qscale */ + "movq %2, %%mm4\n\t" + "movq %%mm6, %%mm7\n\t" + "movq %1, %%mm5\n\t" + "packssdw %%mm6, %%mm7\n\t" /* mm7 = qscale | qscale | qscale | qscale */ + "pxor %%mm6, %%mm6\n\t" + ::"g"(qscale),"m"(mm_wone),"m"(mm_wabs):"memory"); + for(;i<64;i+=4) { + __asm __volatile( + "movq %1, %%mm0\n\t" + "movq %%mm7, %%mm1\n\t" + "movq %%mm0, %%mm2\n\t" + "movq %%mm0, %%mm3\n\t" + "pcmpgtw %%mm6, %%mm2\n\t" + "pmullw %2, %%mm1\n\t" + "pandn %%mm4, %%mm2\n\t" + "por %%mm5, %%mm2\n\t" + "pmullw %%mm2, %%mm0\n\t" /* 
mm0 = abs(block[i]). */ + + "pcmpeqw %%mm6, %%mm3\n\t" + "pmullw %%mm0, %%mm1\n\t" + "psraw $3, %%mm1\n\t" + "psubw %%mm5, %%mm1\n\t" /* block[i] --; */ + "pandn %%mm4, %%mm3\n\t" /* emulated pcmpneqw: mm3 = -1 where block[i] != 0 */ + "por %%mm5, %%mm1\n\t" /* block[i] |= 1 */ + "pmullw %%mm2, %%mm1\n\t" /* change signs again */ + + "pand %%mm3, %%mm1\n\t" /* zero the result where block[i] was zero */ + "movq %%mm1, %0" + :"=m"(block[i]) + :"m"(block[i]), "m"(quant_matrix[i]) + :"memory"); + } + } else { + i = 0; + unquant_even: + quant_matrix = s->non_intra_matrix; + /* Align on a 4-element boundary */ + while(i&3) + { + level = block[i]; + if (level) { + if (level < 0) level = -level; + level = (((level << 1) + 1) * qscale * + ((int) quant_matrix[i])) >> 4; + level = (level - 1) | 1; + if(block[i] < 0) level = -level; + block[i] = level; + } + i++; + } + __asm __volatile( + "movd %0, %%mm6\n\t" /* mm6 = qscale | 0 */ + "punpckldq %%mm6, %%mm6\n\t" /* mm6 = qscale | qscale */ + "movq %2, %%mm4\n\t" + "movq %%mm6, %%mm7\n\t" + "movq %1, %%mm5\n\t" + "packssdw %%mm6, %%mm7\n\t" /* mm7 = qscale | qscale | qscale | qscale */ + "pxor %%mm6, %%mm6\n\t" + ::"g"(qscale),"m"(mm_wone),"m"(mm_wabs):"memory"); + for(;i<64;i+=4) { + __asm __volatile( + "movq %1, %%mm0\n\t" + "movq %%mm7, %%mm1\n\t" + "movq %%mm0, %%mm2\n\t" + "movq %%mm0, %%mm3\n\t" + "pcmpgtw %%mm6, %%mm2\n\t" + "pmullw %2, %%mm1\n\t" + "pandn %%mm4, %%mm2\n\t" + "por %%mm5, %%mm2\n\t" + "pmullw %%mm2, %%mm0\n\t" /* mm0 = abs(block[i]). 
*/ + "psllw $1, %%mm0\n\t" /* block[i] <<= 1 */ + "paddw %%mm5, %%mm0\n\t" /* block[i] ++ */ + + "pmullw %%mm0, %%mm1\n\t" + "psraw $4, %%mm1\n\t" + "pcmpeqw %%mm6, %%mm3\n\t" + "psubw %%mm5, %%mm1\n\t" /* block[i] --; */ + "pandn %%mm4, %%mm3\n\t" /* emulated pcmpneqw: mm3 = -1 where block[i] != 0 */ + "por %%mm5, %%mm1\n\t" /* block[i] |= 1 */ + "pmullw %%mm2, %%mm1\n\t" /* change signs again */ + + "pand %%mm3, %%mm1\n\t" /* zero the result where block[i] was zero */ + "movq %%mm1, %0" + :"=m"(block[i]) + :"m"(block[i]), "m"(quant_matrix[i]) + :"memory"); + } + } +} + +#ifdef BIN_PORTABILITY +static void (*dct_unquantize_ptr)(MpegEncContext *s, + DCTELEM *block, int n, int qscale); + +void MPV_common_init_mmx(void) +{ + int mm_flags; + mm_flags = mm_support(); + if (mm_flags & MM_MMX) { + dct_unquantize_ptr = dct_unquantize_mmx; + } + else { + dct_unquantize_ptr = dct_unquantize; + } +} + +#define DCT_UNQUANTIZE(a,b,c,d) (*dct_unquantize_ptr)(a,b,c,d) +#else +#define DCT_UNQUANTIZE(a,b,c,d) dct_unquantize(a,b,c,d) +#endif /* BIN_PORTABILITY */ +#endif /* HAVE_MMX */ diff -r 1d3ac9654178 -r 1b4461b5a7fb mpegvideo.c --- a/mpegvideo.c Tue Jul 24 20:43:41 2001 +0000 +++ b/mpegvideo.c Mon Jul 30 09:04:34 2001 +0000 @@ -24,6 +24,15 @@ #include "dsputil.h" #include "mpegvideo.h" +#include "../config.h" + +#ifdef ARCH_X86 +#include "i386/mpegvideo.c" +#endif +#ifndef DCT_UNQUANTIZE +#define DCT_UNQUANTIZE(a,b,c,d) dct_unquantize(a,b,c,d) +#endif + #define EDGE_WIDTH 16 /* enable all paranoid tests for rounding, overflows, etc... 
*/ @@ -89,6 +98,9 @@ int c_size, i; UINT8 *pict; +#if defined ( HAVE_MMX ) && defined ( BIN_PORTABILITY ) + MPV_common_init_mmx(); +#endif s->mb_width = (s->width + 15) / 16; s->mb_height = (s->height + 15) / 16; s->linesize = s->mb_width * 16 + 2 * EDGE_WIDTH; @@ -345,8 +357,8 @@ } } - /* generic function for encode/decode called before a frame is coded/decoded */ +#ifndef ARCH_X86 void MPV_frame_start(MpegEncContext *s) { int i; @@ -366,7 +378,7 @@ } } } - +#endif /* generic function for encode/decode called after a frame has been coded/decoded */ void MPV_frame_end(MpegEncContext *s) { @@ -621,7 +633,7 @@ DCTELEM *block, int i, UINT8 *dest, int line_size) { if (!s->mpeg2) - dct_unquantize(s, block, i, s->qscale); + DCT_UNQUANTIZE(s, block, i, s->qscale); j_rev_dct (block); put_pixels_clamped(block, dest, line_size); } @@ -632,7 +644,7 @@ { if (s->block_last_index[i] >= 0) { if (!s->mpeg2) - dct_unquantize(s, block, i, s->qscale); + DCT_UNQUANTIZE(s, block, i, s->qscale); j_rev_dct (block); add_pixels_clamped(block, dest, line_size); } @@ -1109,6 +1121,7 @@ return last_non_zero; } +#ifndef HAVE_DCT_UNQUANTIZE static void dct_unquantize(MpegEncContext *s, DCTELEM *block, int n, int qscale) { @@ -1172,7 +1185,7 @@ } } } - +#endif /* rate control */