libavcodec.hg: i386/idct_mmx

annotate i386/idct_mmx_xvid.c @ 3995:b00c06477dff libavcodec

write cabac low and range variables as early as possible to prevent stalls from reading them before they were written; the P4 is said to dislike that a lot, on P3 it's 2% faster (START/STOP_TIMER over decode_residual)

author	michael
date	Wed, 11 Oct 2006 16:11:41 +0000
parents	c8c591fe26f8
children	d5ba514e3f4a

rev	line source
2868 666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	1 ///****************************************************************************
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	2 // *
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	3 // * XVID MPEG-4 VIDEO CODEC
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	4 // * - MMX and XMM inverse discrete cosine transform -
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	5 // *
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	6 // * Copyright(C) 2001 Peter Ross <pross@xvid.org>
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	7 // *
3947 c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3932 diff changeset	8 // * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3932 diff changeset	9 // *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3932 diff changeset	10 // * FFmpeg is free software; you can redistribute it and/or
3932 d13da74a26f5 Switch idct_mmx_xvid.c from GPL to LGPL as permitted by the diego parents: 3563 diff changeset	11 // * modify it under the terms of the GNU Lesser General Public
d13da74a26f5 Switch idct_mmx_xvid.c from GPL to LGPL as permitted by the diego parents: 3563 diff changeset	12 // * License as published by the Free Software Foundation; either
3947 c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3932 diff changeset	13 // * version 2.1 of the License, or (at your option) any later version.
2868 666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	14 // *
3947 c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3932 diff changeset	15 // * FFmpeg is distributed in the hope that it will be useful,
3932 d13da74a26f5 Switch idct_mmx_xvid.c from GPL to LGPL as permitted by the diego parents: 3563 diff changeset	16 // * but WITHOUT ANY WARRANTY; without even the implied warranty of
d13da74a26f5 Switch idct_mmx_xvid.c from GPL to LGPL as permitted by the diego parents: 3563 diff changeset	17 // * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
d13da74a26f5 Switch idct_mmx_xvid.c from GPL to LGPL as permitted by the diego parents: 3563 diff changeset	18 // * Lesser General Public License for more details.
2868 666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	19 // *
3932 d13da74a26f5 Switch idct_mmx_xvid.c from GPL to LGPL as permitted by the diego parents: 3563 diff changeset	20 // * You should have received a copy of the GNU Lesser General Public License
3947 c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3932 diff changeset	21 // * along with FFmpeg; if not, write to the Free Software Foundation,
3932 d13da74a26f5 Switch idct_mmx_xvid.c from GPL to LGPL as permitted by the diego parents: 3563 diff changeset	22 // * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2868 666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	23 // *
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	24 // * $Id$
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	25 // *
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	26 // ***************************************************************************/
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	27
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	28 // ****************************************************************************
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	29 //
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	30 // Originally provided by Intel at AP-922
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	31 // http://developer.intel.com/vtune/cbts/strmsimd/922down.htm
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	32 // (See more app notes at http://developer.intel.com/vtune/cbts/strmsimd/appnotes.htm)
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	33 // but in a limited edition.
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	34 // New macro implements a column part for precise iDCT
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	35 // The routine precision now satisfies IEEE standard 1180-1990.
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	36 //
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	37 // Copyright(C) 2000-2001 Peter Gubanov <peter@elecard.net.ru>
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	38 // Rounding trick Copyright(C) 2000 Michel Lespinasse <walken@zoy.org>
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	39 //
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	40 // http://www.elecard.com/peter/idct.html
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	41 // http://www.linuxvideo.org/mpeg2dec/
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	42 //
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	43 // ***************************************************************************/
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	44 //
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	45 // These examples contain code fragments for first stage iDCT 8x8
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	46 // (for rows) and first stage DCT 8x8 (for columns)
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	47 //
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	48
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	49 // conversion to gcc syntax by michael niedermayer
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	50
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	51
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	52 #include <inttypes.h>
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	53 #include "../avcodec.h"
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	54
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	55 //=============================================================================
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	56 // Macros and other preprocessor constants
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	57 //=============================================================================
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	58
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	59 #define BITS_INV_ACC 5 // 4 or 5 for IEEE
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	60 #define SHIFT_INV_ROW (16 - BITS_INV_ACC) //11
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	61 #define SHIFT_INV_COL (1 + BITS_INV_ACC) //6
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	62 #define RND_INV_ROW (1024 * (6 - BITS_INV_ACC)) //1024
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	63 #define RND_INV_COL (16 * (BITS_INV_ACC - 3)) //32
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	64 #define RND_INV_CORR (RND_INV_COL - 1) //31
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	65
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	66 #define BITS_FRW_ACC 3 // 2 or 3 for accuracy
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	67 #define SHIFT_FRW_COL BITS_FRW_ACC //3
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	68 #define SHIFT_FRW_ROW (BITS_FRW_ACC + 17) //20
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	69 #define RND_FRW_ROW (262144*(BITS_FRW_ACC - 1)) //524288
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	70
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	71
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	72 //-----------------------------------------------------------------------------
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	73 // Various memory constants (trigonometric values or rounding values)
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	74 //-----------------------------------------------------------------------------
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	75
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	76
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2870 diff changeset	77 static const int16_t tg_1_16[4*4] attribute_used __attribute__ ((aligned(8))) = {
2870 f7f8a1c2a077 remove unused table entries michael parents: 2868 diff changeset	78 13036,13036,13036,13036, // tan(1*pi/16) * (1<<16) + 0.5
f7f8a1c2a077 remove unused table entries michael parents: 2868 diff changeset	79 27146,27146,27146,27146, // tan(2*pi/16) * (1<<16) + 0.5
f7f8a1c2a077 remove unused table entries michael parents: 2868 diff changeset	80 -21746,-21746,-21746,-21746, // tan(3*pi/16) * (1<<16) + 0.5 = 43790, wrapped to int16 (43790 - 65536)
2868 666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	81 23170,23170,23170,23170}; // cos(4*pi/16) * (1<<15) + 0.5
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	82
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2870 diff changeset	83 static const int32_t rounder_0[2*8] attribute_used __attribute__ ((aligned(8))) = { // 8 pairs of identical 32-bit rounding constants; presumably one pair per row, passed as the ROUNDER macro argument - TODO confirm against the callers
2868 666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	84 65536,65536,
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	85 3597,3597,
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	86 2260,2260,
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	87 1203,1203,
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	88 0,0,
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	89 120,120,
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	90 512,512,
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	91 512,512};
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	92
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	93 //-----------------------------------------------------------------------------
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	94 //
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	95 // The first stage iDCT 8x8 - inverse DCTs of rows
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	96 //
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	97 //-----------------------------------------------------------------------------
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	98 // The 8-point inverse DCT direct algorithm
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	99 //-----------------------------------------------------------------------------
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	100 //
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	101 // static const short w[32] = {
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	102 // FIX(cos_4_16), FIX(cos_2_16), FIX(cos_4_16), FIX(cos_6_16),
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	103 // FIX(cos_4_16), FIX(cos_6_16), -FIX(cos_4_16), -FIX(cos_2_16),
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	104 // FIX(cos_4_16), -FIX(cos_6_16), -FIX(cos_4_16), FIX(cos_2_16),
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	105 // FIX(cos_4_16), -FIX(cos_2_16), FIX(cos_4_16), -FIX(cos_6_16),
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	106 // FIX(cos_1_16), FIX(cos_3_16), FIX(cos_5_16), FIX(cos_7_16),
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	107 // FIX(cos_3_16), -FIX(cos_7_16), -FIX(cos_1_16), -FIX(cos_5_16),
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	108 // FIX(cos_5_16), -FIX(cos_1_16), FIX(cos_7_16), FIX(cos_3_16),
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	109 // FIX(cos_7_16), -FIX(cos_5_16), FIX(cos_3_16), -FIX(cos_1_16) };
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	110 //
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	111 // #define DCT_8_INV_ROW(x, y)
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	112 // {
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	113 // int a0, a1, a2, a3, b0, b1, b2, b3;
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	114 //
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	115 // a0 = x[0] * w[ 0] + x[2] * w[ 1] + x[4] * w[ 2] + x[6] * w[ 3];
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	116 // a1 = x[0] * w[ 4] + x[2] * w[ 5] + x[4] * w[ 6] + x[6] * w[ 7];
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	117 // a2 = x[0] * w[ 8] + x[2] * w[ 9] + x[4] * w[10] + x[6] * w[11];
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	118 // a3 = x[0] * w[12] + x[2] * w[13] + x[4] * w[14] + x[6] * w[15];
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	119 // b0 = x[1] * w[16] + x[3] * w[17] + x[5] * w[18] + x[7] * w[19];
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	120 // b1 = x[1] * w[20] + x[3] * w[21] + x[5] * w[22] + x[7] * w[23];
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	121 // b2 = x[1] * w[24] + x[3] * w[25] + x[5] * w[26] + x[7] * w[27];
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	122 // b3 = x[1] * w[28] + x[3] * w[29] + x[5] * w[30] + x[7] * w[31];
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	123 //
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	124 // y[0] = SHIFT_ROUND ( a0 + b0 );
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	125 // y[1] = SHIFT_ROUND ( a1 + b1 );
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	126 // y[2] = SHIFT_ROUND ( a2 + b2 );
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	127 // y[3] = SHIFT_ROUND ( a3 + b3 );
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	128 // y[4] = SHIFT_ROUND ( a3 - b3 );
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	129 // y[5] = SHIFT_ROUND ( a2 - b2 );
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	130 // y[6] = SHIFT_ROUND ( a1 - b1 );
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	131 // y[7] = SHIFT_ROUND ( a0 - b0 );
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	132 // }
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	133 //
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	134 //-----------------------------------------------------------------------------
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	135 //
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	136 // In this implementation the outputs of the iDCT-1D are multiplied
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	137 // for rows 0,4 - by cos_4_16,
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	138 // for rows 1,7 - by cos_1_16,
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	139 // for rows 2,6 - by cos_2_16,
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	140 // for rows 3,5 - by cos_3_16
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	141 // and are shifted to the left for better accuracy
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	142 //
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	143 // For the constants used,
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	144 // FIX(float_const) = (short) (float_const * (1<<15) + 0.5)
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	145 //
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	146 //-----------------------------------------------------------------------------
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	147
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	148 //-----------------------------------------------------------------------------
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	149 // Tables for mmx processors
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	150 //-----------------------------------------------------------------------------
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	151
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	152 // Table for rows 0,4 - constants are multiplied by cos_4_16
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2870 diff changeset	153 static const int16_t tab_i_04_mmx[32*4] attribute_used __attribute__ ((aligned(8))) = { // four consecutive 32-entry coefficient sets: rows 0,4 / 1,7 / 2,6 / 3,5
2868 666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	154 16384,16384,16384,-16384, // movq-> w06 w04 w02 w00
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	155 21407,8867,8867,-21407, // w07 w05 w03 w01
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	156 16384,-16384,16384,16384, // w14 w12 w10 w08
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	157 -8867,21407,-21407,-8867, // w15 w13 w11 w09
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	158 22725,12873,19266,-22725, // w22 w20 w18 w16
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	159 19266,4520,-4520,-12873, // w23 w21 w19 w17
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	160 12873,4520,4520,19266, // w30 w28 w26 w24
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	161 -22725,19266,-12873,-22725, // w31 w29 w27 w25
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	162 // Table for rows 1,7 - constants are multiplied by cos_1_16
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	163 22725,22725,22725,-22725, // movq-> w06 w04 w02 w00
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	164 29692,12299,12299,-29692, // w07 w05 w03 w01
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	165 22725,-22725,22725,22725, // w14 w12 w10 w08
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	166 -12299,29692,-29692,-12299, // w15 w13 w11 w09
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	167 31521,17855,26722,-31521, // w22 w20 w18 w16
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	168 26722,6270,-6270,-17855, // w23 w21 w19 w17
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	169 17855,6270,6270,26722, // w30 w28 w26 w24
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	170 -31521,26722,-17855,-31521, // w31 w29 w27 w25
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	171 // Table for rows 2,6 - constants are multiplied by cos_2_16
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	172 21407,21407,21407,-21407, // movq-> w06 w04 w02 w00
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	173 27969,11585,11585,-27969, // w07 w05 w03 w01
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	174 21407,-21407,21407,21407, // w14 w12 w10 w08
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	175 -11585,27969,-27969,-11585, // w15 w13 w11 w09
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	176 29692,16819,25172,-29692, // w22 w20 w18 w16
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	177 25172,5906,-5906,-16819, // w23 w21 w19 w17
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	178 16819,5906,5906,25172, // w30 w28 w26 w24
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	179 -29692,25172,-16819,-29692, // w31 w29 w27 w25
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	180 // Table for rows 3,5 - constants are multiplied by cos_3_16
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	181 19266,19266,19266,-19266, // movq-> w06 w04 w02 w00
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	182 25172,10426,10426,-25172, // w07 w05 w03 w01
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	183 19266,-19266,19266,19266, // w14 w12 w10 w08
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	184 -10426,25172,-25172,-10426, // w15 w13 w11 w09
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	185 26722,15137,22654,-26722, // w22 w20 w18 w16
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	186 22654,5315,-5315,-15137, // w23 w21 w19 w17
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	187 15137,5315,5315,22654, // w30 w28 w26 w24
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	188 -26722,22654,-15137,-26722, // w31 w29 w27 w25
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	189 };
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	190 //-----------------------------------------------------------------------------
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	191 // Tables for xmm processors
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	192 //-----------------------------------------------------------------------------
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	193
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	194 // Table for rows 0,4 - constants are multiplied by cos_4_16
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2870 diff changeset	195 static const int16_t tab_i_04_xmm[32*4] attribute_used __attribute__ ((aligned(8))) = { // four consecutive 32-entry coefficient sets: rows 0,4 / 1,7 / 2,6 / 3,5
2868 666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	196 16384,21407,16384,8867, // movq-> w05 w04 w01 w00
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	197 16384,8867,-16384,-21407, // w07 w06 w03 w02
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	198 16384,-8867,16384,-21407, // w13 w12 w09 w08
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	199 -16384,21407,16384,-8867, // w15 w14 w11 w10
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	200 22725,19266,19266,-4520, // w21 w20 w17 w16
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	201 12873,4520,-22725,-12873, // w23 w22 w19 w18
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	202 12873,-22725,4520,-12873, // w29 w28 w25 w24
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	203 4520,19266,19266,-22725, // w31 w30 w27 w26
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	204 // Table for rows 1,7 - constants are multiplied by cos_1_16
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	205 22725,29692,22725,12299, // movq-> w05 w04 w01 w00
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	206 22725,12299,-22725,-29692, // w07 w06 w03 w02
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	207 22725,-12299,22725,-29692, // w13 w12 w09 w08
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	208 -22725,29692,22725,-12299, // w15 w14 w11 w10
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	209 31521,26722,26722,-6270, // w21 w20 w17 w16
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	210 17855,6270,-31521,-17855, // w23 w22 w19 w18
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	211 17855,-31521,6270,-17855, // w29 w28 w25 w24
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	212 6270,26722,26722,-31521, // w31 w30 w27 w26
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	213 // Table for rows 2,6 - constants are multiplied by cos_2_16
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	214 21407,27969,21407,11585, // movq-> w05 w04 w01 w00
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	215 21407,11585,-21407,-27969, // w07 w06 w03 w02
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	216 21407,-11585,21407,-27969, // w13 w12 w09 w08
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	217 -21407,27969,21407,-11585, // w15 w14 w11 w10
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	218 29692,25172,25172,-5906, // w21 w20 w17 w16
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	219 16819,5906,-29692,-16819, // w23 w22 w19 w18
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	220 16819,-29692,5906,-16819, // w29 w28 w25 w24
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	221 5906,25172,25172,-29692, // w31 w30 w27 w26
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	222 // Table for rows 3,5 - constants are multiplied by cos_3_16
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	223 19266,25172,19266,10426, // movq-> w05 w04 w01 w00
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	224 19266,10426,-19266,-25172, // w07 w06 w03 w02
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	225 19266,-10426,19266,-25172, // w13 w12 w09 w08
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	226 -19266,25172,19266,-10426, // w15 w14 w11 w10
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	227 26722,22654,22654,-5315, // w21 w20 w17 w16
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	228 15137,5315,-26722,-15137, // w23 w22 w19 w18
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	229 15137,-26722,5315,-15137, // w29 w28 w25 w24
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	230 5315,22654,22654,-26722, // w31 w30 w27 w26
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	231 };
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	232 //=============================================================================
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	233 // Helper macros for the code
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	234 //=============================================================================
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	235
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	236 //-----------------------------------------------------------------------------
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	237 // DCT_8_INV_ROW_MMX( INP, OUT, TABLE, ROUNDER )
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	238 //-----------------------------------------------------------------------------
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	239
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	240 #define DCT_8_INV_ROW_MMX(A1,A2,A3,A4) /* asm text for one 8-point row iDCT: A1=input row, A2=output row, A3=coefficient table, A4=rounder; all four are pasted in as memory operands */\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	241 "movq " #A1 ",%%mm0 \n\t"/* 0 ; x3 x2 x1 x0*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	242 "movq 8+" #A1 ",%%mm1 \n\t"/* 1 ; x7 x6 x5 x4*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	243 "movq %%mm0,%%mm2 \n\t"/* 2 ; x3 x2 x1 x0*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	244 "movq " #A3 ",%%mm3 \n\t"/* 3 ; w06 w04 w02 w00*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	245 "punpcklwd %%mm1,%%mm0 \n\t"/* x5 x1 x4 x0*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	246 "movq %%mm0,%%mm5 \n\t"/* 5 ; x5 x1 x4 x0*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	247 "punpckldq %%mm0,%%mm0 \n\t"/* x4 x0 x4 x0*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	248 "movq 8+" #A3 ",%%mm4 \n\t"/* 4 ; w07 w05 w03 w01*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	249 "punpckhwd %%mm1,%%mm2 \n\t"/* 1 ; x7 x3 x6 x2*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	250 "pmaddwd %%mm0,%%mm3 \n\t"/* x4w06+x0w04 x4w02+x0w00*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	251 "movq %%mm2,%%mm6 \n\t"/* 6 ; x7 x3 x6 x2*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	252 "movq 32+" #A3 ",%%mm1 \n\t"/* 1 ; w22 w20 w18 w16*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	253 "punpckldq %%mm2,%%mm2 \n\t"/* x6 x2 x6 x2*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	254 "pmaddwd %%mm2,%%mm4 \n\t"/* x6w07+x2w05 x6w03+x2w01*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	255 "punpckhdq %%mm5,%%mm5 \n\t"/* x5 x1 x5 x1*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	256 "pmaddwd 16+" #A3 ",%%mm0 \n\t"/* x4w14+x0w12 x4w10+x0w08*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	257 "punpckhdq %%mm6,%%mm6 \n\t"/* x7 x3 x7 x3*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	258 "movq 40+" #A3 ",%%mm7 \n\t"/* 7 ; w23 w21 w19 w17*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	259 "pmaddwd %%mm5,%%mm1 \n\t"/* x5w22+x1w20 x5w18+x1w16*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	260 "paddd " #A4 ",%%mm3 \n\t"/* + rounder (A4)*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	261 "pmaddwd %%mm6,%%mm7 \n\t"/* x7w23+x3w21 x7w19+x3w17*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	262 "pmaddwd 24+" #A3 ",%%mm2 \n\t"/* x6w15+x2w13 x6w11+x2w09*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	263 "paddd %%mm4,%%mm3 \n\t"/* 4 ; a1=sum(even1) a0=sum(even0)*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	264 "pmaddwd 48+" #A3 ",%%mm5 \n\t"/* x5w30+x1w28 x5w26+x1w24*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	265 "movq %%mm3,%%mm4 \n\t"/* 4 ; a1 a0*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	266 "pmaddwd 56+" #A3 ",%%mm6 \n\t"/* x7w31+x3w29 x7w27+x3w25*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	267 "paddd %%mm7,%%mm1 \n\t"/* 7 ; b1=sum(odd1) b0=sum(odd0)*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	268 "paddd " #A4 ",%%mm0 \n\t"/* + rounder (A4)*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	269 "psubd %%mm1,%%mm3 \n\t"/* a1-b1 a0-b0*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	270 "psrad $11,%%mm3 \n\t"/* y6=a1-b1 y7=a0-b0 ; NOTE: the shift count 11 is hard-coded here and in the other psrad lines; it equals SHIFT_INV_ROW only while BITS_INV_ACC is 5*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	271 "paddd %%mm4,%%mm1 \n\t"/* 4 ; a1+b1 a0+b0*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	272 "paddd %%mm2,%%mm0 \n\t"/* 2 ; a3=sum(even3) a2=sum(even2)*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	273 "psrad $11,%%mm1 \n\t"/* y1=a1+b1 y0=a0+b0*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	274 "paddd %%mm6,%%mm5 \n\t"/* 6 ; b3=sum(odd3) b2=sum(odd2)*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	275 "movq %%mm0,%%mm4 \n\t"/* 4 ; a3 a2*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	276 "paddd %%mm5,%%mm0 \n\t"/* a3+b3 a2+b2*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	277 "psubd %%mm5,%%mm4 \n\t"/* 5 ; a3-b3 a2-b2*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	278 "psrad $11,%%mm0 \n\t"/* y3=a3+b3 y2=a2+b2*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	279 "psrad $11,%%mm4 \n\t"/* y4=a3-b3 y5=a2-b2*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	280 "packssdw %%mm0,%%mm1 \n\t"/* 0 ; y3 y2 y1 y0*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	281 "packssdw %%mm3,%%mm4 \n\t"/* 3 ; y6 y7 y4 y5*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	282 "movq %%mm4,%%mm7 \n\t"/* 7 ; y6 y7 y4 y5*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	283 "psrld $16,%%mm4 \n\t"/* 0 y6 0 y4*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	284 "pslld $16,%%mm7 \n\t"/* y7 0 y5 0*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	285 "movq %%mm1," #A2 " \n\t"/* 1 ; save y3 y2 y1 y0*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	286 "por %%mm4,%%mm7 \n\t"/* 4 ; y7 y6 y5 y4*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	287 "movq %%mm7,8 +" #A2 "\n\t"/* 7 ; save y7 y6 y5 y4*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	288
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	289
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	290 //-----------------------------------------------------------------------------
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	291 // DCT_8_INV_ROW_XMM( INP, OUT, TABLE, ROUNDER )
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	292 //-----------------------------------------------------------------------------
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	293
// DCT_8_INV_ROW_XMM(A1, A2, A3, A4): row pass of the inverse DCT for one row
// of eight 16-bit coefficients. Expands to a run of adjacent string literals
// meant to be pasted into the template of an asm statement.
//   A1 - address operand of the input row  (16 bytes read:  A1 and 8+A1)
//   A2 - address operand of the output row (16 bytes written: A2 and 8+A2)
//   A3 - address operand of the coefficient table (read at offsets 0..56, 8 bytes each)
//   A4 - address operand of the rounder that is added to both even sums
// pshufw gathers the even (x0,x2 / x4,x6) and odd (x1,x3 / x5,x7) inputs,
// pmaddwd forms the even sums a0..a3 and odd sums b0..b3, the results
// a+b / a-b are shifted right arithmetically by 11 and packed with signed
// saturation, and a final pshufw swaps y6 y7 y4 y5 into y7 y6 y5 y4.
// All of mm0-mm7 are overwritten. The arguments are stringized (#A1 ...),
// so they must already be valid asm operand text such as 0*16(%0).
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	294 #define DCT_8_INV_ROW_XMM(A1,A2,A3,A4)\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	295 "movq " #A1 ",%%mm0 \n\t"/* 0 ; x3 x2 x1 x0*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	296 "movq 8+" #A1 ",%%mm1 \n\t"/* 1 ; x7 x6 x5 x4*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	297 "movq %%mm0,%%mm2 \n\t"/* 2 ; x3 x2 x1 x0*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	298 "movq " #A3 ",%%mm3 \n\t"/* 3 ; w05 w04 w01 w00*/\
3563 a3db61f32f5a Support for Mac OS X Intel, part 3: binary integer constants: gpoirier parents: 3036 diff changeset	299 "pshufw $0x88,%%mm0,%%mm0 \n\t"/* x2 x0 x2 x0*/\
2868 666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	300 "movq 8+" #A3 ",%%mm4 \n\t"/* 4 ; w07 w06 w03 w02*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	301 "movq %%mm1,%%mm5 \n\t"/* 5 ; x7 x6 x5 x4*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	302 "pmaddwd %%mm0,%%mm3 \n\t"/* x2w05+x0w04 x2w01+x0w00*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	303 "movq 32+" #A3 ",%%mm6 \n\t"/* 6 ; w21 w20 w17 w16*/\
3563 a3db61f32f5a Support for Mac OS X Intel, part 3: binary integer constants: gpoirier parents: 3036 diff changeset	304 "pshufw $0x88,%%mm1,%%mm1 \n\t"/* x6 x4 x6 x4*/\
2868 666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	305 "pmaddwd %%mm1,%%mm4 \n\t"/* x6w07+x4w06 x6w03+x4w02*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	306 "movq 40+" #A3 ",%%mm7 \n\t"/* 7 ; w23 w22 w19 w18*/\
3563 a3db61f32f5a Support for Mac OS X Intel, part 3: binary integer constants: gpoirier parents: 3036 diff changeset	307 "pshufw $0xdd,%%mm2,%%mm2 \n\t"/* x3 x1 x3 x1*/\
2868 666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	308 "pmaddwd %%mm2,%%mm6 \n\t"/* x3w21+x1w20 x3w17+x1w16*/\
3563 a3db61f32f5a Support for Mac OS X Intel, part 3: binary integer constants: gpoirier parents: 3036 diff changeset	309 "pshufw $0xdd,%%mm5,%%mm5 \n\t"/* x7 x5 x7 x5*/\
2868 666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	310 "pmaddwd %%mm5,%%mm7 \n\t"/* x7w23+x5w22 x7w19+x5w18*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	311 "paddd " #A4 ",%%mm3 \n\t"/* +%4*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	312 "pmaddwd 16+" #A3 ",%%mm0 \n\t"/* x2w13+x0w12 x2w09+x0w08*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	313 "paddd %%mm4,%%mm3 \n\t"/* 4 ; a1=sum(even1) a0=sum(even0)*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	314 "pmaddwd 24+" #A3 ",%%mm1 \n\t"/* x6w15+x4w14 x6w11+x4w10*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	315 "movq %%mm3,%%mm4 \n\t"/* 4 ; a1 a0*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	316 "pmaddwd 48+" #A3 ",%%mm2 \n\t"/* x3w29+x1w28 x3w25+x1w24*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	317 "paddd %%mm7,%%mm6 \n\t"/* 7 ; b1=sum(odd1) b0=sum(odd0)*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	318 "pmaddwd 56+" #A3 ",%%mm5 \n\t"/* x7w31+x5w30 x7w27+x5w26*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	319 "paddd %%mm6,%%mm3 \n\t"/* a1+b1 a0+b0*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	320 "paddd " #A4 ",%%mm0 \n\t"/* +%4*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	321 "psrad $11,%%mm3 \n\t"/* y1=a1+b1 y0=a0+b0*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	322 "paddd %%mm1,%%mm0 \n\t"/* 1 ; a3=sum(even3) a2=sum(even2)*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	323 "psubd %%mm6,%%mm4 \n\t"/* 6 ; a1-b1 a0-b0*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	324 "movq %%mm0,%%mm7 \n\t"/* 7 ; a3 a2*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	325 "paddd %%mm5,%%mm2 \n\t"/* 5 ; b3=sum(odd3) b2=sum(odd2)*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	326 "paddd %%mm2,%%mm0 \n\t"/* a3+b3 a2+b2*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	327 "psrad $11,%%mm4 \n\t"/* y6=a1-b1 y7=a0-b0*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	328 "psubd %%mm2,%%mm7 \n\t"/* 2 ; a3-b3 a2-b2*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	329 "psrad $11,%%mm0 \n\t"/* y3=a3+b3 y2=a2+b2*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	330 "psrad $11,%%mm7 \n\t"/* y4=a3-b3 y5=a2-b2*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	331 "packssdw %%mm0,%%mm3 \n\t"/* 0 ; y3 y2 y1 y0*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	332 "packssdw %%mm4,%%mm7 \n\t"/* 4 ; y6 y7 y4 y5*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	333 "movq %%mm3, " #A2 " \n\t"/* 3 ; save y3 y2 y1 y0*/\
3563 a3db61f32f5a Support for Mac OS X Intel, part 3: binary integer constants: gpoirier parents: 3036 diff changeset	334 "pshufw $0xb1,%%mm7,%%mm7 \n\t"/* y7 y6 y5 y4*/\
2868 666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	335 "movq %%mm7,8 +" #A2 "\n\t"/* 7 ; save y7 y6 y5 y4*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	336
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	337
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	338 //-----------------------------------------------------------------------------
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	339 //
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	340 // The first stage DCT 8x8 - forward DCTs of columns
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	341 //
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	342 // The outputs are multiplied
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	343 // for rows 0,4 - on cos_4_16,
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	344 // for rows 1,7 - on cos_1_16,
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	345 // for rows 2,6 - on cos_2_16,
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	346 // for rows 3,5 - on cos_3_16
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	347 // and are shifted to the left for rise of accuracy
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	348 //
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	349 //-----------------------------------------------------------------------------
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	350 //
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	351 // The 8-point scaled forward DCT algorithm (26a8m)
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	352 //
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	353 //-----------------------------------------------------------------------------
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	354 //
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	355 // #define DCT_8_FRW_COL(x, y)
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	356 //{
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	357 // short t0, t1, t2, t3, t4, t5, t6, t7;
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	358 // short tp03, tm03, tp12, tm12, tp65, tm65;
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	359 // short tp465, tm465, tp765, tm765;
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	360 //
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	361 // t0 = LEFT_SHIFT ( x[0] + x[7] );
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	362 // t1 = LEFT_SHIFT ( x[1] + x[6] );
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	363 // t2 = LEFT_SHIFT ( x[2] + x[5] );
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	364 // t3 = LEFT_SHIFT ( x[3] + x[4] );
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	365 // t4 = LEFT_SHIFT ( x[3] - x[4] );
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	366 // t5 = LEFT_SHIFT ( x[2] - x[5] );
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	367 // t6 = LEFT_SHIFT ( x[1] - x[6] );
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	368 // t7 = LEFT_SHIFT ( x[0] - x[7] );
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	369 //
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	370 // tp03 = t0 + t3;
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	371 // tm03 = t0 - t3;
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	372 // tp12 = t1 + t2;
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	373 // tm12 = t1 - t2;
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	374 //
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	375 // y[0] = tp03 + tp12;
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	376 // y[4] = tp03 - tp12;
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	377 //
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	378 // y[2] = tm03 + tm12 * tg_2_16;
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	379 // y[6] = tm03 * tg_2_16 - tm12;
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	380 //
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	381 // tp65 =(t6 +t5 )*cos_4_16;
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	382 // tm65 =(t6 -t5 )*cos_4_16;
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	383 //
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	384 // tp765 = t7 + tp65;
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	385 // tm765 = t7 - tp65;
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	386 // tp465 = t4 + tm65;
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	387 // tm465 = t4 - tm65;
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	388 //
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	389 // y[1] = tp765 + tp465 * tg_1_16;
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	390 // y[7] = tp765 * tg_1_16 - tp465;
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	391 // y[5] = tm765 * tg_3_16 + tm465;
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	392 // y[3] = tm765 - tm465 * tg_3_16;
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	393 //}
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	394 //
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	395 //-----------------------------------------------------------------------------
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	396
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	397 //-----------------------------------------------------------------------------
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	398 // DCT_8_INV_COL( INP, OUT ) - processes 4 columns per invocation
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	399 //-----------------------------------------------------------------------------
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	400
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	401 #define DCT_8_INV_COL(A1,A2)\
2870 f7f8a1c2a077 remove unused table entries michael parents: 2868 diff changeset	402 "movq 2*8(%3),%%mm0\n\t"\
2868 666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	403 "movq 16*3+" #A1 ",%%mm3\n\t"\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	404 "movq %%mm0,%%mm1 \n\t"/* tg_3_16*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	405 "movq 16*5+" #A1 ",%%mm5\n\t"\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	406 "pmulhw %%mm3,%%mm0 \n\t"/* x3(tg_3_16-1)/\
2870 f7f8a1c2a077 remove unused table entries michael parents: 2868 diff changeset	407 "movq (%3),%%mm4\n\t"\
2868 666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	408 "pmulhw %%mm5,%%mm1 \n\t"/* x5(tg_3_16-1)/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	409 "movq 16*7+" #A1 ",%%mm7\n\t"\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	410 "movq %%mm4,%%mm2 \n\t"/* tg_1_16*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	411 "movq 16*1+" #A1 ",%%mm6\n\t"\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	412 "pmulhw %%mm7,%%mm4 \n\t"/* x7tg_1_16/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	413 "paddsw %%mm3,%%mm0 \n\t"/* x3tg_3_16/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	414 "pmulhw %%mm6,%%mm2 \n\t"/* x1tg_1_16/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	415 "paddsw %%mm3,%%mm1 \n\t"/* x3+x5(tg_3_16-1)/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	416 "psubsw %%mm5,%%mm0 \n\t"/* x3tg_3_16-x5 = tm35/\
2870 f7f8a1c2a077 remove unused table entries michael parents: 2868 diff changeset	417 "movq 3*8(%3),%%mm3\n\t"\
2868 666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	418 "paddsw %%mm5,%%mm1 \n\t"/* x3+x5tg_3_16 = tp35/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	419 "paddsw %%mm6,%%mm4 \n\t"/* x1+tg_1_16x7 = tp17/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	420 "psubsw %%mm7,%%mm2 \n\t"/* x1tg_1_16-x7 = tm17/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	421 "movq %%mm4,%%mm5 \n\t"/* tp17*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	422 "movq %%mm2,%%mm6 \n\t"/* tm17*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	423 "paddsw %%mm1,%%mm5 \n\t"/* tp17+tp35 = b0*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	424 "psubsw %%mm0,%%mm6 \n\t"/* tm17-tm35 = b3*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	425 "psubsw %%mm1,%%mm4 \n\t"/* tp17-tp35 = t1*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	426 "paddsw %%mm0,%%mm2 \n\t"/* tm17+tm35 = t2*/\
2870 f7f8a1c2a077 remove unused table entries michael parents: 2868 diff changeset	427 "movq 1*8(%3),%%mm7\n\t"\
2868 666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	428 "movq %%mm4,%%mm1 \n\t"/* t1*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	429 "movq %%mm5,316 +" #A2 "\n\t"/ save b0*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	430 "paddsw %%mm2,%%mm1 \n\t"/* t1+t2*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	431 "movq %%mm6,516 +" #A2 "\n\t"/ save b3*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	432 "psubsw %%mm2,%%mm4 \n\t"/* t1-t2*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	433 "movq 2*16+" #A1 ",%%mm5\n\t"\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	434 "movq %%mm7,%%mm0 \n\t"/* tg_2_16*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	435 "movq 6*16+" #A1 ",%%mm6\n\t"\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	436 "pmulhw %%mm5,%%mm0 \n\t"/* x2tg_2_16/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	437 "pmulhw %%mm6,%%mm7 \n\t"/* x6tg_2_16/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	438 "pmulhw %%mm3,%%mm1 \n\t"/* ocos_4_16(t1+t2) = b1/2/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	439 "movq 0*16+" #A1 ",%%mm2\n\t"\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	440 "pmulhw %%mm3,%%mm4 \n\t"/* ocos_4_16(t1-t2) = b2/2/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	441 "psubsw %%mm6,%%mm0 \n\t"/* t2tg_2_16-x6 = tm26/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	442 "movq %%mm2,%%mm3 \n\t"/* x0*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	443 "movq 4*16+" #A1 ",%%mm6\n\t"\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	444 "paddsw %%mm5,%%mm7 \n\t"/* x2+x6tg_2_16 = tp26/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	445 "paddsw %%mm6,%%mm2 \n\t"/* x0+x4 = tp04*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	446 "psubsw %%mm6,%%mm3 \n\t"/* x0-x4 = tm04*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	447 "movq %%mm2,%%mm5 \n\t"/* tp04*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	448 "movq %%mm3,%%mm6 \n\t"/* tm04*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	449 "psubsw %%mm7,%%mm2 \n\t"/* tp04-tp26 = a3*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	450 "paddsw %%mm0,%%mm3 \n\t"/* tm04+tm26 = a1*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	451 "paddsw %%mm1,%%mm1 \n\t"/* b1*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	452 "paddsw %%mm4,%%mm4 \n\t"/* b2*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	453 "paddsw %%mm7,%%mm5 \n\t"/* tp04+tp26 = a0*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	454 "psubsw %%mm0,%%mm6 \n\t"/* tm04-tm26 = a2*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	455 "movq %%mm3,%%mm7 \n\t"/* a1*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	456 "movq %%mm6,%%mm0 \n\t"/* a2*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	457 "paddsw %%mm1,%%mm3 \n\t"/* a1+b1*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	458 "paddsw %%mm4,%%mm6 \n\t"/* a2+b2*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	459 "psraw $6,%%mm3 \n\t"/* dst1*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	460 "psubsw %%mm1,%%mm7 \n\t"/* a1-b1*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	461 "psraw $6,%%mm6 \n\t"/* dst2*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	462 "psubsw %%mm4,%%mm0 \n\t"/* a2-b2*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	463 "movq 316+" #A2 ",%%mm1 \n\t"/ load b0*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	464 "psraw $6,%%mm7 \n\t"/* dst6*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	465 "movq %%mm5,%%mm4 \n\t"/* a0*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	466 "psraw $6,%%mm0 \n\t"/* dst5*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	467 "movq %%mm3,1*16+" #A2 "\n\t"\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	468 "paddsw %%mm1,%%mm5 \n\t"/* a0+b0*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	469 "movq %%mm6,2*16+" #A2 "\n\t"\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	470 "psubsw %%mm1,%%mm4 \n\t"/* a0-b0*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	471 "movq 516+" #A2 ",%%mm3 \n\t"/ load b3*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	472 "psraw $6,%%mm5 \n\t"/* dst0*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	473 "movq %%mm2,%%mm6 \n\t"/* a3*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	474 "psraw $6,%%mm4 \n\t"/* dst7*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	475 "movq %%mm0,5*16+" #A2 "\n\t"\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	476 "paddsw %%mm3,%%mm2 \n\t"/* a3+b3*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	477 "movq %%mm7,6*16+" #A2 "\n\t"\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	478 "psubsw %%mm3,%%mm6 \n\t"/* a3-b3*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	479 "movq %%mm5,0*16+" #A2 "\n\t"\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	480 "psraw $6,%%mm2 \n\t"/* dst3*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	481 "movq %%mm4,7*16+" #A2 "\n\t"\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	482 "psraw $6,%%mm6 \n\t"/* dst4*/\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	483 "movq %%mm2,3*16+" #A2 "\n\t"\
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	484 "movq %%mm6,4*16+" #A2 "\n\t"
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	485
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	486 //=============================================================================
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	487 // Code
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	488 //=============================================================================
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	489
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	490 //-----------------------------------------------------------------------------
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	491 // void ff_idct_xvid_mmx(short *block);
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	492 //-----------------------------------------------------------------------------
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	493
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	494
// In-place inverse DCT of the 8x8 block of 16-bit values at `block`, using
// the MMX row macro. Eight row passes (row k lives at k*16(%0)) are followed
// by two column passes that each cover four columns (offsets 0 and 8).
// Asm operands: %0 = block, %1 = rounder_0 (row k uses the entry at 8*k),
// %2 = tab_i_04_mmx (64-byte tables; rows 0/4 use table 0, rows 1/7 table 1,
// rows 2/6 table 2, rows 3/5 table 3), %3 = tg_1_16 (column-pass constants).
// No emms is issued in this function; NOTE(review): presumably the caller is
// responsible for restoring the FPU state - confirm.
// Fix: the k*16 / 64*k / 8*k operand expressions had lost their '*' in this
// listing (e.g. 016(%0), 640(%2), 80(%1)) and are restored. A "memory"
// clobber is added because the asm writes through `block` without listing
// any output operand.
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	495 void ff_idct_xvid_mmx(short *block){
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	496 asm volatile(
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	497 //# Process each row
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	498 DCT_8_INV_ROW_MMX(0*16(%0), 0*16(%0), 64*0(%2), 8*0(%1))
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	499 DCT_8_INV_ROW_MMX(1*16(%0), 1*16(%0), 64*1(%2), 8*1(%1))
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	500 DCT_8_INV_ROW_MMX(2*16(%0), 2*16(%0), 64*2(%2), 8*2(%1))
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	501 DCT_8_INV_ROW_MMX(3*16(%0), 3*16(%0), 64*3(%2), 8*3(%1))
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	502 DCT_8_INV_ROW_MMX(4*16(%0), 4*16(%0), 64*0(%2), 8*4(%1))
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	503 DCT_8_INV_ROW_MMX(5*16(%0), 5*16(%0), 64*3(%2), 8*5(%1))
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	504 DCT_8_INV_ROW_MMX(6*16(%0), 6*16(%0), 64*2(%2), 8*6(%1))
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	505 DCT_8_INV_ROW_MMX(7*16(%0), 7*16(%0), 64*1(%2), 8*7(%1))
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2870 diff changeset	506
2868 666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	507 //# Process the columns (4 at a time)
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	508 DCT_8_INV_COL(0(%0), 0(%0))
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	509 DCT_8_INV_COL(8(%0), 8(%0))
2870 f7f8a1c2a077 remove unused table entries michael parents: 2868 diff changeset	510 :: "r"(block), "r"(rounder_0), "r"(tab_i_04_mmx), "r"(tg_1_16) : "memory");
2868 666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	511 }
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	512
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	513 //-----------------------------------------------------------------------------
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	514 // void ff_idct_xvid_mmx2(short *block);
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	515 //-----------------------------------------------------------------------------
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	516
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	517
// In-place inverse DCT of the 8x8 block of 16-bit values at `block`, using
// the pshufw-based row macro DCT_8_INV_ROW_XMM. Eight row passes (row k
// lives at k*16(%0)) are followed by two column passes that each cover four
// columns (offsets 0 and 8).
// Asm operands: %0 = block, %1 = rounder_0 (row k uses the entry at 8*k),
// %2 = tab_i_04_xmm (64-byte tables; rows 0/4 use table 0, rows 1/7 table 1,
// rows 2/6 table 2, rows 3/5 table 3), %3 = tg_1_16 (column-pass constants).
// No emms is issued in this function; NOTE(review): presumably the caller is
// responsible for restoring the FPU state - confirm.
// Fix: the k*16 / 64*k / 8*k operand expressions had lost their '*' in this
// listing (e.g. 016(%0), 640(%2), 80(%1)) and are restored. A "memory"
// clobber is added because the asm writes through `block` without listing
// any output operand.
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	518 void ff_idct_xvid_mmx2(short *block){
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	519 asm volatile(
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	520 //# Process each row
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	521 DCT_8_INV_ROW_XMM(0*16(%0), 0*16(%0), 64*0(%2), 8*0(%1))
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	522 DCT_8_INV_ROW_XMM(1*16(%0), 1*16(%0), 64*1(%2), 8*1(%1))
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	523 DCT_8_INV_ROW_XMM(2*16(%0), 2*16(%0), 64*2(%2), 8*2(%1))
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	524 DCT_8_INV_ROW_XMM(3*16(%0), 3*16(%0), 64*3(%2), 8*3(%1))
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	525 DCT_8_INV_ROW_XMM(4*16(%0), 4*16(%0), 64*0(%2), 8*4(%1))
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	526 DCT_8_INV_ROW_XMM(5*16(%0), 5*16(%0), 64*3(%2), 8*5(%1))
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	527 DCT_8_INV_ROW_XMM(6*16(%0), 6*16(%0), 64*2(%2), 8*6(%1))
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	528 DCT_8_INV_ROW_XMM(7*16(%0), 7*16(%0), 64*1(%2), 8*7(%1))
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2870 diff changeset	529
2868 666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	530 //# Process the columns (4 at a time)
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	531 DCT_8_INV_COL(0(%0), 0(%0))
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	532 DCT_8_INV_COL(8(%0), 8(%0))
2870 f7f8a1c2a077 remove unused table entries michael parents: 2868 diff changeset	533 :: "r"(block), "r"(rounder_0), "r"(tab_i_04_xmm), "r"(tg_1_16) : "memory");
2868 666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	534 }
666064f710d4 xvids mmx&mmx2 idcts michael parents: diff changeset	535

Mercurial > libavcodec.hg

annotate i386/idct_mmx_xvid.c @ 3995:b00c06477dff libavcodec