Mercurial > libavcodec.hg
annotate i386/dsputil_mmx.c @ 1708:dea5b2946999 libavcodec
interlaced motion estimation
interlaced mpeg2 encoding
P & B frames
rate distorted interlaced mb decision
alternate scantable support
4mv encoding fixes (that's also why the regression tests change)
passing height to most dsp functions
interlaced mpeg4 encoding (no direct mode MBs yet)
various related cleanups
disabled old motion estimation algorithms (log, full, ...) they will either be fixed or removed
author | michael |
---|---|
date | Tue, 30 Dec 2003 16:07:57 +0000 |
parents | 68abbec33289 |
children | a4a5e7521339 |
rev | line source |
---|---|
0 | 1 /* |
2 * MMX optimized DSP utils | |
429 | 3 * Copyright (c) 2000, 2001 Fabrice Bellard. |
0 | 4 * |
429 | 5 * This library is free software; you can redistribute it and/or |
6 * modify it under the terms of the GNU Lesser General Public | |
7 * License as published by the Free Software Foundation; either | |
8 * version 2 of the License, or (at your option) any later version. | |
0 | 9 * |
429 | 10 * This library is distributed in the hope that it will be useful, |
0 | 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
429 | 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 * Lesser General Public License for more details. | |
0 | 14 * |
429 | 15 * You should have received a copy of the GNU Lesser General Public |
16 * License along with this library; if not, write to the Free Software | |
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
0 | 18 * |
19 * MMX optimization by Nick Kurshev <nickols_k@mail.ru> | |
20 */ | |
21 | |
22 #include "../dsputil.h" | |
1092 | 23 #include "../simple_idct.h" |
0 | 24 |
1647 | 25 extern const uint8_t ff_h263_loop_filter_strength[32]; |
26 | |
5 | 27 int mm_flags; /* multimedia extension flags */ |
936 | 28 |
0 | 29 /* pixel operations */ |
387 | 30 static const uint64_t mm_bone __attribute__ ((aligned(8))) = 0x0101010101010101ULL; |
31 static const uint64_t mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL; | |
32 static const uint64_t mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002ULL; | |
0 | 33 |
954 | 34 static const uint64_t ff_pw_20 __attribute__ ((aligned(8))) = 0x0014001400140014ULL; |
35 static const uint64_t ff_pw_3 __attribute__ ((aligned(8))) = 0x0003000300030003ULL; | |
36 static const uint64_t ff_pw_16 __attribute__ ((aligned(8))) = 0x0010001000100010ULL; | |
37 static const uint64_t ff_pw_15 __attribute__ ((aligned(8))) = 0x000F000F000F000FULL; | |
38 | |
1647 | 39 static const uint64_t ff_pb_FC __attribute__ ((aligned(8))) = 0xFCFCFCFCFCFCFCFCULL; |
40 | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
41 #define JUMPALIGN() __asm __volatile (".balign 8"::) |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
42 #define MOVQ_ZERO(regd) __asm __volatile ("pxor %%" #regd ", %%" #regd ::) |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
43 |
448 | 44 #define MOVQ_WONE(regd) \ |
45 __asm __volatile ( \ | |
46 "pcmpeqd %%" #regd ", %%" #regd " \n\t" \ | |
47 "psrlw $15, %%" #regd ::) | |
48 | |
49 #define MOVQ_BFE(regd) \ | |
50 __asm __volatile ( \ | |
51 "pcmpeqd %%" #regd ", %%" #regd " \n\t"\ | |
52 "paddb %%" #regd ", %%" #regd " \n\t" ::) | |
53 | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
54 #ifndef PIC |
448 | 55 #define MOVQ_BONE(regd) __asm __volatile ("movq %0, %%" #regd " \n\t" ::"m"(mm_bone)) |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
56 #define MOVQ_WTWO(regd) __asm __volatile ("movq %0, %%" #regd " \n\t" ::"m"(mm_wtwo)) |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
57 #else |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
58 // for shared library it's better to use this way for accessing constants |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
59 // pcmpeqd -> -1 |
448 | 60 #define MOVQ_BONE(regd) \ |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
61 __asm __volatile ( \ |
448 | 62 "pcmpeqd %%" #regd ", %%" #regd " \n\t" \ |
63 "psrlw $15, %%" #regd " \n\t" \ | |
64 "packuswb %%" #regd ", %%" #regd " \n\t" ::) | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
65 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
66 #define MOVQ_WTWO(regd) \ |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
67 __asm __volatile ( \ |
448 | 68 "pcmpeqd %%" #regd ", %%" #regd " \n\t" \ |
69 "psrlw $15, %%" #regd " \n\t" \ | |
70 "psllw $1, %%" #regd " \n\t"::) | |
387 | 71 |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
72 #endif |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
73 |
448 | 74 // using regr as temporary and for the output result |
444
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
75 // first argument is unmodifed and second is trashed |
471 | 76 // regfe is supposed to contain 0xfefefefefefefefe |
77 #define PAVGB_MMX_NO_RND(rega, regb, regr, regfe) \ | |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
78 "movq " #rega ", " #regr " \n\t"\ |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
79 "pand " #regb ", " #regr " \n\t"\ |
444
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
80 "pxor " #rega ", " #regb " \n\t"\ |
471 | 81 "pand " #regfe "," #regb " \n\t"\ |
444
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
82 "psrlq $1, " #regb " \n\t"\ |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
83 "paddb " #regb ", " #regr " \n\t" |
444
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
84 |
471 | 85 #define PAVGB_MMX(rega, regb, regr, regfe) \ |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
86 "movq " #rega ", " #regr " \n\t"\ |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
87 "por " #regb ", " #regr " \n\t"\ |
444
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
88 "pxor " #rega ", " #regb " \n\t"\ |
471 | 89 "pand " #regfe "," #regb " \n\t"\ |
444
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
90 "psrlq $1, " #regb " \n\t"\ |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
91 "psubb " #regb ", " #regr " \n\t" |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
92 |
471 | 93 // mm6 is supposed to contain 0xfefefefefefefefe |
446
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
94 #define PAVGBP_MMX_NO_RND(rega, regb, regr, regc, regd, regp) \ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
95 "movq " #rega ", " #regr " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
96 "movq " #regc ", " #regp " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
97 "pand " #regb ", " #regr " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
98 "pand " #regd ", " #regp " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
99 "pxor " #rega ", " #regb " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
100 "pxor " #regc ", " #regd " \n\t"\ |
448 | 101 "pand %%mm6, " #regb " \n\t"\ |
102 "pand %%mm6, " #regd " \n\t"\ | |
446
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
103 "psrlq $1, " #regb " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
104 "psrlq $1, " #regd " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
105 "paddb " #regb ", " #regr " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
106 "paddb " #regd ", " #regp " \n\t" |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
107 |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
108 #define PAVGBP_MMX(rega, regb, regr, regc, regd, regp) \ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
109 "movq " #rega ", " #regr " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
110 "movq " #regc ", " #regp " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
111 "por " #regb ", " #regr " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
112 "por " #regd ", " #regp " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
113 "pxor " #rega ", " #regb " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
114 "pxor " #regc ", " #regd " \n\t"\ |
448 | 115 "pand %%mm6, " #regb " \n\t"\ |
116 "pand %%mm6, " #regd " \n\t"\ | |
446
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
117 "psrlq $1, " #regd " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
118 "psrlq $1, " #regb " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
119 "psubb " #regb ", " #regr " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
120 "psubb " #regd ", " #regp " \n\t" |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
121 |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
122 /***********************************/ |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
123 /* MMX no rounding */ |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
124 #define DEF(x, y) x ## _no_rnd_ ## y ##_mmx |
448 | 125 #define SET_RND MOVQ_WONE |
126 #define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX_NO_RND(a, b, c, d, e, f) | |
471 | 127 #define PAVGB(a, b, c, e) PAVGB_MMX_NO_RND(a, b, c, e) |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
128 |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
129 #include "dsputil_mmx_rnd.h" |
444
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
130 |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
131 #undef DEF |
448 | 132 #undef SET_RND |
446
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
133 #undef PAVGBP |
471 | 134 #undef PAVGB |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
135 /***********************************/ |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
136 /* MMX rounding */ |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
137 |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
138 #define DEF(x, y) x ## _ ## y ##_mmx |
448 | 139 #define SET_RND MOVQ_WTWO |
140 #define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX(a, b, c, d, e, f) | |
471 | 141 #define PAVGB(a, b, c, e) PAVGB_MMX(a, b, c, e) |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
142 |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
143 #include "dsputil_mmx_rnd.h" |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
144 |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
145 #undef DEF |
448 | 146 #undef SET_RND |
446
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
147 #undef PAVGBP |
471 | 148 #undef PAVGB |
387 | 149 |
0 | 150 /***********************************/ |
151 /* 3Dnow specific */ | |
152 | |
153 #define DEF(x) x ## _3dnow | |
154 /* for Athlons PAVGUSB is prefered */ | |
155 #define PAVGB "pavgusb" | |
156 | |
157 #include "dsputil_mmx_avg.h" | |
158 | |
159 #undef DEF | |
160 #undef PAVGB | |
161 | |
162 /***********************************/ | |
163 /* MMX2 specific */ | |
164 | |
386 | 165 #define DEF(x) x ## _mmx2 |
0 | 166 |
167 /* Introduced only in MMX2 set */ | |
168 #define PAVGB "pavgb" | |
169 | |
170 #include "dsputil_mmx_avg.h" | |
171 | |
172 #undef DEF | |
173 #undef PAVGB | |
174 | |
175 /***********************************/ | |
176 /* standard MMX */ | |
177 | |
1530
3b31998fe22f
disable encoders where appropriate (patch courtesy of BERO
melanson
parents:
1527
diff
changeset
|
178 #ifdef CONFIG_ENCODERS |
1064 | 179 static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size) |
0 | 180 { |
386 | 181 asm volatile( |
182 "movl $-128, %%eax \n\t" | |
183 "pxor %%mm7, %%mm7 \n\t" | |
184 ".balign 16 \n\t" | |
185 "1: \n\t" | |
186 "movq (%0), %%mm0 \n\t" | |
187 "movq (%0, %2), %%mm2 \n\t" | |
188 "movq %%mm0, %%mm1 \n\t" | |
189 "movq %%mm2, %%mm3 \n\t" | |
190 "punpcklbw %%mm7, %%mm0 \n\t" | |
191 "punpckhbw %%mm7, %%mm1 \n\t" | |
192 "punpcklbw %%mm7, %%mm2 \n\t" | |
193 "punpckhbw %%mm7, %%mm3 \n\t" | |
194 "movq %%mm0, (%1, %%eax)\n\t" | |
195 "movq %%mm1, 8(%1, %%eax)\n\t" | |
196 "movq %%mm2, 16(%1, %%eax)\n\t" | |
197 "movq %%mm3, 24(%1, %%eax)\n\t" | |
198 "addl %3, %0 \n\t" | |
199 "addl $32, %%eax \n\t" | |
200 "js 1b \n\t" | |
201 : "+r" (pixels) | |
202 : "r" (block+64), "r" (line_size), "r" (line_size*2) | |
203 : "%eax" | |
204 ); | |
0 | 205 } |
206 | |
1064 | 207 static inline void diff_pixels_mmx(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride) |
324 | 208 { |
209 asm volatile( | |
386 | 210 "pxor %%mm7, %%mm7 \n\t" |
211 "movl $-128, %%eax \n\t" | |
324 | 212 ".balign 16 \n\t" |
213 "1: \n\t" | |
214 "movq (%0), %%mm0 \n\t" | |
215 "movq (%1), %%mm2 \n\t" | |
216 "movq %%mm0, %%mm1 \n\t" | |
217 "movq %%mm2, %%mm3 \n\t" | |
218 "punpcklbw %%mm7, %%mm0 \n\t" | |
219 "punpckhbw %%mm7, %%mm1 \n\t" | |
220 "punpcklbw %%mm7, %%mm2 \n\t" | |
221 "punpckhbw %%mm7, %%mm3 \n\t" | |
222 "psubw %%mm2, %%mm0 \n\t" | |
223 "psubw %%mm3, %%mm1 \n\t" | |
224 "movq %%mm0, (%2, %%eax)\n\t" | |
225 "movq %%mm1, 8(%2, %%eax)\n\t" | |
226 "addl %3, %0 \n\t" | |
227 "addl %3, %1 \n\t" | |
228 "addl $16, %%eax \n\t" | |
229 "jnz 1b \n\t" | |
230 : "+r" (s1), "+r" (s2) | |
231 : "r" (block+64), "r" (stride) | |
232 : "%eax" | |
233 ); | |
234 } | |
1530
3b31998fe22f
disable encoders where appropriate (patch courtesy of BERO
melanson
parents:
1527
diff
changeset
|
235 #endif //CONFIG_ENCODERS |
324 | 236 |
1064 | 237 void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) |
0 | 238 { |
239 const DCTELEM *p; | |
1064 | 240 uint8_t *pix; |
0 | 241 |
242 /* read the pixels */ | |
243 p = block; | |
244 pix = pixels; | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
245 /* unrolled loop */ |
0 | 246 __asm __volatile( |
151
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
247 "movq %3, %%mm0\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
248 "movq 8%3, %%mm1\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
249 "movq 16%3, %%mm2\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
250 "movq 24%3, %%mm3\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
251 "movq 32%3, %%mm4\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
252 "movq 40%3, %%mm5\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
253 "movq 48%3, %%mm6\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
254 "movq 56%3, %%mm7\n\t" |
0 | 255 "packuswb %%mm1, %%mm0\n\t" |
256 "packuswb %%mm3, %%mm2\n\t" | |
257 "packuswb %%mm5, %%mm4\n\t" | |
258 "packuswb %%mm7, %%mm6\n\t" | |
151
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
259 "movq %%mm0, (%0)\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
260 "movq %%mm2, (%0, %1)\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
261 "movq %%mm4, (%0, %1, 2)\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
262 "movq %%mm6, (%0, %2)\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
263 ::"r" (pix), "r" (line_size), "r" (line_size*3), "m"(*p) |
0 | 264 :"memory"); |
265 pix += line_size*4; | |
266 p += 32; | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
267 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
268 // if here would be an exact copy of the code above |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
269 // compiler would generate some very strange code |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
270 // thus using "r" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
271 __asm __volatile( |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
272 "movq (%3), %%mm0\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
273 "movq 8(%3), %%mm1\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
274 "movq 16(%3), %%mm2\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
275 "movq 24(%3), %%mm3\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
276 "movq 32(%3), %%mm4\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
277 "movq 40(%3), %%mm5\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
278 "movq 48(%3), %%mm6\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
279 "movq 56(%3), %%mm7\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
280 "packuswb %%mm1, %%mm0\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
281 "packuswb %%mm3, %%mm2\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
282 "packuswb %%mm5, %%mm4\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
283 "packuswb %%mm7, %%mm6\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
284 "movq %%mm0, (%0)\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
285 "movq %%mm2, (%0, %1)\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
286 "movq %%mm4, (%0, %1, 2)\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
287 "movq %%mm6, (%0, %2)\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
288 ::"r" (pix), "r" (line_size), "r" (line_size*3), "r"(p) |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
289 :"memory"); |
0 | 290 } |
291 | |
1064 | 292 void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) |
0 | 293 { |
294 const DCTELEM *p; | |
1064 | 295 uint8_t *pix; |
0 | 296 int i; |
297 | |
298 /* read the pixels */ | |
299 p = block; | |
300 pix = pixels; | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
301 MOVQ_ZERO(mm7); |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
302 i = 4; |
342
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
303 do { |
0 | 304 __asm __volatile( |
342
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
305 "movq (%2), %%mm0\n\t" |
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
306 "movq 8(%2), %%mm1\n\t" |
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
307 "movq 16(%2), %%mm2\n\t" |
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
308 "movq 24(%2), %%mm3\n\t" |
0 | 309 "movq %0, %%mm4\n\t" |
310 "movq %1, %%mm6\n\t" | |
311 "movq %%mm4, %%mm5\n\t" | |
312 "punpcklbw %%mm7, %%mm4\n\t" | |
313 "punpckhbw %%mm7, %%mm5\n\t" | |
314 "paddsw %%mm4, %%mm0\n\t" | |
315 "paddsw %%mm5, %%mm1\n\t" | |
316 "movq %%mm6, %%mm5\n\t" | |
317 "punpcklbw %%mm7, %%mm6\n\t" | |
318 "punpckhbw %%mm7, %%mm5\n\t" | |
319 "paddsw %%mm6, %%mm2\n\t" | |
320 "paddsw %%mm5, %%mm3\n\t" | |
321 "packuswb %%mm1, %%mm0\n\t" | |
322 "packuswb %%mm3, %%mm2\n\t" | |
323 "movq %%mm0, %0\n\t" | |
324 "movq %%mm2, %1\n\t" | |
151
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
325 :"+m"(*pix), "+m"(*(pix+line_size)) |
342
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
326 :"r"(p) |
0 | 327 :"memory"); |
328 pix += line_size*2; | |
329 p += 16; | |
342
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
330 } while (--i); |
0 | 331 } |
332 | |
1064 | 333 static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
0 | 334 { |
471 | 335 __asm __volatile( |
420 | 336 "lea (%3, %3), %%eax \n\t" |
422 | 337 ".balign 8 \n\t" |
420 | 338 "1: \n\t" |
339 "movq (%1), %%mm0 \n\t" | |
340 "movq (%1, %3), %%mm1 \n\t" | |
341 "movq %%mm0, (%2) \n\t" | |
342 "movq %%mm1, (%2, %3) \n\t" | |
343 "addl %%eax, %1 \n\t" | |
344 "addl %%eax, %2 \n\t" | |
345 "movq (%1), %%mm0 \n\t" | |
346 "movq (%1, %3), %%mm1 \n\t" | |
347 "movq %%mm0, (%2) \n\t" | |
348 "movq %%mm1, (%2, %3) \n\t" | |
349 "addl %%eax, %1 \n\t" | |
350 "addl %%eax, %2 \n\t" | |
351 "subl $4, %0 \n\t" | |
352 "jnz 1b \n\t" | |
353 : "+g"(h), "+r" (pixels), "+r" (block) | |
354 : "r"(line_size) | |
355 : "%eax", "memory" | |
356 ); | |
0 | 357 } |
358 | |
1064 | 359 static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
651 | 360 { |
361 __asm __volatile( | |
362 "lea (%3, %3), %%eax \n\t" | |
363 ".balign 8 \n\t" | |
364 "1: \n\t" | |
365 "movq (%1), %%mm0 \n\t" | |
366 "movq 8(%1), %%mm4 \n\t" | |
367 "movq (%1, %3), %%mm1 \n\t" | |
368 "movq 8(%1, %3), %%mm5 \n\t" | |
369 "movq %%mm0, (%2) \n\t" | |
370 "movq %%mm4, 8(%2) \n\t" | |
371 "movq %%mm1, (%2, %3) \n\t" | |
372 "movq %%mm5, 8(%2, %3) \n\t" | |
373 "addl %%eax, %1 \n\t" | |
374 "addl %%eax, %2 \n\t" | |
375 "movq (%1), %%mm0 \n\t" | |
376 "movq 8(%1), %%mm4 \n\t" | |
377 "movq (%1, %3), %%mm1 \n\t" | |
378 "movq 8(%1, %3), %%mm5 \n\t" | |
379 "movq %%mm0, (%2) \n\t" | |
380 "movq %%mm4, 8(%2) \n\t" | |
381 "movq %%mm1, (%2, %3) \n\t" | |
382 "movq %%mm5, 8(%2, %3) \n\t" | |
383 "addl %%eax, %1 \n\t" | |
384 "addl %%eax, %2 \n\t" | |
385 "subl $4, %0 \n\t" | |
386 "jnz 1b \n\t" | |
387 : "+g"(h), "+r" (pixels), "+r" (block) | |
388 : "r"(line_size) | |
389 : "%eax", "memory" | |
390 ); | |
391 } | |
392 | |
296 | 393 static void clear_blocks_mmx(DCTELEM *blocks) |
394 { | |
471 | 395 __asm __volatile( |
296 | 396 "pxor %%mm7, %%mm7 \n\t" |
397 "movl $-128*6, %%eax \n\t" | |
398 "1: \n\t" | |
399 "movq %%mm7, (%0, %%eax) \n\t" | |
400 "movq %%mm7, 8(%0, %%eax) \n\t" | |
401 "movq %%mm7, 16(%0, %%eax) \n\t" | |
402 "movq %%mm7, 24(%0, %%eax) \n\t" | |
403 "addl $32, %%eax \n\t" | |
404 " js 1b \n\t" | |
405 : : "r" (((int)blocks)+128*6) | |
406 : "%eax" | |
407 ); | |
408 } | |
409 | |
1530
3b31998fe22f
disable encoders where appropriate (patch courtesy of BERO
melanson
parents:
1527
diff
changeset
|
410 #ifdef CONFIG_ENCODERS |
1064 | 411 static int pix_sum16_mmx(uint8_t * pix, int line_size){ |
688 | 412 const int h=16; |
413 int sum; | |
414 int index= -line_size*h; | |
415 | |
416 __asm __volatile( | |
417 "pxor %%mm7, %%mm7 \n\t" | |
418 "pxor %%mm6, %%mm6 \n\t" | |
419 "1: \n\t" | |
420 "movq (%2, %1), %%mm0 \n\t" | |
421 "movq (%2, %1), %%mm1 \n\t" | |
422 "movq 8(%2, %1), %%mm2 \n\t" | |
423 "movq 8(%2, %1), %%mm3 \n\t" | |
424 "punpcklbw %%mm7, %%mm0 \n\t" | |
425 "punpckhbw %%mm7, %%mm1 \n\t" | |
426 "punpcklbw %%mm7, %%mm2 \n\t" | |
427 "punpckhbw %%mm7, %%mm3 \n\t" | |
428 "paddw %%mm0, %%mm1 \n\t" | |
429 "paddw %%mm2, %%mm3 \n\t" | |
430 "paddw %%mm1, %%mm3 \n\t" | |
431 "paddw %%mm3, %%mm6 \n\t" | |
432 "addl %3, %1 \n\t" | |
433 " js 1b \n\t" | |
434 "movq %%mm6, %%mm5 \n\t" | |
435 "psrlq $32, %%mm6 \n\t" | |
436 "paddw %%mm5, %%mm6 \n\t" | |
437 "movq %%mm6, %%mm5 \n\t" | |
438 "psrlq $16, %%mm6 \n\t" | |
439 "paddw %%mm5, %%mm6 \n\t" | |
440 "movd %%mm6, %0 \n\t" | |
441 "andl $0xFFFF, %0 \n\t" | |
442 : "=&r" (sum), "+r" (index) | |
443 : "r" (pix - index), "r" (line_size) | |
444 ); | |
445 | |
446 return sum; | |
447 } | |
1530
3b31998fe22f
disable encoders where appropriate (patch courtesy of BERO
melanson
parents:
1527
diff
changeset
|
448 #endif //CONFIG_ENCODERS |
688 | 449 |
866 | 450 static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){ |
451 int i=0; | |
452 asm volatile( | |
453 "1: \n\t" | |
454 "movq (%1, %0), %%mm0 \n\t" | |
455 "movq (%2, %0), %%mm1 \n\t" | |
456 "paddb %%mm0, %%mm1 \n\t" | |
457 "movq %%mm1, (%2, %0) \n\t" | |
458 "movq 8(%1, %0), %%mm0 \n\t" | |
459 "movq 8(%2, %0), %%mm1 \n\t" | |
460 "paddb %%mm0, %%mm1 \n\t" | |
461 "movq %%mm1, 8(%2, %0) \n\t" | |
462 "addl $16, %0 \n\t" | |
463 "cmpl %3, %0 \n\t" | |
464 " jb 1b \n\t" | |
465 : "+r" (i) | |
466 : "r"(src), "r"(dst), "r"(w-15) | |
467 ); | |
468 for(; i<w; i++) | |
469 dst[i+0] += src[i+0]; | |
470 } | |
471 | |
1648 | 472 #define H263_LOOP_FILTER \ |
473 "pxor %%mm7, %%mm7 \n\t"\ | |
474 "movq %0, %%mm0 \n\t"\ | |
475 "movq %0, %%mm1 \n\t"\ | |
476 "movq %3, %%mm2 \n\t"\ | |
477 "movq %3, %%mm3 \n\t"\ | |
478 "punpcklbw %%mm7, %%mm0 \n\t"\ | |
479 "punpckhbw %%mm7, %%mm1 \n\t"\ | |
480 "punpcklbw %%mm7, %%mm2 \n\t"\ | |
481 "punpckhbw %%mm7, %%mm3 \n\t"\ | |
482 "psubw %%mm2, %%mm0 \n\t"\ | |
483 "psubw %%mm3, %%mm1 \n\t"\ | |
484 "movq %1, %%mm2 \n\t"\ | |
485 "movq %1, %%mm3 \n\t"\ | |
486 "movq %2, %%mm4 \n\t"\ | |
487 "movq %2, %%mm5 \n\t"\ | |
488 "punpcklbw %%mm7, %%mm2 \n\t"\ | |
489 "punpckhbw %%mm7, %%mm3 \n\t"\ | |
490 "punpcklbw %%mm7, %%mm4 \n\t"\ | |
491 "punpckhbw %%mm7, %%mm5 \n\t"\ | |
492 "psubw %%mm2, %%mm4 \n\t"\ | |
493 "psubw %%mm3, %%mm5 \n\t"\ | |
494 "psllw $2, %%mm4 \n\t"\ | |
495 "psllw $2, %%mm5 \n\t"\ | |
496 "paddw %%mm0, %%mm4 \n\t"\ | |
497 "paddw %%mm1, %%mm5 \n\t"\ | |
498 "pxor %%mm6, %%mm6 \n\t"\ | |
499 "pcmpgtw %%mm4, %%mm6 \n\t"\ | |
500 "pcmpgtw %%mm5, %%mm7 \n\t"\ | |
501 "pxor %%mm6, %%mm4 \n\t"\ | |
502 "pxor %%mm7, %%mm5 \n\t"\ | |
503 "psubw %%mm6, %%mm4 \n\t"\ | |
504 "psubw %%mm7, %%mm5 \n\t"\ | |
505 "psrlw $3, %%mm4 \n\t"\ | |
506 "psrlw $3, %%mm5 \n\t"\ | |
507 "packuswb %%mm5, %%mm4 \n\t"\ | |
508 "packsswb %%mm7, %%mm6 \n\t"\ | |
509 "pxor %%mm7, %%mm7 \n\t"\ | |
510 "movd %4, %%mm2 \n\t"\ | |
511 "punpcklbw %%mm2, %%mm2 \n\t"\ | |
512 "punpcklbw %%mm2, %%mm2 \n\t"\ | |
513 "punpcklbw %%mm2, %%mm2 \n\t"\ | |
514 "psubusb %%mm4, %%mm2 \n\t"\ | |
515 "movq %%mm2, %%mm3 \n\t"\ | |
516 "psubusb %%mm4, %%mm3 \n\t"\ | |
517 "psubb %%mm3, %%mm2 \n\t"\ | |
518 "movq %1, %%mm3 \n\t"\ | |
519 "movq %2, %%mm4 \n\t"\ | |
520 "pxor %%mm6, %%mm3 \n\t"\ | |
521 "pxor %%mm6, %%mm4 \n\t"\ | |
522 "paddusb %%mm2, %%mm3 \n\t"\ | |
523 "psubusb %%mm2, %%mm4 \n\t"\ | |
524 "pxor %%mm6, %%mm3 \n\t"\ | |
525 "pxor %%mm6, %%mm4 \n\t"\ | |
526 "paddusb %%mm2, %%mm2 \n\t"\ | |
527 "packsswb %%mm1, %%mm0 \n\t"\ | |
528 "pcmpgtb %%mm0, %%mm7 \n\t"\ | |
529 "pxor %%mm7, %%mm0 \n\t"\ | |
530 "psubb %%mm7, %%mm0 \n\t"\ | |
531 "movq %%mm0, %%mm1 \n\t"\ | |
532 "psubusb %%mm2, %%mm0 \n\t"\ | |
533 "psubb %%mm0, %%mm1 \n\t"\ | |
534 "pand %5, %%mm1 \n\t"\ | |
535 "psrlw $2, %%mm1 \n\t"\ | |
536 "pxor %%mm7, %%mm1 \n\t"\ | |
537 "psubb %%mm7, %%mm1 \n\t"\ | |
538 "movq %0, %%mm5 \n\t"\ | |
539 "movq %3, %%mm6 \n\t"\ | |
540 "psubb %%mm1, %%mm5 \n\t"\ | |
541 "paddb %%mm1, %%mm6 \n\t" | |
542 | |
/**
 * H.263 loop filter, vertical-edge case: filters the 4 rows that straddle
 * a horizontal block boundary (src-2*stride .. src+1*stride, 8 pixels wide).
 * The shared H263_LOOP_FILTER asm body leaves the filtered rows in
 * mm5/mm3/mm4/mm6 (for operands %0/%1/%2/%3); they are stored back here.
 *
 * @param src    pointer to the first row BELOW the block edge
 * @param stride byte distance between vertically adjacent pixels
 * @param qscale quantizer index, mapped to filter strength via
 *               ff_h263_loop_filter_strength[]
 */
static void h263_v_loop_filter_mmx(uint8_t *src, int stride, int qscale){
    const int strength= ff_h263_loop_filter_strength[qscale];

    asm volatile(

        H263_LOOP_FILTER

        /* write the filtered rows back (register->operand mapping is
           fixed by the H263_LOOP_FILTER macro: 5 3 4 6 -> %0 %1 %2 %3) */
        "movq %%mm3, %1			\n\t"
        "movq %%mm4, %2			\n\t"
        "movq %%mm5, %0			\n\t"
        "movq %%mm6, %3			\n\t"
        : "+m" (*(uint64_t*)(src - 2*stride)),
          "+m" (*(uint64_t*)(src - 1*stride)),
          "+m" (*(uint64_t*)(src + 0*stride)),
          "+m" (*(uint64_t*)(src + 1*stride))
        : "g" (2*strength), "m"(ff_pb_FC)
    );
}
561 | |
/**
 * Transpose a 4x4 block of bytes: dst[x][y] = src[y][x].
 * Implemented with MMX byte/word unpacks; clobbers mm0-mm3.
 *
 * @param dst        destination of the transposed 4x4 block
 * @param src        source 4x4 block
 * @param dst_stride row stride of dst in bytes
 * @param src_stride row stride of src in bytes
 */
static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
    asm volatile( //FIXME could save 1 instruction if done as 8x4 ...
        "movd  %4, %%mm0		\n\t"
        "movd  %5, %%mm1		\n\t"
        "movd  %6, %%mm2		\n\t"
        "movd  %7, %%mm3		\n\t"
        /* interleave bytes of row pairs, then words, to rotate the block */
        "punpcklbw %%mm1, %%mm0		\n\t"
        "punpcklbw %%mm3, %%mm2		\n\t"
        "movq %%mm0, %%mm1		\n\t"
        "punpcklwd %%mm2, %%mm0		\n\t"
        "punpckhwd %%mm2, %%mm1		\n\t"
        /* each dword of mm0/mm1 is now one transposed row */
        "movd  %%mm0, %0		\n\t"
        "punpckhdq %%mm0, %%mm0		\n\t"
        "movd  %%mm0, %1		\n\t"
        "movd  %%mm1, %2		\n\t"
        "punpckhdq %%mm1, %%mm1		\n\t"
        "movd  %%mm1, %3		\n\t"

        : "=m" (*(uint32_t*)(dst + 0*dst_stride)),
          "=m" (*(uint32_t*)(dst + 1*dst_stride)),
          "=m" (*(uint32_t*)(dst + 2*dst_stride)),
          "=m" (*(uint32_t*)(dst + 3*dst_stride))
        : "m" (*(uint32_t*)(src + 0*src_stride)),
          "m" (*(uint32_t*)(src + 1*src_stride)),
          "m" (*(uint32_t*)(src + 2*src_stride)),
          "m" (*(uint32_t*)(src + 3*src_stride))
    );
}
590 | |
/**
 * H.263 loop filter, horizontal-edge case (vertical block boundary).
 * The 4 columns around the edge are transposed into a temp buffer so the
 * same vertical-filter asm (H263_LOOP_FILTER) can be reused, then the
 * filtered data is transposed back into the image.
 *
 * NOTE(review): this relies on MMX registers mm3..mm6 keeping their
 * values BETWEEN the two asm statements (no emms / clobbering code in
 * between) — fragile but intentional here.
 */
static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){
    const int strength= ff_h263_loop_filter_strength[qscale];
    uint64_t temp[4] __attribute__ ((aligned(8)));
    uint8_t *btemp= (uint8_t*)temp;

    src -= 2; /* step back to the 2 columns left of the edge */

    /* gather the 8x4 column strip as 4x8 rows in temp[] */
    transpose4x4(btemp  , src           , 8, stride);
    transpose4x4(btemp+4, src + 4*stride, 8, stride);
    asm volatile(
        H263_LOOP_FILTER // 5 3 4 6

        : "+m" (temp[0]),
          "+m" (temp[1]),
          "+m" (temp[2]),
          "+m" (temp[3])
        : "g" (2*strength), "m"(ff_pb_FC)
    );

    asm volatile(
        /* transpose the filtered rows (still in mm5,mm3,mm4,mm6) back
           into 8 destination rows, 4 bytes each */
        "movq %%mm5, %%mm1		\n\t"
        "movq %%mm4, %%mm0		\n\t"
        "punpcklbw %%mm3, %%mm5		\n\t"
        "punpcklbw %%mm6, %%mm4		\n\t"
        "punpckhbw %%mm3, %%mm1		\n\t"
        "punpckhbw %%mm6, %%mm0		\n\t"
        "movq %%mm5, %%mm3		\n\t"
        "movq %%mm1, %%mm6		\n\t"
        "punpcklwd %%mm4, %%mm5		\n\t"
        "punpcklwd %%mm0, %%mm1		\n\t"
        "punpckhwd %%mm4, %%mm3		\n\t"
        "punpckhwd %%mm0, %%mm6		\n\t"
        "movd %%mm5, %0			\n\t"
        "punpckhdq %%mm5, %%mm5		\n\t"
        "movd %%mm5, %1			\n\t"
        "movd %%mm3, %2			\n\t"
        "punpckhdq %%mm3, %%mm3		\n\t"
        "movd %%mm3, %3			\n\t"
        "movd %%mm1, %4			\n\t"
        "punpckhdq %%mm1, %%mm1		\n\t"
        "movd %%mm1, %5			\n\t"
        "movd %%mm6, %6			\n\t"
        "punpckhdq %%mm6, %%mm6		\n\t"
        "movd %%mm6, %7			\n\t"
        : "=m" (*(uint32_t*)(src + 0*stride)),
          "=m" (*(uint32_t*)(src + 1*stride)),
          "=m" (*(uint32_t*)(src + 2*stride)),
          "=m" (*(uint32_t*)(src + 3*stride)),
          "=m" (*(uint32_t*)(src + 4*stride)),
          "=m" (*(uint32_t*)(src + 5*stride)),
          "=m" (*(uint32_t*)(src + 6*stride)),
          "=m" (*(uint32_t*)(src + 7*stride))
    );
}
645 | |
1530
3b31998fe22f
disable encoders where appropriate (patch courtesy of BERO
melanson
parents:
1527
diff
changeset
|
646 #ifdef CONFIG_ENCODERS |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
984
diff
changeset
|
/**
 * Sum of squares of all pixels in a 16x16 block (MMX).
 * Per row: the 16 bytes are widened to words (punpck with zeroed mm0),
 * squared and pair-summed with pmaddwd, and accumulated as dwords in mm7;
 * the two final dword halves are added at the end.
 *
 * @param pix       top-left of the 16x16 block
 * @param line_size byte stride between rows
 * @return          sum of pix[i]^2 over the block
 */
static int pix_norm1_mmx(uint8_t *pix, int line_size) {
    int tmp;
  asm volatile (
      "movl $16,%%ecx\n"		/* 16 rows */
      "pxor %%mm0,%%mm0\n"		/* zero, for byte->word widening */
      "pxor %%mm7,%%mm7\n"		/* dword accumulator */
      "1:\n"
      "movq (%0),%%mm2\n"	/* mm2 = pix[0-7] */
      "movq 8(%0),%%mm3\n"	/* mm3 = pix[8-15] */

      "movq %%mm2,%%mm1\n"	/* mm1 = mm2 = pix[0-7] */

      "punpckhbw %%mm0,%%mm1\n"	/* mm1 = [pix4-7] */
      "punpcklbw %%mm0,%%mm2\n"	/* mm2 = [pix0-3] */

      "movq %%mm3,%%mm4\n"	/* mm4 = mm3 = pix[8-15] */
      "punpckhbw %%mm0,%%mm3\n"	/* mm3 = [pix12-15] */
      "punpcklbw %%mm0,%%mm4\n"	/* mm4 = [pix8-11] */

      "pmaddwd %%mm1,%%mm1\n"	/* mm1 = (pix0^2+pix1^2,pix2^2+pix3^2) */
      "pmaddwd %%mm2,%%mm2\n"	/* mm2 = (pix4^2+pix5^2,pix6^2+pix7^2) */

      "pmaddwd %%mm3,%%mm3\n"
      "pmaddwd %%mm4,%%mm4\n"

      "paddd %%mm1,%%mm2\n"	/* mm2 = (pix0^2+pix1^2+pix4^2+pix5^2,
                                           pix2^2+pix3^2+pix6^2+pix7^2) */
      "paddd %%mm3,%%mm4\n"
      "paddd %%mm2,%%mm7\n"

      "addl %2, %0\n"		/* next row */
      "paddd %%mm4,%%mm7\n"
      "dec %%ecx\n"
      "jnz 1b\n"

      "movq %%mm7,%%mm1\n"
      "psrlq $32, %%mm7\n"	/* shift hi dword to lo */
      "paddd %%mm7,%%mm1\n"
      "movd %%mm1,%1\n"
      : "+r" (pix), "=r"(tmp) : "r" (line_size) : "%ecx" );
    return tmp;
}
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
984
diff
changeset
|
689 |
/**
 * Sum of squared errors between two 16-pixel-wide blocks over h rows (MMX).
 * |pix1 - pix2| is computed per byte with the psubusb/por trick (saturated
 * subtraction in both directions, OR-ed), then widened to words, squared
 * and pair-summed with pmaddwd, and accumulated as dwords in mm7.
 *
 * @param v         opaque context pointer, unused (DSP function signature)
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size byte stride between rows (same for both blocks)
 * @param h         number of rows to process
 * @return          sum of (pix1[i]-pix2[i])^2
 */
static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
    int tmp;
  asm volatile (
      "movl %4,%%ecx\n"		/* row counter = h */
      "pxor %%mm0,%%mm0\n"	/* mm0 = 0 */
      "pxor %%mm7,%%mm7\n"	/* mm7 holds the sum */
      "1:\n"
      "movq (%0),%%mm1\n"	/* mm1 = pix1[0-7] */
      "movq (%1),%%mm2\n"	/* mm2 = pix2[0-7] */
      "movq 8(%0),%%mm3\n"	/* mm3 = pix1[8-15] */
      "movq 8(%1),%%mm4\n"	/* mm4 = pix2[8-15] */

      /* todo: mm1-mm2, mm3-mm4 */
      /* algo: substract mm1 from mm2 with saturation and vice versa */
      /*       OR the results to get absolute difference */
      "movq %%mm1,%%mm5\n"
      "movq %%mm3,%%mm6\n"
      "psubusb %%mm2,%%mm1\n"
      "psubusb %%mm4,%%mm3\n"
      "psubusb %%mm5,%%mm2\n"
      "psubusb %%mm6,%%mm4\n"

      "por %%mm1,%%mm2\n"
      "por %%mm3,%%mm4\n"

      /* now convert to 16-bit vectors so we can square them */
      "movq %%mm2,%%mm1\n"
      "movq %%mm4,%%mm3\n"

      "punpckhbw %%mm0,%%mm2\n"
      "punpckhbw %%mm0,%%mm4\n"
      "punpcklbw %%mm0,%%mm1\n" /* mm1 now spread over (mm1,mm2) */
      "punpcklbw %%mm0,%%mm3\n" /* mm4 now spread over (mm3,mm4) */

      "pmaddwd %%mm2,%%mm2\n"
      "pmaddwd %%mm4,%%mm4\n"
      "pmaddwd %%mm1,%%mm1\n"
      "pmaddwd %%mm3,%%mm3\n"

      "addl %3,%0\n"		/* next row of pix1 */
      "addl %3,%1\n"		/* next row of pix2 */

      "paddd %%mm2,%%mm1\n"
      "paddd %%mm4,%%mm3\n"
      "paddd %%mm1,%%mm7\n"
      "paddd %%mm3,%%mm7\n"

      "decl %%ecx\n"
      "jnz 1b\n"

      "movq %%mm7,%%mm1\n"
      "psrlq $32, %%mm7\n"	/* shift hi dword to lo */
      "paddd %%mm7,%%mm1\n"
      "movd %%mm1,%2\n"
      : "+r" (pix1), "+r" (pix2), "=r"(tmp)
      : "r" (line_size) , "m" (h)
      : "%ecx");
    return tmp;
}
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
984
diff
changeset
|
749 |
866 | 750 static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ |
751 int i=0; | |
752 asm volatile( | |
753 "1: \n\t" | |
754 "movq (%2, %0), %%mm0 \n\t" | |
755 "movq (%1, %0), %%mm1 \n\t" | |
756 "psubb %%mm0, %%mm1 \n\t" | |
757 "movq %%mm1, (%3, %0) \n\t" | |
758 "movq 8(%2, %0), %%mm0 \n\t" | |
759 "movq 8(%1, %0), %%mm1 \n\t" | |
760 "psubb %%mm0, %%mm1 \n\t" | |
761 "movq %%mm1, 8(%3, %0) \n\t" | |
762 "addl $16, %0 \n\t" | |
763 "cmpl %4, %0 \n\t" | |
764 " jb 1b \n\t" | |
765 : "+r" (i) | |
766 : "r"(src1), "r"(src2), "r"(dst), "r"(w-15) | |
767 ); | |
768 for(; i<w; i++) | |
769 dst[i+0] = src1[i+0]-src2[i+0]; | |
770 } | |
1527 | 771 |
/**
 * HuffYUV median-prediction subtract (MMX2, needs pmaxub/pminub):
 *   dst[i] = src2[i] - median(L, T, L + T - LT)
 * with T = src1[i] (top), L = src2[i-1] (left), LT = src1[i-1] (top-left).
 *
 * The asm reads one byte BEFORE src1 and src2 for element 0, so dst[0] is
 * recomputed in C afterwards from the caller-supplied *left / *left_top.
 * On return, *left and *left_top are updated to the last-column values
 * for the next call.
 *
 * NOTE(review): the loop advances 8 bytes at a time and exits when i >= w,
 * so it processes up to 7 bytes past w when w is not a multiple of 8 —
 * callers presumably guarantee suitably padded buffers; confirm.
 */
static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
    int i=0;
    uint8_t l, lt;

    asm volatile(
        "1:				\n\t"
        "movq  -1(%1, %0), %%mm0	\n\t" // LT
        "movq  (%1, %0), %%mm1		\n\t" // T
        "movq  -1(%2, %0), %%mm2	\n\t" // L
        "movq  (%2, %0), %%mm3		\n\t" // X
        "movq %%mm2, %%mm4		\n\t" // L
        "psubb %%mm0, %%mm2		\n\t"
        "paddb %%mm1, %%mm2		\n\t" // L + T - LT
        "movq %%mm4, %%mm5		\n\t" // L
        "pmaxub %%mm1, %%mm4		\n\t" // max(T, L)
        "pminub %%mm5, %%mm1		\n\t" // min(T, L)
        /* clamp L+T-LT into [min(T,L), max(T,L)] -> the median */
        "pminub %%mm2, %%mm4		\n\t"
        "pmaxub %%mm1, %%mm4		\n\t"
        "psubb %%mm4, %%mm3		\n\t" // dst - pred
        "movq %%mm3, (%3, %0)		\n\t"
        "addl $8, %0			\n\t"
        "cmpl %4, %0			\n\t"
        " jb 1b				\n\t"
        : "+r" (i)
        : "r"(src1), "r"(src2), "r"(dst), "r"(w)
    );

    l= *left;
    lt= *left_top;

    /* first element used stale left neighbors in the asm; redo it in C */
    dst[0]= src2[0] - mid_pred(l, src1[0], (l + src1[0] - lt)&0xFF);

    *left_top= src1[w-1];
    *left    = src2[w-1];
}
807 | |
/* One butterfly stage on two word-vector register pairs at once:
 * a += b; b = b_old - a_old (b is doubled first so no scratch register
 * is needed for the subtraction). */
#define LBUTTERFLY2(a1,b1,a2,b2)\
    "paddw " #b1 ", " #a1 "		\n\t"\
    "paddw " #b2 ", " #a2 "		\n\t"\
    "paddw " #b1 ", " #b1 "		\n\t"\
    "paddw " #b2 ", " #b2 "		\n\t"\
    "psubw " #a1 ", " #b1 "		\n\t"\
    "psubw " #a2 ", " #b2 "		\n\t"

/* 3-stage 8-point Hadamard transform (unnormalized) across mm0..mm7. */
#define HADAMARD48\
    LBUTTERFLY2(%%mm0, %%mm1, %%mm2, %%mm3)\
    LBUTTERFLY2(%%mm4, %%mm5, %%mm6, %%mm7)\
    LBUTTERFLY2(%%mm0, %%mm2, %%mm1, %%mm3)\
    LBUTTERFLY2(%%mm4, %%mm6, %%mm5, %%mm7)\
    LBUTTERFLY2(%%mm0, %%mm4, %%mm1, %%mm5)\
    LBUTTERFLY2(%%mm2, %%mm6, %%mm3, %%mm7)\

/* a = |a| per signed word; z is clobbered (holds the sign mask). */
#define MMABS(a,z)\
    "pxor " #z ", " #z "		\n\t"\
    "pcmpgtw " #a ", " #z "		\n\t"\
    "pxor " #z ", " #a "		\n\t"\
    "psubw " #z ", " #a "		\n\t"

/* sum += |a| per word, with unsigned saturation; z is clobbered. */
#define MMABS_SUM(a,z, sum)\
    "pxor " #z ", " #z "		\n\t"\
    "pcmpgtw " #a ", " #z "		\n\t"\
    "pxor " #z ", " #a "		\n\t"\
    "psubw " #z ", " #a "		\n\t"\
    "paddusw " #a ", " #sum "	\n\t"

/* MMX2 variant of MMABS using pmaxsw: a = max(a, -a). */
#define MMABS_MMX2(a,z)\
    "pxor " #z ", " #z "		\n\t"\
    "psubw " #a ", " #z "		\n\t"\
    "pmaxsw " #z ", " #a "		\n\t"

/* MMX2 variant of MMABS_SUM using pmaxsw. */
#define MMABS_SUM_MMX2(a,z, sum)\
    "pxor " #z ", " #z "		\n\t"\
    "psubw " #a ", " #z "		\n\t"\
    "pmaxsw " #z ", " #a "		\n\t"\
    "paddusw " #a ", " #sum "	\n\t"

/* Interleave elements of size n of a and b: low halves into a, high
 * halves into t (b is preserved). */
#define SBUTTERFLY(a,b,t,n)\
    "movq " #a ", " #t "		\n\t" /* abcd */\
    "punpckl" #n " " #b ", " #a "	\n\t" /* aebf */\
    "punpckh" #n " " #b ", " #t "	\n\t" /* cgdh */\

/* Transpose a 4x4 block of words held in a,b,c,d (t is scratch);
 * the transposed rows end up in a, d, t, c (see per-line comments). */
#define TRANSPOSE4(a,b,c,d,t)\
    SBUTTERFLY(a,b,t,wd) /* a=aebf t=cgdh */\
    SBUTTERFLY(c,d,b,wd) /* c=imjn b=kolp */\
    SBUTTERFLY(a,c,d,dq) /* a=aeim d=bfjn */\
    SBUTTERFLY(t,b,c,dq) /* t=cgko c=dhlp */

/* Load 4 rows (16-byte row stride) of the word buffer at %1+o. */
#define LOAD4(o, a, b, c, d)\
    "movq "#o"(%1), " #a "		\n\t"\
    "movq "#o"+16(%1), " #b "	\n\t"\
    "movq "#o"+32(%1), " #c "	\n\t"\
    "movq "#o"+48(%1), " #d "	\n\t"

/* Store 4 rows (16-byte row stride) of the word buffer at %1+o. */
#define STORE4(o, a, b, c, d)\
    "movq "#a", "#o"(%1)		\n\t"\
    "movq "#b", "#o"+16(%1)	\n\t"\
    "movq "#c", "#o"+32(%1)	\n\t"\
    "movq "#d", "#o"+48(%1)	\n\t"\

/**
 * 8x8 Hadamard-transformed SAD (SATD) between src1 and src2, plain MMX.
 * diff_pixels_mmx() fills temp[] with the 8x8 word difference; the block
 * is Hadamard-transformed along rows (with a transpose in between) and
 * columns, then the absolute values of all coefficients are summed with
 * saturating word adds.
 *
 * @param s      unused context pointer (DSP function signature)
 * @param h      must be 8 (asserted); 16-row use goes via WARPER8_16_SQ
 * @return       SATD, masked to 16 bits (sum is accumulated in words)
 */
static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){
    uint64_t temp[16] __align8;
    int sum=0;

    assert(h==8);

    diff_pixels_mmx((DCTELEM*)temp, src1, src2, stride);

    asm volatile(
        /* left 8x4 halves: transform + transpose, spill via temp */
        LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3)
        LOAD4(64, %%mm4, %%mm5, %%mm6, %%mm7)

        HADAMARD48

        "movq %%mm7, 112(%1)		\n\t"

        TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7)
        STORE4(0 , %%mm0, %%mm3, %%mm7, %%mm2)

        "movq 112(%1), %%mm7 	\n\t"
        TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0)
        STORE4(64, %%mm4, %%mm7, %%mm0, %%mm6)

        /* right 8x4 halves */
        LOAD4(8 , %%mm0, %%mm1, %%mm2, %%mm3)
        LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7)

        HADAMARD48

        "movq %%mm7, 120(%1)		\n\t"

        TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7)
        STORE4(8 , %%mm0, %%mm3, %%mm7, %%mm2)

        "movq 120(%1), %%mm7 	\n\t"
        TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0)
        "movq %%mm7, %%mm5		\n\t"//FIXME remove
        "movq %%mm6, %%mm7		\n\t"
        "movq %%mm0, %%mm6		\n\t"
//        STORE4(72, %%mm4, %%mm7, %%mm0, %%mm6) //FIXME remove

        /* second pass (other direction) + absolute-value accumulation */
        LOAD4(64, %%mm0, %%mm1, %%mm2, %%mm3)
//        LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7)

        HADAMARD48
        "movq %%mm7, 64(%1)		\n\t"
        MMABS(%%mm0, %%mm7)
        MMABS_SUM(%%mm1, %%mm7, %%mm0)
        MMABS_SUM(%%mm2, %%mm7, %%mm0)
        MMABS_SUM(%%mm3, %%mm7, %%mm0)
        MMABS_SUM(%%mm4, %%mm7, %%mm0)
        MMABS_SUM(%%mm5, %%mm7, %%mm0)
        MMABS_SUM(%%mm6, %%mm7, %%mm0)
        "movq 64(%1), %%mm1		\n\t"
        MMABS_SUM(%%mm1, %%mm7, %%mm0)
        "movq %%mm0, 64(%1)		\n\t"

        LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3)
        LOAD4(8 , %%mm4, %%mm5, %%mm6, %%mm7)

        HADAMARD48
        "movq %%mm7, (%1)		\n\t"
        MMABS(%%mm0, %%mm7)
        MMABS_SUM(%%mm1, %%mm7, %%mm0)
        MMABS_SUM(%%mm2, %%mm7, %%mm0)
        MMABS_SUM(%%mm3, %%mm7, %%mm0)
        MMABS_SUM(%%mm4, %%mm7, %%mm0)
        MMABS_SUM(%%mm5, %%mm7, %%mm0)
        MMABS_SUM(%%mm6, %%mm7, %%mm0)
        "movq (%1), %%mm1		\n\t"
        MMABS_SUM(%%mm1, %%mm7, %%mm0)
        "movq 64(%1), %%mm1		\n\t"
        MMABS_SUM(%%mm1, %%mm7, %%mm0)

        /* horizontal add of the four word partial sums in mm0 */
        "movq %%mm0, %%mm1		\n\t"
        "psrlq $32, %%mm0		\n\t"
        "paddusw %%mm1, %%mm0	\n\t"
        "movq %%mm0, %%mm1		\n\t"
        "psrlq $16, %%mm0		\n\t"
        "paddusw %%mm1, %%mm0	\n\t"
        "movd %%mm0, %0		\n\t"

        : "=r" (sum)
        : "r"(temp)
    );
    return sum&0xFFFF;
}
957 | |
/**
 * 8x8 Hadamard-transformed SAD (SATD), MMX2 version.  Identical structure
 * to hadamard8_diff_mmx() but uses the shorter pmaxsw-based absolute
 * value macros (MMABS_MMX2 / MMABS_SUM_MMX2).
 *
 * @param s      unused context pointer (DSP function signature)
 * @param h      must be 8 (asserted); 16-row use goes via WARPER8_16_SQ
 * @return       SATD, masked to 16 bits
 */
static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){
    uint64_t temp[16] __align8;
    int sum=0;

    assert(h==8);

    diff_pixels_mmx((DCTELEM*)temp, src1, src2, stride);

    asm volatile(
        /* left 8x4 halves: transform + transpose, spill via temp */
        LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3)
        LOAD4(64, %%mm4, %%mm5, %%mm6, %%mm7)

        HADAMARD48

        "movq %%mm7, 112(%1)		\n\t"

        TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7)
        STORE4(0 , %%mm0, %%mm3, %%mm7, %%mm2)

        "movq 112(%1), %%mm7 	\n\t"
        TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0)
        STORE4(64, %%mm4, %%mm7, %%mm0, %%mm6)

        /* right 8x4 halves */
        LOAD4(8 , %%mm0, %%mm1, %%mm2, %%mm3)
        LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7)

        HADAMARD48

        "movq %%mm7, 120(%1)		\n\t"

        TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7)
        STORE4(8 , %%mm0, %%mm3, %%mm7, %%mm2)

        "movq 120(%1), %%mm7 	\n\t"
        TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0)
        "movq %%mm7, %%mm5		\n\t"//FIXME remove
        "movq %%mm6, %%mm7		\n\t"
        "movq %%mm0, %%mm6		\n\t"
//        STORE4(72, %%mm4, %%mm7, %%mm0, %%mm6) //FIXME remove

        /* second pass (other direction) + absolute-value accumulation */
        LOAD4(64, %%mm0, %%mm1, %%mm2, %%mm3)
//        LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7)

        HADAMARD48
        "movq %%mm7, 64(%1)		\n\t"
        MMABS_MMX2(%%mm0, %%mm7)
        MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0)
        MMABS_SUM_MMX2(%%mm2, %%mm7, %%mm0)
        MMABS_SUM_MMX2(%%mm3, %%mm7, %%mm0)
        MMABS_SUM_MMX2(%%mm4, %%mm7, %%mm0)
        MMABS_SUM_MMX2(%%mm5, %%mm7, %%mm0)
        MMABS_SUM_MMX2(%%mm6, %%mm7, %%mm0)
        "movq 64(%1), %%mm1		\n\t"
        MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0)
        "movq %%mm0, 64(%1)		\n\t"

        LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3)
        LOAD4(8 , %%mm4, %%mm5, %%mm6, %%mm7)

        HADAMARD48
        "movq %%mm7, (%1)		\n\t"
        MMABS_MMX2(%%mm0, %%mm7)
        MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0)
        MMABS_SUM_MMX2(%%mm2, %%mm7, %%mm0)
        MMABS_SUM_MMX2(%%mm3, %%mm7, %%mm0)
        MMABS_SUM_MMX2(%%mm4, %%mm7, %%mm0)
        MMABS_SUM_MMX2(%%mm5, %%mm7, %%mm0)
        MMABS_SUM_MMX2(%%mm6, %%mm7, %%mm0)
        "movq (%1), %%mm1		\n\t"
        MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0)
        "movq 64(%1), %%mm1		\n\t"
        MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0)

        /* horizontal add of the four word partial sums in mm0 */
        "movq %%mm0, %%mm1		\n\t"
        "psrlq $32, %%mm0		\n\t"
        "paddusw %%mm1, %%mm0	\n\t"
        "movq %%mm0, %%mm1		\n\t"
        "psrlq $16, %%mm0		\n\t"
        "paddusw %%mm1, %%mm0	\n\t"
        "movd %%mm0, %0		\n\t"

        : "=r" (sum)
        : "r"(temp)
    );
    return sum&0xFFFF;
}
1044 | |
1045 | |
1708 | 1046 WARPER8_16_SQ(hadamard8_diff_mmx, hadamard8_diff16_mmx) |
1047 WARPER8_16_SQ(hadamard8_diff_mmx2, hadamard8_diff16_mmx2) | |
1530
3b31998fe22f
disable encoders where appropriate (patch courtesy of BERO
melanson
parents:
1527
diff
changeset
|
1048 #endif //CONFIG_ENCODERS |
866 | 1049 |
959 | 1050 #define put_no_rnd_pixels8_mmx(a,b,c,d) put_pixels8_mmx(a,b,c,d) |
1051 #define put_no_rnd_pixels16_mmx(a,b,c,d) put_pixels16_mmx(a,b,c,d) | |
1052 | |
954 | 1053 #define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\ |
1054 "paddw " #m4 ", " #m3 " \n\t" /* x1 */\ | |
961 | 1055 "movq "MANGLE(ff_pw_20)", %%mm4 \n\t" /* 20 */\ |
954 | 1056 "pmullw " #m3 ", %%mm4 \n\t" /* 20x1 */\ |
1057 "movq "#in7", " #m3 " \n\t" /* d */\ | |
1058 "movq "#in0", %%mm5 \n\t" /* D */\ | |
1059 "paddw " #m3 ", %%mm5 \n\t" /* x4 */\ | |
1060 "psubw %%mm5, %%mm4 \n\t" /* 20x1 - x4 */\ | |
1061 "movq "#in1", %%mm5 \n\t" /* C */\ | |
1062 "movq "#in2", %%mm6 \n\t" /* B */\ | |
1063 "paddw " #m6 ", %%mm5 \n\t" /* x3 */\ | |
1064 "paddw " #m5 ", %%mm6 \n\t" /* x2 */\ | |
1065 "paddw %%mm6, %%mm6 \n\t" /* 2x2 */\ | |
1066 "psubw %%mm6, %%mm5 \n\t" /* -2x2 + x3 */\ | |
961 | 1067 "pmullw "MANGLE(ff_pw_3)", %%mm5 \n\t" /* -6x2 + 3x3 */\ |
954 | 1068 "paddw " #rnd ", %%mm4 \n\t" /* x2 */\ |
1069 "paddw %%mm4, %%mm5 \n\t" /* 20x1 - 6x2 + 3x3 - x4 */\ | |
1070 "psraw $5, %%mm5 \n\t"\ | |
1071 "packuswb %%mm5, %%mm5 \n\t"\ | |
1072 OP(%%mm5, out, %%mm7, d) | |
1073 | |
959 | 1074 #define QPEL_BASE(OPNAME, ROUNDER, RND, OP_MMX2, OP_3DNOW)\ |
1057 | 1075 static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ |
954 | 1076 uint64_t temp;\ |
1077 \ | |
1078 asm volatile(\ | |
1079 "pxor %%mm7, %%mm7 \n\t"\ | |
1080 "1: \n\t"\ | |
1081 "movq (%0), %%mm0 \n\t" /* ABCDEFGH */\ | |
1082 "movq %%mm0, %%mm1 \n\t" /* ABCDEFGH */\ | |
1083 "movq %%mm0, %%mm2 \n\t" /* ABCDEFGH */\ | |
1084 "punpcklbw %%mm7, %%mm0 \n\t" /* 0A0B0C0D */\ | |
1085 "punpckhbw %%mm7, %%mm1 \n\t" /* 0E0F0G0H */\ | |
1086 "pshufw $0x90, %%mm0, %%mm5 \n\t" /* 0A0A0B0C */\ | |
1087 "pshufw $0x41, %%mm0, %%mm6 \n\t" /* 0B0A0A0B */\ | |
1088 "movq %%mm2, %%mm3 \n\t" /* ABCDEFGH */\ | |
1089 "movq %%mm2, %%mm4 \n\t" /* ABCDEFGH */\ | |
1090 "psllq $8, %%mm2 \n\t" /* 0ABCDEFG */\ | |
1091 "psllq $16, %%mm3 \n\t" /* 00ABCDEF */\ | |
1092 "psllq $24, %%mm4 \n\t" /* 000ABCDE */\ | |
1093 "punpckhbw %%mm7, %%mm2 \n\t" /* 0D0E0F0G */\ | |
1094 "punpckhbw %%mm7, %%mm3 \n\t" /* 0C0D0E0F */\ | |
1095 "punpckhbw %%mm7, %%mm4 \n\t" /* 0B0C0D0E */\ | |
1096 "paddw %%mm3, %%mm5 \n\t" /* b */\ | |
1097 "paddw %%mm2, %%mm6 \n\t" /* c */\ | |
1098 "paddw %%mm5, %%mm5 \n\t" /* 2b */\ | |
1099 "psubw %%mm5, %%mm6 \n\t" /* c - 2b */\ | |
1100 "pshufw $0x06, %%mm0, %%mm5 \n\t" /* 0C0B0A0A */\ | |
961 | 1101 "pmullw "MANGLE(ff_pw_3)", %%mm6 \n\t" /* 3c - 6b */\ |
954 | 1102 "paddw %%mm4, %%mm0 \n\t" /* a */\ |
1103 "paddw %%mm1, %%mm5 \n\t" /* d */\ | |
961 | 1104 "pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\ |
954 | 1105 "psubw %%mm5, %%mm0 \n\t" /* 20a - d */\ |
961 | 1106 "paddw %6, %%mm6 \n\t"\ |
954 | 1107 "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\ |
1108 "psraw $5, %%mm0 \n\t"\ | |
961 | 1109 "movq %%mm0, %5 \n\t"\ |
954 | 1110 /* mm1=EFGH, mm2=DEFG, mm3=CDEF, mm4=BCDE, mm7=0 */\ |
1111 \ | |
1112 "movq 5(%0), %%mm0 \n\t" /* FGHIJKLM */\ | |
1113 "movq %%mm0, %%mm5 \n\t" /* FGHIJKLM */\ | |
1114 "movq %%mm0, %%mm6 \n\t" /* FGHIJKLM */\ | |
1115 "psrlq $8, %%mm0 \n\t" /* GHIJKLM0 */\ | |
1116 "psrlq $16, %%mm5 \n\t" /* HIJKLM00 */\ | |
1117 "punpcklbw %%mm7, %%mm0 \n\t" /* 0G0H0I0J */\ | |
1118 "punpcklbw %%mm7, %%mm5 \n\t" /* 0H0I0J0K */\ | |
1119 "paddw %%mm0, %%mm2 \n\t" /* b */\ | |
1120 "paddw %%mm5, %%mm3 \n\t" /* c */\ | |
1121 "paddw %%mm2, %%mm2 \n\t" /* 2b */\ | |
1122 "psubw %%mm2, %%mm3 \n\t" /* c - 2b */\ | |
1123 "movq %%mm6, %%mm2 \n\t" /* FGHIJKLM */\ | |
1124 "psrlq $24, %%mm6 \n\t" /* IJKLM000 */\ | |
1125 "punpcklbw %%mm7, %%mm2 \n\t" /* 0F0G0H0I */\ | |
1126 "punpcklbw %%mm7, %%mm6 \n\t" /* 0I0J0K0L */\ | |
961 | 1127 "pmullw "MANGLE(ff_pw_3)", %%mm3 \n\t" /* 3c - 6b */\ |
954 | 1128 "paddw %%mm2, %%mm1 \n\t" /* a */\ |
1129 "paddw %%mm6, %%mm4 \n\t" /* d */\ | |
961 | 1130 "pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\ |
954 | 1131 "psubw %%mm4, %%mm3 \n\t" /* - 6b +3c - d */\ |
961 | 1132 "paddw %6, %%mm1 \n\t"\ |
954 | 1133 "paddw %%mm1, %%mm3 \n\t" /* 20a - 6b +3c - d */\ |
1134 "psraw $5, %%mm3 \n\t"\ | |
961 | 1135 "movq %5, %%mm1 \n\t"\ |
954 | 1136 "packuswb %%mm3, %%mm1 \n\t"\ |
959 | 1137 OP_MMX2(%%mm1, (%1),%%mm4, q)\ |
954 | 1138 /* mm0= GHIJ, mm2=FGHI, mm5=HIJK, mm6=IJKL, mm7=0 */\ |
1139 \ | |
1140 "movq 9(%0), %%mm1 \n\t" /* JKLMNOPQ */\ | |
1141 "movq %%mm1, %%mm4 \n\t" /* JKLMNOPQ */\ | |
1142 "movq %%mm1, %%mm3 \n\t" /* JKLMNOPQ */\ | |
1143 "psrlq $8, %%mm1 \n\t" /* KLMNOPQ0 */\ | |
1144 "psrlq $16, %%mm4 \n\t" /* LMNOPQ00 */\ | |
1145 "punpcklbw %%mm7, %%mm1 \n\t" /* 0K0L0M0N */\ | |
1146 "punpcklbw %%mm7, %%mm4 \n\t" /* 0L0M0N0O */\ | |
1147 "paddw %%mm1, %%mm5 \n\t" /* b */\ | |
1148 "paddw %%mm4, %%mm0 \n\t" /* c */\ | |
1149 "paddw %%mm5, %%mm5 \n\t" /* 2b */\ | |
1150 "psubw %%mm5, %%mm0 \n\t" /* c - 2b */\ | |
1151 "movq %%mm3, %%mm5 \n\t" /* JKLMNOPQ */\ | |
1152 "psrlq $24, %%mm3 \n\t" /* MNOPQ000 */\ | |
961 | 1153 "pmullw "MANGLE(ff_pw_3)", %%mm0 \n\t" /* 3c - 6b */\ |
954 | 1154 "punpcklbw %%mm7, %%mm3 \n\t" /* 0M0N0O0P */\ |
1155 "paddw %%mm3, %%mm2 \n\t" /* d */\ | |
1156 "psubw %%mm2, %%mm0 \n\t" /* -6b + 3c - d */\ | |
1157 "movq %%mm5, %%mm2 \n\t" /* JKLMNOPQ */\ | |
1158 "punpcklbw %%mm7, %%mm2 \n\t" /* 0J0K0L0M */\ | |
1159 "punpckhbw %%mm7, %%mm5 \n\t" /* 0N0O0P0Q */\ | |
1160 "paddw %%mm2, %%mm6 \n\t" /* a */\ | |
961 | 1161 "pmullw "MANGLE(ff_pw_20)", %%mm6 \n\t" /* 20a */\ |
1162 "paddw %6, %%mm0 \n\t"\ | |
954 | 1163 "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\ |
1164 "psraw $5, %%mm0 \n\t"\ | |
1165 /* mm1=KLMN, mm2=JKLM, mm3=MNOP, mm4=LMNO, mm5=NOPQ mm7=0 */\ | |
1166 \ | |
1167 "paddw %%mm5, %%mm3 \n\t" /* a */\ | |
1168 "pshufw $0xF9, %%mm5, %%mm6 \n\t" /* 0O0P0Q0Q */\ | |
1169 "paddw %%mm4, %%mm6 \n\t" /* b */\ | |
1170 "pshufw $0xBE, %%mm5, %%mm4 \n\t" /* 0P0Q0Q0P */\ | |
1171 "pshufw $0x6F, %%mm5, %%mm5 \n\t" /* 0Q0Q0P0O */\ | |
1172 "paddw %%mm1, %%mm4 \n\t" /* c */\ | |
1173 "paddw %%mm2, %%mm5 \n\t" /* d */\ | |
1174 "paddw %%mm6, %%mm6 \n\t" /* 2b */\ | |
1175 "psubw %%mm6, %%mm4 \n\t" /* c - 2b */\ | |
961 | 1176 "pmullw "MANGLE(ff_pw_20)", %%mm3 \n\t" /* 20a */\ |
1177 "pmullw "MANGLE(ff_pw_3)", %%mm4 \n\t" /* 3c - 6b */\ | |
954 | 1178 "psubw %%mm5, %%mm3 \n\t" /* -6b + 3c - d */\ |
961 | 1179 "paddw %6, %%mm4 \n\t"\ |
954 | 1180 "paddw %%mm3, %%mm4 \n\t" /* 20a - 6b + 3c - d */\ |
1181 "psraw $5, %%mm4 \n\t"\ | |
1182 "packuswb %%mm4, %%mm0 \n\t"\ | |
959 | 1183 OP_MMX2(%%mm0, 8(%1), %%mm4, q)\ |
954 | 1184 \ |
1185 "addl %3, %0 \n\t"\ | |
1186 "addl %4, %1 \n\t"\ | |
1187 "decl %2 \n\t"\ | |
1188 " jnz 1b \n\t"\ | |
967 | 1189 : "+a"(src), "+c"(dst), "+m"(h)\ |
966 | 1190 : "d"(srcStride), "S"(dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\ |
1191 : "memory"\ | |
954 | 1192 );\ |
1193 }\ | |
1194 \ | |
1195 static void OPNAME ## mpeg4_qpel16_h_lowpass_3dnow(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ | |
1196 int i;\ | |
1197 int16_t temp[16];\ | |
1198 /* quick HACK, XXX FIXME MUST be optimized */\ | |
1199 for(i=0; i<h; i++)\ | |
1200 {\ | |
1201 temp[ 0]= (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]);\ | |
1202 temp[ 1]= (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]);\ | |
1203 temp[ 2]= (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]);\ | |
1204 temp[ 3]= (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]);\ | |
1205 temp[ 4]= (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]);\ | |
1206 temp[ 5]= (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]);\ | |
1207 temp[ 6]= (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]);\ | |
1208 temp[ 7]= (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]);\ | |
1209 temp[ 8]= (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]);\ | |
1210 temp[ 9]= (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]);\ | |
1211 temp[10]= (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]);\ | |
1212 temp[11]= (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]);\ | |
1213 temp[12]= (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]);\ | |
1214 temp[13]= (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]);\ | |
1215 temp[14]= (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]);\ | |
1216 temp[15]= (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]);\ | |
1217 asm volatile(\ | |
1218 "movq (%0), %%mm0 \n\t"\ | |
1219 "movq 8(%0), %%mm1 \n\t"\ | |
1220 "paddw %2, %%mm0 \n\t"\ | |
1221 "paddw %2, %%mm1 \n\t"\ | |
1222 "psraw $5, %%mm0 \n\t"\ | |
1223 "psraw $5, %%mm1 \n\t"\ | |
1224 "packuswb %%mm1, %%mm0 \n\t"\ | |
959 | 1225 OP_3DNOW(%%mm0, (%1), %%mm1, q)\ |
954 | 1226 "movq 16(%0), %%mm0 \n\t"\ |
1227 "movq 24(%0), %%mm1 \n\t"\ | |
1228 "paddw %2, %%mm0 \n\t"\ | |
1229 "paddw %2, %%mm1 \n\t"\ | |
1230 "psraw $5, %%mm0 \n\t"\ | |
1231 "psraw $5, %%mm1 \n\t"\ | |
1232 "packuswb %%mm1, %%mm0 \n\t"\ | |
959 | 1233 OP_3DNOW(%%mm0, 8(%1), %%mm1, q)\ |
954 | 1234 :: "r"(temp), "r"(dst), "m"(ROUNDER)\ |
966 | 1235 : "memory"\ |
954 | 1236 );\ |
1237 dst+=dstStride;\ | |
1238 src+=srcStride;\ | |
1239 }\ | |
1240 }\ | |
1241 \ | |
1057 | 1242 static void OPNAME ## mpeg4_qpel8_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ |
959 | 1243 uint64_t temp;\ |
1244 \ | |
1245 asm volatile(\ | |
1246 "pxor %%mm7, %%mm7 \n\t"\ | |
1247 "1: \n\t"\ | |
1248 "movq (%0), %%mm0 \n\t" /* ABCDEFGH */\ | |
1249 "movq %%mm0, %%mm1 \n\t" /* ABCDEFGH */\ | |
1250 "movq %%mm0, %%mm2 \n\t" /* ABCDEFGH */\ | |
1251 "punpcklbw %%mm7, %%mm0 \n\t" /* 0A0B0C0D */\ | |
1252 "punpckhbw %%mm7, %%mm1 \n\t" /* 0E0F0G0H */\ | |
1253 "pshufw $0x90, %%mm0, %%mm5 \n\t" /* 0A0A0B0C */\ | |
1254 "pshufw $0x41, %%mm0, %%mm6 \n\t" /* 0B0A0A0B */\ | |
1255 "movq %%mm2, %%mm3 \n\t" /* ABCDEFGH */\ | |
1256 "movq %%mm2, %%mm4 \n\t" /* ABCDEFGH */\ | |
1257 "psllq $8, %%mm2 \n\t" /* 0ABCDEFG */\ | |
1258 "psllq $16, %%mm3 \n\t" /* 00ABCDEF */\ | |
1259 "psllq $24, %%mm4 \n\t" /* 000ABCDE */\ | |
1260 "punpckhbw %%mm7, %%mm2 \n\t" /* 0D0E0F0G */\ | |
1261 "punpckhbw %%mm7, %%mm3 \n\t" /* 0C0D0E0F */\ | |
1262 "punpckhbw %%mm7, %%mm4 \n\t" /* 0B0C0D0E */\ | |
1263 "paddw %%mm3, %%mm5 \n\t" /* b */\ | |
1264 "paddw %%mm2, %%mm6 \n\t" /* c */\ | |
1265 "paddw %%mm5, %%mm5 \n\t" /* 2b */\ | |
1266 "psubw %%mm5, %%mm6 \n\t" /* c - 2b */\ | |
1267 "pshufw $0x06, %%mm0, %%mm5 \n\t" /* 0C0B0A0A */\ | |
961 | 1268 "pmullw "MANGLE(ff_pw_3)", %%mm6 \n\t" /* 3c - 6b */\ |
959 | 1269 "paddw %%mm4, %%mm0 \n\t" /* a */\ |
1270 "paddw %%mm1, %%mm5 \n\t" /* d */\ | |
961 | 1271 "pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\ |
959 | 1272 "psubw %%mm5, %%mm0 \n\t" /* 20a - d */\ |
961 | 1273 "paddw %6, %%mm6 \n\t"\ |
959 | 1274 "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\ |
1275 "psraw $5, %%mm0 \n\t"\ | |
1276 /* mm1=EFGH, mm2=DEFG, mm3=CDEF, mm4=BCDE, mm7=0 */\ | |
1277 \ | |
1278 "movd 5(%0), %%mm5 \n\t" /* FGHI */\ | |
1279 "punpcklbw %%mm7, %%mm5 \n\t" /* 0F0G0H0I */\ | |
1280 "pshufw $0xF9, %%mm5, %%mm6 \n\t" /* 0G0H0I0I */\ | |
1281 "paddw %%mm5, %%mm1 \n\t" /* a */\ | |
1282 "paddw %%mm6, %%mm2 \n\t" /* b */\ | |
1283 "pshufw $0xBE, %%mm5, %%mm6 \n\t" /* 0H0I0I0H */\ | |
1284 "pshufw $0x6F, %%mm5, %%mm5 \n\t" /* 0I0I0H0G */\ | |
1285 "paddw %%mm6, %%mm3 \n\t" /* c */\ | |
1286 "paddw %%mm5, %%mm4 \n\t" /* d */\ | |
1287 "paddw %%mm2, %%mm2 \n\t" /* 2b */\ | |
1288 "psubw %%mm2, %%mm3 \n\t" /* c - 2b */\ | |
961 | 1289 "pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\ |
1290 "pmullw "MANGLE(ff_pw_3)", %%mm3 \n\t" /* 3c - 6b */\ | |
959 | 1291 "psubw %%mm4, %%mm3 \n\t" /* -6b + 3c - d */\ |
961 | 1292 "paddw %6, %%mm1 \n\t"\ |
959 | 1293 "paddw %%mm1, %%mm3 \n\t" /* 20a - 6b + 3c - d */\ |
1294 "psraw $5, %%mm3 \n\t"\ | |
1295 "packuswb %%mm3, %%mm0 \n\t"\ | |
1296 OP_MMX2(%%mm0, (%1), %%mm4, q)\ | |
1297 \ | |
1298 "addl %3, %0 \n\t"\ | |
1299 "addl %4, %1 \n\t"\ | |
1300 "decl %2 \n\t"\ | |
961 | 1301 " jnz 1b \n\t"\ |
967 | 1302 : "+a"(src), "+c"(dst), "+m"(h)\ |
966 | 1303 : "S"(srcStride), "D"(dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\ |
1304 : "memory"\ | |
959 | 1305 );\ |
1306 }\ | |
1307 \ | |
1308 static void OPNAME ## mpeg4_qpel8_h_lowpass_3dnow(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ | |
1309 int i;\ | |
1310 int16_t temp[8];\ | |
1311 /* quick HACK, XXX FIXME MUST be optimized */\ | |
1312 for(i=0; i<h; i++)\ | |
1313 {\ | |
1314 temp[ 0]= (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]);\ | |
1315 temp[ 1]= (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]);\ | |
1316 temp[ 2]= (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]);\ | |
1317 temp[ 3]= (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]);\ | |
1318 temp[ 4]= (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]);\ | |
1319 temp[ 5]= (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 8]);\ | |
1320 temp[ 6]= (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 8])*3 - (src[ 3]+src[ 7]);\ | |
1321 temp[ 7]= (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 8])*6 + (src[ 5]+src[ 7])*3 - (src[ 4]+src[ 6]);\ | |
1322 asm volatile(\ | |
1323 "movq (%0), %%mm0 \n\t"\ | |
1324 "movq 8(%0), %%mm1 \n\t"\ | |
1325 "paddw %2, %%mm0 \n\t"\ | |
1326 "paddw %2, %%mm1 \n\t"\ | |
1327 "psraw $5, %%mm0 \n\t"\ | |
1328 "psraw $5, %%mm1 \n\t"\ | |
1329 "packuswb %%mm1, %%mm0 \n\t"\ | |
1330 OP_3DNOW(%%mm0, (%1), %%mm1, q)\ | |
1331 :: "r"(temp), "r"(dst), "m"(ROUNDER)\ | |
966 | 1332 :"memory"\ |
959 | 1333 );\ |
1334 dst+=dstStride;\ | |
1335 src+=srcStride;\ | |
1336 }\ | |
1337 } | |
1338 | |
1339 #define QPEL_OP(OPNAME, ROUNDER, RND, OP, MMX)\ | |
1340 \ | |
1341 static void OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
954 | 1342 uint64_t temp[17*4];\ |
1343 uint64_t *temp_ptr= temp;\ | |
1344 int count= 17;\ | |
1345 \ | |
1346 /*FIXME unroll */\ | |
1347 asm volatile(\ | |
1348 "pxor %%mm7, %%mm7 \n\t"\ | |
1349 "1: \n\t"\ | |
1350 "movq (%0), %%mm0 \n\t"\ | |
1351 "movq (%0), %%mm1 \n\t"\ | |
1352 "movq 8(%0), %%mm2 \n\t"\ | |
1353 "movq 8(%0), %%mm3 \n\t"\ | |
1354 "punpcklbw %%mm7, %%mm0 \n\t"\ | |
1355 "punpckhbw %%mm7, %%mm1 \n\t"\ | |
1356 "punpcklbw %%mm7, %%mm2 \n\t"\ | |
1357 "punpckhbw %%mm7, %%mm3 \n\t"\ | |
1358 "movq %%mm0, (%1) \n\t"\ | |
1359 "movq %%mm1, 17*8(%1) \n\t"\ | |
967 | 1360 "movq %%mm2, 2*17*8(%1) \n\t"\ |
1361 "movq %%mm3, 3*17*8(%1) \n\t"\ | |
954 | 1362 "addl $8, %1 \n\t"\ |
1363 "addl %3, %0 \n\t"\ | |
1364 "decl %2 \n\t"\ | |
1365 " jnz 1b \n\t"\ | |
1366 : "+r" (src), "+r" (temp_ptr), "+r"(count)\ | |
967 | 1367 : "r" (srcStride)\ |
966 | 1368 : "memory"\ |
954 | 1369 );\ |
1370 \ | |
1371 temp_ptr= temp;\ | |
1372 count=4;\ | |
1373 \ | |
1374 /*FIXME reorder for speed */\ | |
1375 asm volatile(\ | |
1376 /*"pxor %%mm7, %%mm7 \n\t"*/\ | |
1377 "1: \n\t"\ | |
1378 "movq (%0), %%mm0 \n\t"\ | |
1379 "movq 8(%0), %%mm1 \n\t"\ | |
1380 "movq 16(%0), %%mm2 \n\t"\ | |
1381 "movq 24(%0), %%mm3 \n\t"\ | |
961 | 1382 QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\ |
1383 QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\ | |
954 | 1384 "addl %4, %1 \n\t"\ |
961 | 1385 QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\ |
954 | 1386 \ |
961 | 1387 QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\ |
954 | 1388 "addl %4, %1 \n\t"\ |
961 | 1389 QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\ |
1390 QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\ | |
954 | 1391 "addl %4, %1 \n\t"\ |
961 | 1392 QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\ |
1393 QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\ | |
954 | 1394 "addl %4, %1 \n\t"\ |
961 | 1395 QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\ |
1396 QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\ | |
954 | 1397 "addl %4, %1 \n\t"\ |
961 | 1398 QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\ |
1399 QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\ | |
954 | 1400 "addl %4, %1 \n\t"\ |
961 | 1401 QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 72(%0), 80(%0), 88(%0),128(%0), (%1), OP)\ |
954 | 1402 \ |
961 | 1403 QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP)\ |
954 | 1404 "addl %4, %1 \n\t" \ |
961 | 1405 QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\ |
1406 QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP)\ | |
954 | 1407 \ |
1408 "addl $136, %0 \n\t"\ | |
961 | 1409 "addl %6, %1 \n\t"\ |
954 | 1410 "decl %2 \n\t"\ |
1411 " jnz 1b \n\t"\ | |
958
9bb668034ecf
slowdown / gcc 2.95.* bug workaround (this should be reversed as soon as gcc 2.95.* support is droped)
michaelni
parents:
954
diff
changeset
|
1412 \ |
967 | 1413 : "+r"(temp_ptr), "+r"(dst), "+g"(count)\ |
961 | 1414 : "r"(dstStride), "r"(2*dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-14*dstStride)\ |
966 | 1415 :"memory"\ |
954 | 1416 );\ |
1417 }\ | |
1418 \ | |
1057 | 1419 static void OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
954 | 1420 uint64_t temp[9*4];\ |
1421 uint64_t *temp_ptr= temp;\ | |
1422 int count= 9;\ | |
1423 \ | |
1424 /*FIXME unroll */\ | |
1425 asm volatile(\ | |
1426 "pxor %%mm7, %%mm7 \n\t"\ | |
1427 "1: \n\t"\ | |
1428 "movq (%0), %%mm0 \n\t"\ | |
1429 "movq (%0), %%mm1 \n\t"\ | |
1430 "punpcklbw %%mm7, %%mm0 \n\t"\ | |
1431 "punpckhbw %%mm7, %%mm1 \n\t"\ | |
1432 "movq %%mm0, (%1) \n\t"\ | |
1433 "movq %%mm1, 9*8(%1) \n\t"\ | |
1434 "addl $8, %1 \n\t"\ | |
1435 "addl %3, %0 \n\t"\ | |
1436 "decl %2 \n\t"\ | |
1437 " jnz 1b \n\t"\ | |
1438 : "+r" (src), "+r" (temp_ptr), "+r"(count)\ | |
1439 : "r" (srcStride)\ | |
966 | 1440 : "memory"\ |
954 | 1441 );\ |
1442 \ | |
1443 temp_ptr= temp;\ | |
1444 count=2;\ | |
1445 \ | |
1446 /*FIXME reorder for speed */\ | |
1447 asm volatile(\ | |
1448 /*"pxor %%mm7, %%mm7 \n\t"*/\ | |
1449 "1: \n\t"\ | |
1450 "movq (%0), %%mm0 \n\t"\ | |
1451 "movq 8(%0), %%mm1 \n\t"\ | |
1452 "movq 16(%0), %%mm2 \n\t"\ | |
1453 "movq 24(%0), %%mm3 \n\t"\ | |
961 | 1454 QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\ |
1455 QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\ | |
954 | 1456 "addl %4, %1 \n\t"\ |
961 | 1457 QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\ |
954 | 1458 \ |
961 | 1459 QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\ |
954 | 1460 "addl %4, %1 \n\t"\ |
961 | 1461 QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\ |
954 | 1462 \ |
961 | 1463 QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 64(%0), (%1, %3), OP)\ |
954 | 1464 "addl %4, %1 \n\t"\ |
961 | 1465 QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 56(%0), (%1), OP)\ |
1466 QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 48(%0), (%1, %3), OP)\ | |
954 | 1467 \ |
1468 "addl $72, %0 \n\t"\ | |
961 | 1469 "addl %6, %1 \n\t"\ |
954 | 1470 "decl %2 \n\t"\ |
1471 " jnz 1b \n\t"\ | |
1472 \ | |
961 | 1473 : "+r"(temp_ptr), "+r"(dst), "+g"(count)\ |
1474 : "r"(dstStride), "r"(2*dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-6*dstStride)\ | |
966 | 1475 : "memory"\ |
1476 );\ | |
959 | 1477 }\ |
954 | 1478 \ |
1064 | 1479 static void OPNAME ## qpel8_mc00_ ## MMX (uint8_t *dst, uint8_t *src, int stride){\ |
959 | 1480 OPNAME ## pixels8_mmx(dst, src, stride, 8);\ |
954 | 1481 }\ |
1482 \ | |
1064 | 1483 static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
984 | 1484 uint64_t temp[8];\ |
954 | 1485 uint8_t * const half= (uint8_t*)temp;\ |
1486 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\ | |
1487 OPNAME ## pixels8_l2_mmx(dst, src, half, stride, stride, 8);\ | |
1488 }\ | |
1489 \ | |
1064 | 1490 static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
954 | 1491 OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, stride, 8);\ |
1492 }\ | |
1493 \ | |
1064 | 1494 static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
984 | 1495 uint64_t temp[8];\ |
954 | 1496 uint8_t * const half= (uint8_t*)temp;\ |
1497 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\ | |
1498 OPNAME ## pixels8_l2_mmx(dst, src+1, half, stride, stride, 8);\ | |
1499 }\ | |
1500 \ | |
1064 | 1501 static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
984 | 1502 uint64_t temp[8];\ |
954 | 1503 uint8_t * const half= (uint8_t*)temp;\ |
959 | 1504 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\ |
954 | 1505 OPNAME ## pixels8_l2_mmx(dst, src, half, stride, stride, 8);\ |
1506 }\ | |
1507 \ | |
1064 | 1508 static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
959 | 1509 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, stride, stride);\ |
954 | 1510 }\ |
1511 \ | |
1064 | 1512 static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
984 | 1513 uint64_t temp[8];\ |
954 | 1514 uint8_t * const half= (uint8_t*)temp;\ |
959 | 1515 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\ |
954 | 1516 OPNAME ## pixels8_l2_mmx(dst, src+stride, half, stride, stride, 8);\ |
1517 }\ | |
1064 | 1518 static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
984 | 1519 uint64_t half[8 + 9];\ |
1520 uint8_t * const halfH= ((uint8_t*)half) + 64;\ | |
1521 uint8_t * const halfHV= ((uint8_t*)half);\ | |
954 | 1522 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ |
984 | 1523 put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\ |
959 | 1524 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ |
984 | 1525 OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\ |
954 | 1526 }\ |
1064 | 1527 static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
984 | 1528 uint64_t half[8 + 9];\ |
1529 uint8_t * const halfH= ((uint8_t*)half) + 64;\ | |
1530 uint8_t * const halfHV= ((uint8_t*)half);\ | |
954 | 1531 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ |
984 | 1532 put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\ |
959 | 1533 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ |
984 | 1534 OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\ |
954 | 1535 }\ |
1064 | 1536 static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
984 | 1537 uint64_t half[8 + 9];\ |
1538 uint8_t * const halfH= ((uint8_t*)half) + 64;\ | |
1539 uint8_t * const halfHV= ((uint8_t*)half);\ | |
954 | 1540 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ |
984 | 1541 put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\ |
959 | 1542 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ |
984 | 1543 OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\ |
954 | 1544 }\ |
1064 | 1545 static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
984 | 1546 uint64_t half[8 + 9];\ |
1547 uint8_t * const halfH= ((uint8_t*)half) + 64;\ | |
1548 uint8_t * const halfHV= ((uint8_t*)half);\ | |
1549 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ | |
1550 put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\ | |
959 | 1551 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ |
984 | 1552 OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\ |
954 | 1553 }\ |
1064 | 1554 static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
984 | 1555 uint64_t half[8 + 9];\ |
954 | 1556 uint8_t * const halfH= ((uint8_t*)half) + 64;\ |
1557 uint8_t * const halfHV= ((uint8_t*)half);\ | |
1558 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ | |
959 | 1559 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ |
954 | 1560 OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\ |
1561 }\ | |
1064 | 1562 static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
984 | 1563 uint64_t half[8 + 9];\ |
954 | 1564 uint8_t * const halfH= ((uint8_t*)half) + 64;\ |
1565 uint8_t * const halfHV= ((uint8_t*)half);\ | |
1566 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ | |
959 | 1567 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ |
954 | 1568 OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\ |
1569 }\ | |
1064 | 1570 static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
984 | 1571 uint64_t half[8 + 9];\ |
1572 uint8_t * const halfH= ((uint8_t*)half);\ | |
954 | 1573 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ |
984 | 1574 put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\ |
1575 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\ | |
954 | 1576 }\ |
1064 | 1577 static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
984 | 1578 uint64_t half[8 + 9];\ |
1579 uint8_t * const halfH= ((uint8_t*)half);\ | |
954 | 1580 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ |
984 | 1581 put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\ |
1582 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\ | |
954 | 1583 }\ |
1064 | 1584 static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
984 | 1585 uint64_t half[9];\ |
954 | 1586 uint8_t * const halfH= ((uint8_t*)half);\ |
1587 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ | |
959 | 1588 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\ |
954 | 1589 }\ |
1064 | 1590 static void OPNAME ## qpel16_mc00_ ## MMX (uint8_t *dst, uint8_t *src, int stride){\ |
959 | 1591 OPNAME ## pixels16_mmx(dst, src, stride, 16);\ |
954 | 1592 }\ |
1593 \ | |
1064 | 1594 static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
954 | 1595 uint64_t temp[32];\ |
1596 uint8_t * const half= (uint8_t*)temp;\ | |
1597 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, stride, 16);\ | |
1598 OPNAME ## pixels16_l2_mmx(dst, src, half, stride, stride, 16);\ | |
1599 }\ | |
1600 \ | |
1064 | 1601 static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
954 | 1602 OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, stride, stride, 16);\ |
1603 }\ | |
1604 \ | |
1064 | 1605 static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
954 | 1606 uint64_t temp[32];\ |
1607 uint8_t * const half= (uint8_t*)temp;\ | |
1608 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, stride, 16);\ | |
1609 OPNAME ## pixels16_l2_mmx(dst, src+1, half, stride, stride, 16);\ | |
1610 }\ | |
1611 \ | |
1064 | 1612 static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
954 | 1613 uint64_t temp[32];\ |
1614 uint8_t * const half= (uint8_t*)temp;\ | |
959 | 1615 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\ |
954 | 1616 OPNAME ## pixels16_l2_mmx(dst, src, half, stride, stride, 16);\ |
1617 }\ | |
1618 \ | |
1064 | 1619 static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
959 | 1620 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, stride, stride);\ |
954 | 1621 }\ |
1622 \ | |
1064 | 1623 static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
954 | 1624 uint64_t temp[32];\ |
1625 uint8_t * const half= (uint8_t*)temp;\ | |
959 | 1626 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\ |
954 | 1627 OPNAME ## pixels16_l2_mmx(dst, src+stride, half, stride, stride, 16);\ |
1628 }\ | |
1064 | 1629 static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
984 | 1630 uint64_t half[16*2 + 17*2];\ |
1631 uint8_t * const halfH= ((uint8_t*)half) + 256;\ | |
1632 uint8_t * const halfHV= ((uint8_t*)half);\ | |
954 | 1633 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ |
984 | 1634 put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\ |
959 | 1635 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ |
984 | 1636 OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\ |
954 | 1637 }\ |
1064 | 1638 static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
984 | 1639 uint64_t half[16*2 + 17*2];\ |
1640 uint8_t * const halfH= ((uint8_t*)half) + 256;\ | |
1641 uint8_t * const halfHV= ((uint8_t*)half);\ | |
954 | 1642 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ |
984 | 1643 put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\ |
959 | 1644 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ |
984 | 1645 OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\ |
954 | 1646 }\ |
1064 | 1647 static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
984 | 1648 uint64_t half[16*2 + 17*2];\ |
1649 uint8_t * const halfH= ((uint8_t*)half) + 256;\ | |
1650 uint8_t * const halfHV= ((uint8_t*)half);\ | |
954 | 1651 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ |
984 | 1652 put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\ |
959 | 1653 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ |
984 | 1654 OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\ |
954 | 1655 }\ |
1064 | 1656 static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
984 | 1657 uint64_t half[16*2 + 17*2];\ |
1658 uint8_t * const halfH= ((uint8_t*)half) + 256;\ | |
1659 uint8_t * const halfHV= ((uint8_t*)half);\ | |
1660 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ | |
1661 put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\ | |
959 | 1662 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ |
984 | 1663 OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\ |
954 | 1664 }\ |
1064 | 1665 static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
954 | 1666 uint64_t half[16*2 + 17*2];\ |
1667 uint8_t * const halfH= ((uint8_t*)half) + 256;\ | |
1668 uint8_t * const halfHV= ((uint8_t*)half);\ | |
1669 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ | |
959 | 1670 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ |
954 | 1671 OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\ |
1672 }\ | |
1064 | 1673 static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
954 | 1674 uint64_t half[16*2 + 17*2];\ |
1675 uint8_t * const halfH= ((uint8_t*)half) + 256;\ | |
1676 uint8_t * const halfHV= ((uint8_t*)half);\ | |
1677 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ | |
959 | 1678 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ |
954 | 1679 OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\ |
1680 }\ | |
1064 | 1681 static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
984 | 1682 uint64_t half[17*2];\ |
1683 uint8_t * const halfH= ((uint8_t*)half);\ | |
954 | 1684 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ |
984 | 1685 put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\ |
1686 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\ | |
954 | 1687 }\ |
1064 | 1688 static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
984 | 1689 uint64_t half[17*2];\ |
1690 uint8_t * const halfH= ((uint8_t*)half);\ | |
954 | 1691 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ |
984 | 1692 put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\ |
1693 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\ | |
954 | 1694 }\ |
1064 | 1695 static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ |
954 | 1696 uint64_t half[17*2];\ |
1697 uint8_t * const halfH= ((uint8_t*)half);\ | |
1698 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ | |
959 | 1699 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\ |
954 | 1700 } |
1701 | |
1702 | |
/* Inline-asm "op" fragments plugged into the QPEL macros above.
 * a    = MMX register holding the computed pixels,
 * b    = destination memory operand,
 * temp = scratch MMX register (unused by PUT_OP),
 * size = mov suffix selecting the operand width (e.g. d or q). */

/* PUT: plain store of the computed pixels into the destination. */
#define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t"
/* AVG, 3DNow! flavor: load dst, average with the computed pixels via
 * pavgusb, store the result back. */
#define AVG_3DNOW_OP(a,b,temp, size) \
"mov" #size " " #b ", " #temp " \n\t"\
"pavgusb " #temp ", " #a " \n\t"\
"mov" #size " " #a ", " #b " \n\t"
/* AVG, MMX2 flavor: same sequence but using the pavgb instruction. */
#define AVG_MMX2_OP(a,b,temp, size) \
"mov" #size " " #b ", " #temp " \n\t"\
"pavgb " #temp ", " #a " \n\t"\
"mov" #size " " #a ", " #b " \n\t"
959 | 1712 |
/* Instantiate the qpel function families. Arguments select:
 * OPNAME prefix (put_/avg_/put_no_rnd_), rounding constant
 * (ff_pw_16 rounds, ff_pw_15 is the no-rounding variant), RND name
 * fragment, the store/average OP macro(s) from above, and the target
 * instruction set (3dnow or mmx2) for QPEL_OP. */
QPEL_BASE(put_ , ff_pw_16, _ , PUT_OP, PUT_OP)
QPEL_BASE(avg_ , ff_pw_16, _ , AVG_MMX2_OP, AVG_3DNOW_OP)
QPEL_BASE(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, PUT_OP)
QPEL_OP(put_ , ff_pw_16, _ , PUT_OP, 3dnow)
QPEL_OP(avg_ , ff_pw_16, _ , AVG_3DNOW_OP, 3dnow)
QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, 3dnow)
QPEL_OP(put_ , ff_pw_16, _ , PUT_OP, mmx2)
QPEL_OP(avg_ , ff_pw_16, _ , AVG_MMX2_OP, mmx2)
QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, mmx2)
1722 | |
#if 0
/* Dummy no-op target for function pointers; only referenced by the
 * disabled speed-testing assignments at the end of dsputil_init_mmx(). */
static void just_return() { return; }
#endif
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
1726 |
/* Install the put / put_no_rnd / avg variants of one qpel routine into
 * the matching DSPContext slots; expects a DSPContext pointer named "c"
 * in the expanding scope. */
#define SET_QPEL_FUNC(postfix1, postfix2) \
c->put_ ## postfix1 = put_ ## postfix2;\
c->put_no_rnd_ ## postfix1 = put_no_rnd_ ## postfix2;\
c->avg_ ## postfix1 = avg_ ## postfix2;
1092 | 1731 |
1732 /* external functions, from idct_mmx.c */ | |
1733 void ff_mmx_idct(DCTELEM *block); | |
1734 void ff_mmxext_idct(DCTELEM *block); | |
1735 | |
1736 /* XXX: those functions should be suppressed ASAP when all IDCTs are | |
1737 converted */ | |
1738 static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block) | |
1739 { | |
1740 ff_mmx_idct (block); | |
1741 put_pixels_clamped_mmx(block, dest, line_size); | |
1742 } | |
1743 static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size, DCTELEM *block) | |
1744 { | |
1745 ff_mmx_idct (block); | |
1746 add_pixels_clamped_mmx(block, dest, line_size); | |
1747 } | |
1748 static void ff_libmpeg2mmx2_idct_put(uint8_t *dest, int line_size, DCTELEM *block) | |
1749 { | |
1750 ff_mmxext_idct (block); | |
1751 put_pixels_clamped_mmx(block, dest, line_size); | |
1752 } | |
1753 static void ff_libmpeg2mmx2_idct_add(uint8_t *dest, int line_size, DCTELEM *block) | |
1754 { | |
1755 ff_mmxext_idct (block); | |
1756 add_pixels_clamped_mmx(block, dest, line_size); | |
1757 } | |
954 | 1758 |
1092 | 1759 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) |
0 | 1760 { |
1761 mm_flags = mm_support(); | |
1115 | 1762 |
1122 | 1763 if (avctx->dsp_mask) { |
1764 if (avctx->dsp_mask & FF_MM_FORCE) | |
1765 mm_flags |= (avctx->dsp_mask & 0xffff); | |
1766 else | |
1767 mm_flags &= ~(avctx->dsp_mask & 0xffff); | |
1768 } | |
1115 | 1769 |
631
47a8964ba5cd
be less verbose patch by (Lennert Buytenhek <buytenh at math dot leidenuniv dot nl>)
michaelni
parents:
629
diff
changeset
|
1770 #if 0 |
47a8964ba5cd
be less verbose patch by (Lennert Buytenhek <buytenh at math dot leidenuniv dot nl>)
michaelni
parents:
629
diff
changeset
|
1771 fprintf(stderr, "libavcodec: CPU flags:"); |
0 | 1772 if (mm_flags & MM_MMX) |
631
47a8964ba5cd
be less verbose patch by (Lennert Buytenhek <buytenh at math dot leidenuniv dot nl>)
michaelni
parents:
629
diff
changeset
|
1773 fprintf(stderr, " mmx"); |
0 | 1774 if (mm_flags & MM_MMXEXT) |
631
47a8964ba5cd
be less verbose patch by (Lennert Buytenhek <buytenh at math dot leidenuniv dot nl>)
michaelni
parents:
629
diff
changeset
|
1775 fprintf(stderr, " mmxext"); |
0 | 1776 if (mm_flags & MM_3DNOW) |
631
47a8964ba5cd
be less verbose patch by (Lennert Buytenhek <buytenh at math dot leidenuniv dot nl>)
michaelni
parents:
629
diff
changeset
|
1777 fprintf(stderr, " 3dnow"); |
0 | 1778 if (mm_flags & MM_SSE) |
631
47a8964ba5cd
be less verbose patch by (Lennert Buytenhek <buytenh at math dot leidenuniv dot nl>)
michaelni
parents:
629
diff
changeset
|
1779 fprintf(stderr, " sse"); |
0 | 1780 if (mm_flags & MM_SSE2) |
631
47a8964ba5cd
be less verbose patch by (Lennert Buytenhek <buytenh at math dot leidenuniv dot nl>)
michaelni
parents:
629
diff
changeset
|
1781 fprintf(stderr, " sse2"); |
47a8964ba5cd
be less verbose patch by (Lennert Buytenhek <buytenh at math dot leidenuniv dot nl>)
michaelni
parents:
629
diff
changeset
|
1782 fprintf(stderr, "\n"); |
0 | 1783 #endif |
1784 | |
1785 if (mm_flags & MM_MMX) { | |
1092 | 1786 const int dct_algo = avctx->dct_algo; |
1787 const int idct_algo= avctx->idct_algo; | |
1788 | |
1232
e88d3b1fb2a1
more #ifdef CONFIG_ENCODERS by (Wolfgang Hesseler <qv at multimediaware dot com>)
michaelni
parents:
1186
diff
changeset
|
1789 #ifdef CONFIG_ENCODERS |
1565 | 1790 if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){ |
1791 if(mm_flags & MM_MMXEXT){ | |
1792 c->fdct = ff_fdct_mmx2; | |
1793 }else{ | |
1794 c->fdct = ff_fdct_mmx; | |
1795 } | |
1796 } | |
1232
e88d3b1fb2a1
more #ifdef CONFIG_ENCODERS by (Wolfgang Hesseler <qv at multimediaware dot com>)
michaelni
parents:
1186
diff
changeset
|
1797 #endif //CONFIG_ENCODERS |
1092 | 1798 |
1799 if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){ | |
1800 c->idct_put= ff_simple_idct_put_mmx; | |
1801 c->idct_add= ff_simple_idct_add_mmx; | |
1324
7d328fd9d8a5
the return of the idct with 16bit output by ("Ivan Kalvachev" <ivan at cacad dot com>)
michaelni
parents:
1232
diff
changeset
|
1802 c->idct = ff_simple_idct_mmx; |
1092 | 1803 c->idct_permutation_type= FF_SIMPLE_IDCT_PERM; |
1804 }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){ | |
1805 if(mm_flags & MM_MMXEXT){ | |
1806 c->idct_put= ff_libmpeg2mmx2_idct_put; | |
1807 c->idct_add= ff_libmpeg2mmx2_idct_add; | |
1324
7d328fd9d8a5
the return of the idct with 16bit output by ("Ivan Kalvachev" <ivan at cacad dot com>)
michaelni
parents:
1232
diff
changeset
|
1808 c->idct = ff_mmxext_idct; |
1092 | 1809 }else{ |
1810 c->idct_put= ff_libmpeg2mmx_idct_put; | |
1811 c->idct_add= ff_libmpeg2mmx_idct_add; | |
1324
7d328fd9d8a5
the return of the idct with 16bit output by ("Ivan Kalvachev" <ivan at cacad dot com>)
michaelni
parents:
1232
diff
changeset
|
1812 c->idct = ff_mmx_idct; |
1092 | 1813 } |
1814 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; | |
1815 } | |
1816 | |
1530
3b31998fe22f
disable encoders where appropriate (patch courtesy of BERO
melanson
parents:
1527
diff
changeset
|
1817 #ifdef CONFIG_ENCODERS |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1818 c->get_pixels = get_pixels_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1819 c->diff_pixels = diff_pixels_mmx; |
1530
3b31998fe22f
disable encoders where appropriate (patch courtesy of BERO
melanson
parents:
1527
diff
changeset
|
1820 #endif //CONFIG_ENCODERS |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1821 c->put_pixels_clamped = put_pixels_clamped_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1822 c->add_pixels_clamped = add_pixels_clamped_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1823 c->clear_blocks = clear_blocks_mmx; |
1530
3b31998fe22f
disable encoders where appropriate (patch courtesy of BERO
melanson
parents:
1527
diff
changeset
|
1824 #ifdef CONFIG_ENCODERS |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1825 c->pix_sum = pix_sum16_mmx; |
1530
3b31998fe22f
disable encoders where appropriate (patch courtesy of BERO
melanson
parents:
1527
diff
changeset
|
1826 #endif //CONFIG_ENCODERS |
415 | 1827 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1828 c->put_pixels_tab[0][0] = put_pixels16_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1829 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1830 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1831 c->put_pixels_tab[0][3] = put_pixels16_xy2_mmx; |
0 | 1832 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1833 c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1834 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1835 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1836 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_mmx; |
651 | 1837 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1838 c->avg_pixels_tab[0][0] = avg_pixels16_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1839 c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1840 c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1841 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx; |
415 | 1842 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1843 c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1844 c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1845 c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1846 c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1847 |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1848 c->put_pixels_tab[1][0] = put_pixels8_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1849 c->put_pixels_tab[1][1] = put_pixels8_x2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1850 c->put_pixels_tab[1][2] = put_pixels8_y2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1851 c->put_pixels_tab[1][3] = put_pixels8_xy2_mmx; |
0 | 1852 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1853 c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1854 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1855 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1856 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_mmx; |
651 | 1857 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1858 c->avg_pixels_tab[1][0] = avg_pixels8_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1859 c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1860 c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1861 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx; |
651 | 1862 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1863 c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1864 c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1865 c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1866 c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx; |
954 | 1867 |
866 | 1868 c->add_bytes= add_bytes_mmx; |
1530
3b31998fe22f
disable encoders where appropriate (patch courtesy of BERO
melanson
parents:
1527
diff
changeset
|
1869 #ifdef CONFIG_ENCODERS |
866 | 1870 c->diff_bytes= diff_bytes_mmx; |
936 | 1871 |
1872 c->hadamard8_diff[0]= hadamard8_diff16_mmx; | |
1873 c->hadamard8_diff[1]= hadamard8_diff_mmx; | |
1874 | |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
984
diff
changeset
|
1875 c->pix_norm1 = pix_norm1_mmx; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
984
diff
changeset
|
1876 c->sse[0] = sse16_mmx; |
1530
3b31998fe22f
disable encoders where appropriate (patch courtesy of BERO
melanson
parents:
1527
diff
changeset
|
1877 #endif //CONFIG_ENCODERS |
1647 | 1878 |
1879 c->h263_v_loop_filter= h263_v_loop_filter_mmx; | |
1648 | 1880 c->h263_h_loop_filter= h263_h_loop_filter_mmx; |
936 | 1881 |
0 | 1882 if (mm_flags & MM_MMXEXT) { |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1883 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1884 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; |
651 | 1885 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1886 c->avg_pixels_tab[0][0] = avg_pixels16_mmx2; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1887 c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1888 c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2; |
415 | 1889 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1890 c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1891 c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2; |
651 | 1892 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1893 c->avg_pixels_tab[1][0] = avg_pixels8_mmx2; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1894 c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1895 c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; |
1092 | 1896 |
1530
3b31998fe22f
disable encoders where appropriate (patch courtesy of BERO
melanson
parents:
1527
diff
changeset
|
1897 #ifdef CONFIG_ENCODERS |
1153 | 1898 c->hadamard8_diff[0]= hadamard8_diff16_mmx2; |
1899 c->hadamard8_diff[1]= hadamard8_diff_mmx2; | |
1530
3b31998fe22f
disable encoders where appropriate (patch courtesy of BERO
melanson
parents:
1527
diff
changeset
|
1900 #endif //CONFIG_ENCODERS |
1153 | 1901 |
1092 | 1902 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ |
1903 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; | |
1904 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; | |
1905 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; | |
1906 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2; | |
1907 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; | |
1908 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; | |
1909 } | |
959 | 1910 |
961 | 1911 #if 1 |
954 | 1912 SET_QPEL_FUNC(qpel_pixels_tab[0][ 0], qpel16_mc00_mmx2) |
1913 SET_QPEL_FUNC(qpel_pixels_tab[0][ 1], qpel16_mc10_mmx2) | |
1914 SET_QPEL_FUNC(qpel_pixels_tab[0][ 2], qpel16_mc20_mmx2) | |
1915 SET_QPEL_FUNC(qpel_pixels_tab[0][ 3], qpel16_mc30_mmx2) | |
1916 SET_QPEL_FUNC(qpel_pixels_tab[0][ 4], qpel16_mc01_mmx2) | |
1917 SET_QPEL_FUNC(qpel_pixels_tab[0][ 5], qpel16_mc11_mmx2) | |
1918 SET_QPEL_FUNC(qpel_pixels_tab[0][ 6], qpel16_mc21_mmx2) | |
1919 SET_QPEL_FUNC(qpel_pixels_tab[0][ 7], qpel16_mc31_mmx2) | |
1920 SET_QPEL_FUNC(qpel_pixels_tab[0][ 8], qpel16_mc02_mmx2) | |
1921 SET_QPEL_FUNC(qpel_pixels_tab[0][ 9], qpel16_mc12_mmx2) | |
1922 SET_QPEL_FUNC(qpel_pixels_tab[0][10], qpel16_mc22_mmx2) | |
1923 SET_QPEL_FUNC(qpel_pixels_tab[0][11], qpel16_mc32_mmx2) | |
1924 SET_QPEL_FUNC(qpel_pixels_tab[0][12], qpel16_mc03_mmx2) | |
1925 SET_QPEL_FUNC(qpel_pixels_tab[0][13], qpel16_mc13_mmx2) | |
1926 SET_QPEL_FUNC(qpel_pixels_tab[0][14], qpel16_mc23_mmx2) | |
1927 SET_QPEL_FUNC(qpel_pixels_tab[0][15], qpel16_mc33_mmx2) | |
1928 SET_QPEL_FUNC(qpel_pixels_tab[1][ 0], qpel8_mc00_mmx2) | |
1929 SET_QPEL_FUNC(qpel_pixels_tab[1][ 1], qpel8_mc10_mmx2) | |
1930 SET_QPEL_FUNC(qpel_pixels_tab[1][ 2], qpel8_mc20_mmx2) | |
1931 SET_QPEL_FUNC(qpel_pixels_tab[1][ 3], qpel8_mc30_mmx2) | |
1932 SET_QPEL_FUNC(qpel_pixels_tab[1][ 4], qpel8_mc01_mmx2) | |
1933 SET_QPEL_FUNC(qpel_pixels_tab[1][ 5], qpel8_mc11_mmx2) | |
1934 SET_QPEL_FUNC(qpel_pixels_tab[1][ 6], qpel8_mc21_mmx2) | |
1935 SET_QPEL_FUNC(qpel_pixels_tab[1][ 7], qpel8_mc31_mmx2) | |
1936 SET_QPEL_FUNC(qpel_pixels_tab[1][ 8], qpel8_mc02_mmx2) | |
1937 SET_QPEL_FUNC(qpel_pixels_tab[1][ 9], qpel8_mc12_mmx2) | |
1938 SET_QPEL_FUNC(qpel_pixels_tab[1][10], qpel8_mc22_mmx2) | |
1939 SET_QPEL_FUNC(qpel_pixels_tab[1][11], qpel8_mc32_mmx2) | |
1940 SET_QPEL_FUNC(qpel_pixels_tab[1][12], qpel8_mc03_mmx2) | |
1941 SET_QPEL_FUNC(qpel_pixels_tab[1][13], qpel8_mc13_mmx2) | |
1942 SET_QPEL_FUNC(qpel_pixels_tab[1][14], qpel8_mc23_mmx2) | |
1943 SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_mmx2) | |
961 | 1944 #endif |
1527 | 1945 |
1686
68abbec33289
Here are juste two added #ifdef CONFIG_ENCODERS to allow
michael
parents:
1648
diff
changeset
|
1946 #ifdef CONFIG_ENCODERS |
1527 | 1947 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2; |
1686
68abbec33289
Here are juste two added #ifdef CONFIG_ENCODERS to allow
michael
parents:
1648
diff
changeset
|
1948 #endif //CONFIG_ENCODERS |
0 | 1949 } else if (mm_flags & MM_3DNOW) { |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1950 c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1951 c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; |
393 | 1952 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1953 c->avg_pixels_tab[0][0] = avg_pixels16_3dnow; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1954 c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1955 c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow; |
651 | 1956 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1957 c->put_pixels_tab[1][1] = put_pixels8_x2_3dnow; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1958 c->put_pixels_tab[1][2] = put_pixels8_y2_3dnow; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1959 |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1960 c->avg_pixels_tab[1][0] = avg_pixels8_3dnow; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1961 c->avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
1962 c->avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow; |
1092 | 1963 |
1964 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | |
1965 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow; | |
1966 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow; | |
1967 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow; | |
1968 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow; | |
1969 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow; | |
1970 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow; | |
1971 } | |
984 | 1972 |
954 | 1973 SET_QPEL_FUNC(qpel_pixels_tab[0][ 0], qpel16_mc00_3dnow) |
1974 SET_QPEL_FUNC(qpel_pixels_tab[0][ 1], qpel16_mc10_3dnow) | |
1975 SET_QPEL_FUNC(qpel_pixels_tab[0][ 2], qpel16_mc20_3dnow) | |
1976 SET_QPEL_FUNC(qpel_pixels_tab[0][ 3], qpel16_mc30_3dnow) | |
1977 SET_QPEL_FUNC(qpel_pixels_tab[0][ 4], qpel16_mc01_3dnow) | |
1978 SET_QPEL_FUNC(qpel_pixels_tab[0][ 5], qpel16_mc11_3dnow) | |
1979 SET_QPEL_FUNC(qpel_pixels_tab[0][ 6], qpel16_mc21_3dnow) | |
1980 SET_QPEL_FUNC(qpel_pixels_tab[0][ 7], qpel16_mc31_3dnow) | |
1981 SET_QPEL_FUNC(qpel_pixels_tab[0][ 8], qpel16_mc02_3dnow) | |
1982 SET_QPEL_FUNC(qpel_pixels_tab[0][ 9], qpel16_mc12_3dnow) | |
1983 SET_QPEL_FUNC(qpel_pixels_tab[0][10], qpel16_mc22_3dnow) | |
1984 SET_QPEL_FUNC(qpel_pixels_tab[0][11], qpel16_mc32_3dnow) | |
1985 SET_QPEL_FUNC(qpel_pixels_tab[0][12], qpel16_mc03_3dnow) | |
1986 SET_QPEL_FUNC(qpel_pixels_tab[0][13], qpel16_mc13_3dnow) | |
1987 SET_QPEL_FUNC(qpel_pixels_tab[0][14], qpel16_mc23_3dnow) | |
1988 SET_QPEL_FUNC(qpel_pixels_tab[0][15], qpel16_mc33_3dnow) | |
1989 SET_QPEL_FUNC(qpel_pixels_tab[1][ 0], qpel8_mc00_3dnow) | |
1990 SET_QPEL_FUNC(qpel_pixels_tab[1][ 1], qpel8_mc10_3dnow) | |
1991 SET_QPEL_FUNC(qpel_pixels_tab[1][ 2], qpel8_mc20_3dnow) | |
1992 SET_QPEL_FUNC(qpel_pixels_tab[1][ 3], qpel8_mc30_3dnow) | |
1993 SET_QPEL_FUNC(qpel_pixels_tab[1][ 4], qpel8_mc01_3dnow) | |
1994 SET_QPEL_FUNC(qpel_pixels_tab[1][ 5], qpel8_mc11_3dnow) | |
1995 SET_QPEL_FUNC(qpel_pixels_tab[1][ 6], qpel8_mc21_3dnow) | |
1996 SET_QPEL_FUNC(qpel_pixels_tab[1][ 7], qpel8_mc31_3dnow) | |
1997 SET_QPEL_FUNC(qpel_pixels_tab[1][ 8], qpel8_mc02_3dnow) | |
1998 SET_QPEL_FUNC(qpel_pixels_tab[1][ 9], qpel8_mc12_3dnow) | |
1999 SET_QPEL_FUNC(qpel_pixels_tab[1][10], qpel8_mc22_3dnow) | |
2000 SET_QPEL_FUNC(qpel_pixels_tab[1][11], qpel8_mc32_3dnow) | |
2001 SET_QPEL_FUNC(qpel_pixels_tab[1][12], qpel8_mc03_3dnow) | |
2002 SET_QPEL_FUNC(qpel_pixels_tab[1][13], qpel8_mc13_3dnow) | |
2003 SET_QPEL_FUNC(qpel_pixels_tab[1][14], qpel8_mc23_3dnow) | |
2004 SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_3dnow) | |
0 | 2005 } |
2006 } | |
1092 | 2007 |
1530
3b31998fe22f
disable encoders where appropriate (patch courtesy of BERO
melanson
parents:
1527
diff
changeset
|
2008 #ifdef CONFIG_ENCODERS |
1092 | 2009 dsputil_init_pix_mmx(c, avctx); |
1530
3b31998fe22f
disable encoders where appropriate (patch courtesy of BERO
melanson
parents:
1527
diff
changeset
|
2010 #endif //CONFIG_ENCODERS |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2011 #if 0 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2012 // for speed testing |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2013 get_pixels = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2014 put_pixels_clamped = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2015 add_pixels_clamped = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2016 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2017 pix_abs16x16 = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2018 pix_abs16x16_x2 = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2019 pix_abs16x16_y2 = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2020 pix_abs16x16_xy2 = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2021 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2022 put_pixels_tab[0] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2023 put_pixels_tab[1] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2024 put_pixels_tab[2] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2025 put_pixels_tab[3] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2026 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2027 put_no_rnd_pixels_tab[0] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2028 put_no_rnd_pixels_tab[1] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2029 put_no_rnd_pixels_tab[2] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2030 put_no_rnd_pixels_tab[3] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2031 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2032 avg_pixels_tab[0] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2033 avg_pixels_tab[1] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2034 avg_pixels_tab[2] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2035 avg_pixels_tab[3] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2036 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2037 avg_no_rnd_pixels_tab[0] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2038 avg_no_rnd_pixels_tab[1] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2039 avg_no_rnd_pixels_tab[2] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2040 avg_no_rnd_pixels_tab[3] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2041 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2042 //av_fdct = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2043 //ff_idct = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
2044 #endif |
0 | 2045 } |