annotate i386/dsputil_mmx.c @ 625:bb6a69f9d409 libavcodec

slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG) per context DCT selection
author michaelni
date Thu, 29 Aug 2002 23:55:32 +0000
parents c9b17c1a02e0
children f65040bf4357
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
1 /*
986e461dc072 Initial revision
glantau
parents:
diff changeset
2 * MMX optimized DSP utils
429
718a22dc121f license/copyright change
glantau
parents: 422
diff changeset
3 * Copyright (c) 2000, 2001 Fabrice Bellard.
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
4 *
429
718a22dc121f license/copyright change
glantau
parents: 422
diff changeset
5 * This library is free software; you can redistribute it and/or
718a22dc121f license/copyright change
glantau
parents: 422
diff changeset
6 * modify it under the terms of the GNU Lesser General Public
718a22dc121f license/copyright change
glantau
parents: 422
diff changeset
7 * License as published by the Free Software Foundation; either
718a22dc121f license/copyright change
glantau
parents: 422
diff changeset
8 * version 2 of the License, or (at your option) any later version.
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
9 *
429
718a22dc121f license/copyright change
glantau
parents: 422
diff changeset
10 * This library is distributed in the hope that it will be useful,
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
429
718a22dc121f license/copyright change
glantau
parents: 422
diff changeset
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
718a22dc121f license/copyright change
glantau
parents: 422
diff changeset
13 * Lesser General Public License for more details.
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
14 *
429
718a22dc121f license/copyright change
glantau
parents: 422
diff changeset
15 * You should have received a copy of the GNU Lesser General Public
718a22dc121f license/copyright change
glantau
parents: 422
diff changeset
16 * License along with this library; if not, write to the Free Software
718a22dc121f license/copyright change
glantau
parents: 422
diff changeset
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
18 *
986e461dc072 Initial revision
glantau
parents:
diff changeset
19 * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
986e461dc072 Initial revision
glantau
parents:
diff changeset
20 */
986e461dc072 Initial revision
glantau
parents:
diff changeset
21
986e461dc072 Initial revision
glantau
parents:
diff changeset
22 #include "../dsputil.h"
174
ac5075a55488 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents: 151
diff changeset
23 #include "../simple_idct.h"
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
24
5
4479bcab253e suppressed no longer needed emms()
glantau
parents: 0
diff changeset
25 int mm_flags; /* multimedia extension flags */
4479bcab253e suppressed no longer needed emms()
glantau
parents: 0
diff changeset
26
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
27 int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
28 int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
29 int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
30 int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
31
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
32 int pix_abs16x16_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
33 int pix_abs16x16_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
34 int pix_abs16x16_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
35 int pix_abs16x16_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
36
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
37 int pix_abs8x8_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
38 int pix_abs8x8_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
39 int pix_abs8x8_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
40 int pix_abs8x8_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
41
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
42 int pix_abs8x8_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
43 int pix_abs8x8_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
44 int pix_abs8x8_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
45 int pix_abs8x8_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
46
42
8068c4bce9c1 added mmx idct
glantau
parents: 19
diff changeset
47 /* external functions, from idct_mmx.c */
8068c4bce9c1 added mmx idct
glantau
parents: 19
diff changeset
48 void ff_mmx_idct(DCTELEM *block);
8068c4bce9c1 added mmx idct
glantau
parents: 19
diff changeset
49 void ff_mmxext_idct(DCTELEM *block);
19
82d4c9be9873 MMX/MMXEXT iDCT support, using external functions currently defined in libmpeg2
arpi_esp
parents: 8
diff changeset
50
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
51 /* pixel operations */
387
b8f3affeb8e1 shared lib support (req by kabi) ...
michaelni
parents: 386
diff changeset
52 static const uint64_t mm_bone __attribute__ ((aligned(8))) = 0x0101010101010101ULL;
b8f3affeb8e1 shared lib support (req by kabi) ...
michaelni
parents: 386
diff changeset
53 static const uint64_t mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL;
b8f3affeb8e1 shared lib support (req by kabi) ...
michaelni
parents: 386
diff changeset
54 static const uint64_t mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002ULL;
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
55
247
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
56 #define JUMPALIGN() __asm __volatile (".balign 8"::)
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
57 #define MOVQ_ZERO(regd) __asm __volatile ("pxor %%" #regd ", %%" #regd ::)
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
58
448
e8c8ca9106aa * removed MANGLE from macros for setting constants
kabi
parents: 446
diff changeset
59 #define MOVQ_WONE(regd) \
e8c8ca9106aa * removed MANGLE from macros for setting constants
kabi
parents: 446
diff changeset
60 __asm __volatile ( \
e8c8ca9106aa * removed MANGLE from macros for setting constants
kabi
parents: 446
diff changeset
61 "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
e8c8ca9106aa * removed MANGLE from macros for setting constants
kabi
parents: 446
diff changeset
62 "psrlw $15, %%" #regd ::)
e8c8ca9106aa * removed MANGLE from macros for setting constants
kabi
parents: 446
diff changeset
63
e8c8ca9106aa * removed MANGLE from macros for setting constants
kabi
parents: 446
diff changeset
64 #define MOVQ_BFE(regd) \
e8c8ca9106aa * removed MANGLE from macros for setting constants
kabi
parents: 446
diff changeset
65 __asm __volatile ( \
e8c8ca9106aa * removed MANGLE from macros for setting constants
kabi
parents: 446
diff changeset
66 "pcmpeqd %%" #regd ", %%" #regd " \n\t"\
e8c8ca9106aa * removed MANGLE from macros for setting constants
kabi
parents: 446
diff changeset
67 "paddb %%" #regd ", %%" #regd " \n\t" ::)
e8c8ca9106aa * removed MANGLE from macros for setting constants
kabi
parents: 446
diff changeset
68
247
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
69 #ifndef PIC
448
e8c8ca9106aa * removed MANGLE from macros for setting constants
kabi
parents: 446
diff changeset
70 #define MOVQ_BONE(regd) __asm __volatile ("movq %0, %%" #regd " \n\t" ::"m"(mm_bone))
247
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
71 #define MOVQ_WTWO(regd) __asm __volatile ("movq %0, %%" #regd " \n\t" ::"m"(mm_wtwo))
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
72 #else
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
73 // for shared library it's better to use this way for accessing constants
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
74 // pcmpeqd -> -1
448
e8c8ca9106aa * removed MANGLE from macros for setting constants
kabi
parents: 446
diff changeset
75 #define MOVQ_BONE(regd) \
247
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
76 __asm __volatile ( \
448
e8c8ca9106aa * removed MANGLE from macros for setting constants
kabi
parents: 446
diff changeset
77 "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
e8c8ca9106aa * removed MANGLE from macros for setting constants
kabi
parents: 446
diff changeset
78 "psrlw $15, %%" #regd " \n\t" \
e8c8ca9106aa * removed MANGLE from macros for setting constants
kabi
parents: 446
diff changeset
79 "packuswb %%" #regd ", %%" #regd " \n\t" ::)
247
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
80
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
81 #define MOVQ_WTWO(regd) \
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
82 __asm __volatile ( \
448
e8c8ca9106aa * removed MANGLE from macros for setting constants
kabi
parents: 446
diff changeset
83 "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
e8c8ca9106aa * removed MANGLE from macros for setting constants
kabi
parents: 446
diff changeset
84 "psrlw $15, %%" #regd " \n\t" \
e8c8ca9106aa * removed MANGLE from macros for setting constants
kabi
parents: 446
diff changeset
85 "psllw $1, %%" #regd " \n\t"::)
387
b8f3affeb8e1 shared lib support (req by kabi) ...
michaelni
parents: 386
diff changeset
86
247
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
87 #endif
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
88
448
e8c8ca9106aa * removed MANGLE from macros for setting constants
kabi
parents: 446
diff changeset
89 // using regr as temporary and for the output result
444
a5edef76dac6 * new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents: 438
diff changeset
90 // first argument is unmodified and second is trashed
471
d7f65ea52aaa * reimplemented remaing avg_ pixel functions
kabi
parents: 448
diff changeset
91 // regfe is supposed to contain 0xfefefefefefefefe
d7f65ea52aaa * reimplemented remaing avg_ pixel functions
kabi
parents: 448
diff changeset
92 #define PAVGB_MMX_NO_RND(rega, regb, regr, regfe) \
445
62c01dbdc1e0 * code with new PAVGB for MMX only CPU splited into separate file
kabi
parents: 444
diff changeset
93 "movq " #rega ", " #regr " \n\t"\
62c01dbdc1e0 * code with new PAVGB for MMX only CPU splited into separate file
kabi
parents: 444
diff changeset
94 "pand " #regb ", " #regr " \n\t"\
444
a5edef76dac6 * new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents: 438
diff changeset
95 "pxor " #rega ", " #regb " \n\t"\
471
d7f65ea52aaa * reimplemented remaing avg_ pixel functions
kabi
parents: 448
diff changeset
96 "pand " #regfe "," #regb " \n\t"\
444
a5edef76dac6 * new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents: 438
diff changeset
97 "psrlq $1, " #regb " \n\t"\
445
62c01dbdc1e0 * code with new PAVGB for MMX only CPU splited into separate file
kabi
parents: 444
diff changeset
98 "paddb " #regb ", " #regr " \n\t"
444
a5edef76dac6 * new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents: 438
diff changeset
99
471
d7f65ea52aaa * reimplemented remaing avg_ pixel functions
kabi
parents: 448
diff changeset
100 #define PAVGB_MMX(rega, regb, regr, regfe) \
445
62c01dbdc1e0 * code with new PAVGB for MMX only CPU splited into separate file
kabi
parents: 444
diff changeset
101 "movq " #rega ", " #regr " \n\t"\
62c01dbdc1e0 * code with new PAVGB for MMX only CPU splited into separate file
kabi
parents: 444
diff changeset
102 "por " #regb ", " #regr " \n\t"\
444
a5edef76dac6 * new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents: 438
diff changeset
103 "pxor " #rega ", " #regb " \n\t"\
471
d7f65ea52aaa * reimplemented remaing avg_ pixel functions
kabi
parents: 448
diff changeset
104 "pand " #regfe "," #regb " \n\t"\
444
a5edef76dac6 * new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents: 438
diff changeset
105 "psrlq $1, " #regb " \n\t"\
445
62c01dbdc1e0 * code with new PAVGB for MMX only CPU splited into separate file
kabi
parents: 444
diff changeset
106 "psubb " #regb ", " #regr " \n\t"
62c01dbdc1e0 * code with new PAVGB for MMX only CPU splited into separate file
kabi
parents: 444
diff changeset
107
471
d7f65ea52aaa * reimplemented remaing avg_ pixel functions
kabi
parents: 448
diff changeset
108 // mm6 is supposed to contain 0xfefefefefefefefe
446
efe0c0d40577 * reenabled original xy2 put routine - rounding error is really bad with
kabi
parents: 445
diff changeset
109 #define PAVGBP_MMX_NO_RND(rega, regb, regr, regc, regd, regp) \
efe0c0d40577 * reenabled original xy2 put routine - rounding error is really bad with
kabi
parents: 445
diff changeset
110 "movq " #rega ", " #regr " \n\t"\
efe0c0d40577 * reenabled original xy2 put routine - rounding error is really bad with
kabi
parents: 445
diff changeset
111 "movq " #regc ", " #regp " \n\t"\
efe0c0d40577 * reenabled original xy2 put routine - rounding error is really bad with
kabi
parents: 445
diff changeset
112 "pand " #regb ", " #regr " \n\t"\
efe0c0d40577 * reenabled original xy2 put routine - rounding error is really bad with
kabi
parents: 445
diff changeset
113 "pand " #regd ", " #regp " \n\t"\
efe0c0d40577 * reenabled original xy2 put routine - rounding error is really bad with
kabi
parents: 445
diff changeset
114 "pxor " #rega ", " #regb " \n\t"\
efe0c0d40577 * reenabled original xy2 put routine - rounding error is really bad with
kabi
parents: 445
diff changeset
115 "pxor " #regc ", " #regd " \n\t"\
448
e8c8ca9106aa * removed MANGLE from macros for setting constants
kabi
parents: 446
diff changeset
116 "pand %%mm6, " #regb " \n\t"\
e8c8ca9106aa * removed MANGLE from macros for setting constants
kabi
parents: 446
diff changeset
117 "pand %%mm6, " #regd " \n\t"\
446
efe0c0d40577 * reenabled original xy2 put routine - rounding error is really bad with
kabi
parents: 445
diff changeset
118 "psrlq $1, " #regb " \n\t"\
efe0c0d40577 * reenabled original xy2 put routine - rounding error is really bad with
kabi
parents: 445
diff changeset
119 "psrlq $1, " #regd " \n\t"\
efe0c0d40577 * reenabled original xy2 put routine - rounding error is really bad with
kabi
parents: 445
diff changeset
120 "paddb " #regb ", " #regr " \n\t"\
efe0c0d40577 * reenabled original xy2 put routine - rounding error is really bad with
kabi
parents: 445
diff changeset
121 "paddb " #regd ", " #regp " \n\t"
efe0c0d40577 * reenabled original xy2 put routine - rounding error is really bad with
kabi
parents: 445
diff changeset
122
efe0c0d40577 * reenabled original xy2 put routine - rounding error is really bad with
kabi
parents: 445
diff changeset
123 #define PAVGBP_MMX(rega, regb, regr, regc, regd, regp) \
efe0c0d40577 * reenabled original xy2 put routine - rounding error is really bad with
kabi
parents: 445
diff changeset
124 "movq " #rega ", " #regr " \n\t"\
efe0c0d40577 * reenabled original xy2 put routine - rounding error is really bad with
kabi
parents: 445
diff changeset
125 "movq " #regc ", " #regp " \n\t"\
efe0c0d40577 * reenabled original xy2 put routine - rounding error is really bad with
kabi
parents: 445
diff changeset
126 "por " #regb ", " #regr " \n\t"\
efe0c0d40577 * reenabled original xy2 put routine - rounding error is really bad with
kabi
parents: 445
diff changeset
127 "por " #regd ", " #regp " \n\t"\
efe0c0d40577 * reenabled original xy2 put routine - rounding error is really bad with
kabi
parents: 445
diff changeset
128 "pxor " #rega ", " #regb " \n\t"\
efe0c0d40577 * reenabled original xy2 put routine - rounding error is really bad with
kabi
parents: 445
diff changeset
129 "pxor " #regc ", " #regd " \n\t"\
448
e8c8ca9106aa * removed MANGLE from macros for setting constants
kabi
parents: 446
diff changeset
130 "pand %%mm6, " #regb " \n\t"\
e8c8ca9106aa * removed MANGLE from macros for setting constants
kabi
parents: 446
diff changeset
131 "pand %%mm6, " #regd " \n\t"\
446
efe0c0d40577 * reenabled original xy2 put routine - rounding error is really bad with
kabi
parents: 445
diff changeset
132 "psrlq $1, " #regd " \n\t"\
efe0c0d40577 * reenabled original xy2 put routine - rounding error is really bad with
kabi
parents: 445
diff changeset
133 "psrlq $1, " #regb " \n\t"\
efe0c0d40577 * reenabled original xy2 put routine - rounding error is really bad with
kabi
parents: 445
diff changeset
134 "psubb " #regb ", " #regr " \n\t"\
efe0c0d40577 * reenabled original xy2 put routine - rounding error is really bad with
kabi
parents: 445
diff changeset
135 "psubb " #regd ", " #regp " \n\t"
efe0c0d40577 * reenabled original xy2 put routine - rounding error is really bad with
kabi
parents: 445
diff changeset
136
445
62c01dbdc1e0 * code with new PAVGB for MMX only CPU splited into separate file
kabi
parents: 444
diff changeset
137 /***********************************/
62c01dbdc1e0 * code with new PAVGB for MMX only CPU splited into separate file
kabi
parents: 444
diff changeset
138 /* MMX no rounding */
62c01dbdc1e0 * code with new PAVGB for MMX only CPU splited into separate file
kabi
parents: 444
diff changeset
139 #define DEF(x, y) x ## _no_rnd_ ## y ##_mmx
448
e8c8ca9106aa * removed MANGLE from macros for setting constants
kabi
parents: 446
diff changeset
140 #define SET_RND MOVQ_WONE
e8c8ca9106aa * removed MANGLE from macros for setting constants
kabi
parents: 446
diff changeset
141 #define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX_NO_RND(a, b, c, d, e, f)
471
d7f65ea52aaa * reimplemented remaing avg_ pixel functions
kabi
parents: 448
diff changeset
142 #define PAVGB(a, b, c, e) PAVGB_MMX_NO_RND(a, b, c, e)
445
62c01dbdc1e0 * code with new PAVGB for MMX only CPU splited into separate file
kabi
parents: 444
diff changeset
143
62c01dbdc1e0 * code with new PAVGB for MMX only CPU splited into separate file
kabi
parents: 444
diff changeset
144 #include "dsputil_mmx_rnd.h"
444
a5edef76dac6 * new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents: 438
diff changeset
145
445
62c01dbdc1e0 * code with new PAVGB for MMX only CPU splited into separate file
kabi
parents: 444
diff changeset
146 #undef DEF
448
e8c8ca9106aa * removed MANGLE from macros for setting constants
kabi
parents: 446
diff changeset
147 #undef SET_RND
446
efe0c0d40577 * reenabled original xy2 put routine - rounding error is really bad with
kabi
parents: 445
diff changeset
148 #undef PAVGBP
471
d7f65ea52aaa * reimplemented remaing avg_ pixel functions
kabi
parents: 448
diff changeset
149 #undef PAVGB
445
62c01dbdc1e0 * code with new PAVGB for MMX only CPU splited into separate file
kabi
parents: 444
diff changeset
150 /***********************************/
62c01dbdc1e0 * code with new PAVGB for MMX only CPU splited into separate file
kabi
parents: 444
diff changeset
151 /* MMX rounding */
62c01dbdc1e0 * code with new PAVGB for MMX only CPU splited into separate file
kabi
parents: 444
diff changeset
152
62c01dbdc1e0 * code with new PAVGB for MMX only CPU splited into separate file
kabi
parents: 444
diff changeset
153 #define DEF(x, y) x ## _ ## y ##_mmx
448
e8c8ca9106aa * removed MANGLE from macros for setting constants
kabi
parents: 446
diff changeset
154 #define SET_RND MOVQ_WTWO
e8c8ca9106aa * removed MANGLE from macros for setting constants
kabi
parents: 446
diff changeset
155 #define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX(a, b, c, d, e, f)
471
d7f65ea52aaa * reimplemented remaing avg_ pixel functions
kabi
parents: 448
diff changeset
156 #define PAVGB(a, b, c, e) PAVGB_MMX(a, b, c, e)
445
62c01dbdc1e0 * code with new PAVGB for MMX only CPU splited into separate file
kabi
parents: 444
diff changeset
157
62c01dbdc1e0 * code with new PAVGB for MMX only CPU splited into separate file
kabi
parents: 444
diff changeset
158 #include "dsputil_mmx_rnd.h"
62c01dbdc1e0 * code with new PAVGB for MMX only CPU splited into separate file
kabi
parents: 444
diff changeset
159
62c01dbdc1e0 * code with new PAVGB for MMX only CPU splited into separate file
kabi
parents: 444
diff changeset
160 #undef DEF
448
e8c8ca9106aa * removed MANGLE from macros for setting constants
kabi
parents: 446
diff changeset
161 #undef SET_RND
446
efe0c0d40577 * reenabled original xy2 put routine - rounding error is really bad with
kabi
parents: 445
diff changeset
162 #undef PAVGBP
471
d7f65ea52aaa * reimplemented remaing avg_ pixel functions
kabi
parents: 448
diff changeset
163 #undef PAVGB
387
b8f3affeb8e1 shared lib support (req by kabi) ...
michaelni
parents: 386
diff changeset
164
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
165 /***********************************/
986e461dc072 Initial revision
glantau
parents:
diff changeset
166 /* 3Dnow specific */
986e461dc072 Initial revision
glantau
parents:
diff changeset
167
986e461dc072 Initial revision
glantau
parents:
diff changeset
168 #define DEF(x) x ## _3dnow
986e461dc072 Initial revision
glantau
parents:
diff changeset
169 /* for Athlons PAVGUSB is preferred */
986e461dc072 Initial revision
glantau
parents:
diff changeset
170 #define PAVGB "pavgusb"
986e461dc072 Initial revision
glantau
parents:
diff changeset
171
986e461dc072 Initial revision
glantau
parents:
diff changeset
172 #include "dsputil_mmx_avg.h"
986e461dc072 Initial revision
glantau
parents:
diff changeset
173
986e461dc072 Initial revision
glantau
parents:
diff changeset
174 #undef DEF
986e461dc072 Initial revision
glantau
parents:
diff changeset
175 #undef PAVGB
986e461dc072 Initial revision
glantau
parents:
diff changeset
176
986e461dc072 Initial revision
glantau
parents:
diff changeset
177 /***********************************/
986e461dc072 Initial revision
glantau
parents:
diff changeset
178 /* MMX2 specific */
986e461dc072 Initial revision
glantau
parents:
diff changeset
179
386
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
180 #define DEF(x) x ## _mmx2
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
181
986e461dc072 Initial revision
glantau
parents:
diff changeset
182 /* Introduced only in MMX2 set */
986e461dc072 Initial revision
glantau
parents:
diff changeset
183 #define PAVGB "pavgb"
986e461dc072 Initial revision
glantau
parents:
diff changeset
184
986e461dc072 Initial revision
glantau
parents:
diff changeset
185 #include "dsputil_mmx_avg.h"
986e461dc072 Initial revision
glantau
parents:
diff changeset
186
986e461dc072 Initial revision
glantau
parents:
diff changeset
187 #undef DEF
986e461dc072 Initial revision
glantau
parents:
diff changeset
188 #undef PAVGB
986e461dc072 Initial revision
glantau
parents:
diff changeset
189
986e461dc072 Initial revision
glantau
parents:
diff changeset
190 /***********************************/
986e461dc072 Initial revision
glantau
parents:
diff changeset
191 /* standard MMX */
986e461dc072 Initial revision
glantau
parents:
diff changeset
192
986e461dc072 Initial revision
glantau
parents:
diff changeset
193 static void get_pixels_mmx(DCTELEM *block, const UINT8 *pixels, int line_size)
986e461dc072 Initial revision
glantau
parents:
diff changeset
194 {
386
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
195 asm volatile(
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
196 "movl $-128, %%eax \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
197 "pxor %%mm7, %%mm7 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
198 ".balign 16 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
199 "1: \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
200 "movq (%0), %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
201 "movq (%0, %2), %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
202 "movq %%mm0, %%mm1 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
203 "movq %%mm2, %%mm3 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
204 "punpcklbw %%mm7, %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
205 "punpckhbw %%mm7, %%mm1 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
206 "punpcklbw %%mm7, %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
207 "punpckhbw %%mm7, %%mm3 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
208 "movq %%mm0, (%1, %%eax)\n\t"
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
209 "movq %%mm1, 8(%1, %%eax)\n\t"
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
210 "movq %%mm2, 16(%1, %%eax)\n\t"
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
211 "movq %%mm3, 24(%1, %%eax)\n\t"
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
212 "addl %3, %0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
213 "addl $32, %%eax \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
214 "js 1b \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
215 : "+r" (pixels)
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
216 : "r" (block+64), "r" (line_size), "r" (line_size*2)
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
217 : "%eax"
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
218 );
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
219 }
986e461dc072 Initial revision
glantau
parents:
diff changeset
220
324
9c6f056f0e41 fixed mpeg4 time stuff on encoding
michaelni
parents: 296
diff changeset
221 static void diff_pixels_mmx(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride)
9c6f056f0e41 fixed mpeg4 time stuff on encoding
michaelni
parents: 296
diff changeset
222 {
9c6f056f0e41 fixed mpeg4 time stuff on encoding
michaelni
parents: 296
diff changeset
223 asm volatile(
386
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
224 "pxor %%mm7, %%mm7 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
225 "movl $-128, %%eax \n\t"
324
9c6f056f0e41 fixed mpeg4 time stuff on encoding
michaelni
parents: 296
diff changeset
226 ".balign 16 \n\t"
9c6f056f0e41 fixed mpeg4 time stuff on encoding
michaelni
parents: 296
diff changeset
227 "1: \n\t"
9c6f056f0e41 fixed mpeg4 time stuff on encoding
michaelni
parents: 296
diff changeset
228 "movq (%0), %%mm0 \n\t"
9c6f056f0e41 fixed mpeg4 time stuff on encoding
michaelni
parents: 296
diff changeset
229 "movq (%1), %%mm2 \n\t"
9c6f056f0e41 fixed mpeg4 time stuff on encoding
michaelni
parents: 296
diff changeset
230 "movq %%mm0, %%mm1 \n\t"
9c6f056f0e41 fixed mpeg4 time stuff on encoding
michaelni
parents: 296
diff changeset
231 "movq %%mm2, %%mm3 \n\t"
9c6f056f0e41 fixed mpeg4 time stuff on encoding
michaelni
parents: 296
diff changeset
232 "punpcklbw %%mm7, %%mm0 \n\t"
9c6f056f0e41 fixed mpeg4 time stuff on encoding
michaelni
parents: 296
diff changeset
233 "punpckhbw %%mm7, %%mm1 \n\t"
9c6f056f0e41 fixed mpeg4 time stuff on encoding
michaelni
parents: 296
diff changeset
234 "punpcklbw %%mm7, %%mm2 \n\t"
9c6f056f0e41 fixed mpeg4 time stuff on encoding
michaelni
parents: 296
diff changeset
235 "punpckhbw %%mm7, %%mm3 \n\t"
9c6f056f0e41 fixed mpeg4 time stuff on encoding
michaelni
parents: 296
diff changeset
236 "psubw %%mm2, %%mm0 \n\t"
9c6f056f0e41 fixed mpeg4 time stuff on encoding
michaelni
parents: 296
diff changeset
237 "psubw %%mm3, %%mm1 \n\t"
9c6f056f0e41 fixed mpeg4 time stuff on encoding
michaelni
parents: 296
diff changeset
238 "movq %%mm0, (%2, %%eax)\n\t"
9c6f056f0e41 fixed mpeg4 time stuff on encoding
michaelni
parents: 296
diff changeset
239 "movq %%mm1, 8(%2, %%eax)\n\t"
9c6f056f0e41 fixed mpeg4 time stuff on encoding
michaelni
parents: 296
diff changeset
240 "addl %3, %0 \n\t"
9c6f056f0e41 fixed mpeg4 time stuff on encoding
michaelni
parents: 296
diff changeset
241 "addl %3, %1 \n\t"
9c6f056f0e41 fixed mpeg4 time stuff on encoding
michaelni
parents: 296
diff changeset
242 "addl $16, %%eax \n\t"
9c6f056f0e41 fixed mpeg4 time stuff on encoding
michaelni
parents: 296
diff changeset
243 "jnz 1b \n\t"
9c6f056f0e41 fixed mpeg4 time stuff on encoding
michaelni
parents: 296
diff changeset
244 : "+r" (s1), "+r" (s2)
9c6f056f0e41 fixed mpeg4 time stuff on encoding
michaelni
parents: 296
diff changeset
245 : "r" (block+64), "r" (stride)
9c6f056f0e41 fixed mpeg4 time stuff on encoding
michaelni
parents: 296
diff changeset
246 : "%eax"
9c6f056f0e41 fixed mpeg4 time stuff on encoding
michaelni
parents: 296
diff changeset
247 );
9c6f056f0e41 fixed mpeg4 time stuff on encoding
michaelni
parents: 296
diff changeset
248 }
9c6f056f0e41 fixed mpeg4 time stuff on encoding
michaelni
parents: 296
diff changeset
249
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
250 static void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size)
986e461dc072 Initial revision
glantau
parents:
diff changeset
251 {
986e461dc072 Initial revision
glantau
parents:
diff changeset
252 const DCTELEM *p;
986e461dc072 Initial revision
glantau
parents:
diff changeset
253 UINT8 *pix;
986e461dc072 Initial revision
glantau
parents:
diff changeset
254
986e461dc072 Initial revision
glantau
parents:
diff changeset
255 /* read the pixels */
986e461dc072 Initial revision
glantau
parents:
diff changeset
256 p = block;
986e461dc072 Initial revision
glantau
parents:
diff changeset
257 pix = pixels;
247
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
258 /* unrolled loop */
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
259 __asm __volatile(
151
ae0516eadae2 fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents: 42
diff changeset
260 "movq %3, %%mm0\n\t"
ae0516eadae2 fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents: 42
diff changeset
261 "movq 8%3, %%mm1\n\t"
ae0516eadae2 fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents: 42
diff changeset
262 "movq 16%3, %%mm2\n\t"
ae0516eadae2 fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents: 42
diff changeset
263 "movq 24%3, %%mm3\n\t"
ae0516eadae2 fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents: 42
diff changeset
264 "movq 32%3, %%mm4\n\t"
ae0516eadae2 fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents: 42
diff changeset
265 "movq 40%3, %%mm5\n\t"
ae0516eadae2 fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents: 42
diff changeset
266 "movq 48%3, %%mm6\n\t"
ae0516eadae2 fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents: 42
diff changeset
267 "movq 56%3, %%mm7\n\t"
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
268 "packuswb %%mm1, %%mm0\n\t"
986e461dc072 Initial revision
glantau
parents:
diff changeset
269 "packuswb %%mm3, %%mm2\n\t"
986e461dc072 Initial revision
glantau
parents:
diff changeset
270 "packuswb %%mm5, %%mm4\n\t"
986e461dc072 Initial revision
glantau
parents:
diff changeset
271 "packuswb %%mm7, %%mm6\n\t"
151
ae0516eadae2 fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents: 42
diff changeset
272 "movq %%mm0, (%0)\n\t"
ae0516eadae2 fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents: 42
diff changeset
273 "movq %%mm2, (%0, %1)\n\t"
ae0516eadae2 fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents: 42
diff changeset
274 "movq %%mm4, (%0, %1, 2)\n\t"
ae0516eadae2 fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents: 42
diff changeset
275 "movq %%mm6, (%0, %2)\n\t"
ae0516eadae2 fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents: 42
diff changeset
276 ::"r" (pix), "r" (line_size), "r" (line_size*3), "m"(*p)
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
277 :"memory");
986e461dc072 Initial revision
glantau
parents:
diff changeset
278 pix += line_size*4;
986e461dc072 Initial revision
glantau
parents:
diff changeset
279 p += 32;
247
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
280
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
281 // if here would be an exact copy of the code above
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
282 // compiler would generate some very strange code
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
283 // thus using "r"
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
284 __asm __volatile(
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
285 "movq (%3), %%mm0\n\t"
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
286 "movq 8(%3), %%mm1\n\t"
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
287 "movq 16(%3), %%mm2\n\t"
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
288 "movq 24(%3), %%mm3\n\t"
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
289 "movq 32(%3), %%mm4\n\t"
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
290 "movq 40(%3), %%mm5\n\t"
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
291 "movq 48(%3), %%mm6\n\t"
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
292 "movq 56(%3), %%mm7\n\t"
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
293 "packuswb %%mm1, %%mm0\n\t"
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
294 "packuswb %%mm3, %%mm2\n\t"
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
295 "packuswb %%mm5, %%mm4\n\t"
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
296 "packuswb %%mm7, %%mm6\n\t"
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
297 "movq %%mm0, (%0)\n\t"
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
298 "movq %%mm2, (%0, %1)\n\t"
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
299 "movq %%mm4, (%0, %1, 2)\n\t"
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
300 "movq %%mm6, (%0, %2)\n\t"
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
301 ::"r" (pix), "r" (line_size), "r" (line_size*3), "r"(p)
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
302 :"memory");
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
303 }
986e461dc072 Initial revision
glantau
parents:
diff changeset
304
986e461dc072 Initial revision
glantau
parents:
diff changeset
/*
 * Add an 8x8 block of signed coefficients to the pixels already stored at
 * `pixels` and write the sums back, clamped to 0..255.
 *
 * block:     64 consecutive coefficients are read.
 *            NOTE(review): the word-wise adds below assume DCTELEM is a
 *            16-bit type -- confirm against its typedef.
 * pixels:    8 bytes on each of 8 consecutive lines are read and rewritten.
 * line_size: distance in bytes between two lines of `pixels`.
 */
305 static void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size)
986e461dc072 Initial revision
glantau
parents:
diff changeset
306 {
986e461dc072 Initial revision
glantau
parents:
diff changeset
307 const DCTELEM *p;
986e461dc072 Initial revision
glantau
parents:
diff changeset
308 UINT8 *pix;
986e461dc072 Initial revision
glantau
parents:
diff changeset
309 int i;
986e461dc072 Initial revision
glantau
parents:
diff changeset
310
986e461dc072 Initial revision
glantau
parents:
diff changeset
311 /* working pointers: p walks the coefficients, pix the destination lines */
986e461dc072 Initial revision
glantau
parents:
diff changeset
312 p = block;
986e461dc072 Initial revision
glantau
parents:
diff changeset
313 pix = pixels;
247
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
/* NOTE(review): MOVQ_ZERO is defined outside this view; presumably it clears
   mm7, which the unpack instructions below use as the source of the zero
   high bytes -- confirm. mm7 has to stay untouched until the loop ends. */
314 MOVQ_ZERO(mm7);
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
/* 4 iterations, each handling two lines */
315 i = 4;
342
8635a7036395 * fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents: 324
diff changeset
316 do {
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
317 __asm __volatile(
342
8635a7036395 * fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents: 324
diff changeset
/* load 32 bytes of coefficients: mm0/mm1 for the first line, mm2/mm3 for the second */
318 "movq (%2), %%mm0\n\t"
8635a7036395 * fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents: 324
diff changeset
319 "movq 8(%2), %%mm1\n\t"
8635a7036395 * fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents: 324
diff changeset
320 "movq 16(%2), %%mm2\n\t"
8635a7036395 * fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents: 324
diff changeset
321 "movq 24(%2), %%mm3\n\t"
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
/* load the 8 current pixels of each of the two lines */
322 "movq %0, %%mm4\n\t"
986e461dc072 Initial revision
glantau
parents:
diff changeset
323 "movq %1, %%mm6\n\t"
986e461dc072 Initial revision
glantau
parents:
diff changeset
/* first line: widen the bytes to words (mm4 = low four, mm5 = high four)
   and add them to the coefficients with signed saturation */
324 "movq %%mm4, %%mm5\n\t"
986e461dc072 Initial revision
glantau
parents:
diff changeset
325 "punpcklbw %%mm7, %%mm4\n\t"
986e461dc072 Initial revision
glantau
parents:
diff changeset
326 "punpckhbw %%mm7, %%mm5\n\t"
986e461dc072 Initial revision
glantau
parents:
diff changeset
327 "paddsw %%mm4, %%mm0\n\t"
986e461dc072 Initial revision
glantau
parents:
diff changeset
328 "paddsw %%mm5, %%mm1\n\t"
986e461dc072 Initial revision
glantau
parents:
diff changeset
/* second line: same, with mm6 = low four and mm5 = high four */
329 "movq %%mm6, %%mm5\n\t"
986e461dc072 Initial revision
glantau
parents:
diff changeset
330 "punpcklbw %%mm7, %%mm6\n\t"
986e461dc072 Initial revision
glantau
parents:
diff changeset
331 "punpckhbw %%mm7, %%mm5\n\t"
986e461dc072 Initial revision
glantau
parents:
diff changeset
332 "paddsw %%mm6, %%mm2\n\t"
986e461dc072 Initial revision
glantau
parents:
diff changeset
333 "paddsw %%mm5, %%mm3\n\t"
986e461dc072 Initial revision
glantau
parents:
diff changeset
/* pack the sums back to bytes with unsigned saturation and store both lines */
334 "packuswb %%mm1, %%mm0\n\t"
986e461dc072 Initial revision
glantau
parents:
diff changeset
335 "packuswb %%mm3, %%mm2\n\t"
986e461dc072 Initial revision
glantau
parents:
diff changeset
336 "movq %%mm0, %0\n\t"
986e461dc072 Initial revision
glantau
parents:
diff changeset
337 "movq %%mm2, %1\n\t"
151
ae0516eadae2 fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents: 42
diff changeset
/* %0 = current line, %1 = next line (both read and written), %2 = coefficients */
338 :"+m"(*pix), "+m"(*(pix+line_size))
342
8635a7036395 * fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents: 324
diff changeset
339 :"r"(p)
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
340 :"memory");
986e461dc072 Initial revision
glantau
parents:
diff changeset
/* advance by the two lines / 16 coefficients just processed */
341 pix += line_size*2;
986e461dc072 Initial revision
glantau
parents:
diff changeset
342 p += 16;
342
8635a7036395 * fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents: 324
diff changeset
343 } while (--i);
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
344 }
986e461dc072 Initial revision
glantau
parents:
diff changeset
345
986e461dc072 Initial revision
glantau
parents:
diff changeset
/*
 * Copy h lines of 8 bytes each from `pixels` to `block`.
 *
 * block:     destination, advanced by line_size per line
 * pixels:    source, advanced by line_size per line
 * line_size: distance in bytes between two lines (same for source and destination)
 * h:         number of lines; four lines are copied per loop pass and the
 *            loop only ends when the counter reaches exactly zero, so h has
 *            to be a non-zero multiple of 4
 */
346 static void put_pixels_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h)
986e461dc072 Initial revision
glantau
parents:
diff changeset
347 {
471
d7f65ea52aaa * reimplemented remaing avg_ pixel functions
kabi
parents: 448
diff changeset
348 __asm __volatile(
420
bbaf743f353f * cleanup for put_pixels_mmx
kabi
parents: 418
diff changeset
/* eax = 2 * line_size, the step applied after every pair of lines */
349 "lea (%3, %3), %%eax \n\t"
422
aa4a1c6209bd * baling 8 seems to have the same speed
kabi
parents: 421
diff changeset
350 ".balign 8 \n\t"
420
bbaf743f353f * cleanup for put_pixels_mmx
kabi
parents: 418
diff changeset
351 "1: \n\t"
bbaf743f353f * cleanup for put_pixels_mmx
kabi
parents: 418
diff changeset
/* first pair of lines: %1 = source, %2 = destination */
352 "movq (%1), %%mm0 \n\t"
bbaf743f353f * cleanup for put_pixels_mmx
kabi
parents: 418
diff changeset
353 "movq (%1, %3), %%mm1 \n\t"
bbaf743f353f * cleanup for put_pixels_mmx
kabi
parents: 418
diff changeset
354 "movq %%mm0, (%2) \n\t"
bbaf743f353f * cleanup for put_pixels_mmx
kabi
parents: 418
diff changeset
355 "movq %%mm1, (%2, %3) \n\t"
bbaf743f353f * cleanup for put_pixels_mmx
kabi
parents: 418
diff changeset
356 "addl %%eax, %1 \n\t"
bbaf743f353f * cleanup for put_pixels_mmx
kabi
parents: 418
diff changeset
357 "addl %%eax, %2 \n\t"
bbaf743f353f * cleanup for put_pixels_mmx
kabi
parents: 418
diff changeset
/* second pair of lines */
358 "movq (%1), %%mm0 \n\t"
bbaf743f353f * cleanup for put_pixels_mmx
kabi
parents: 418
diff changeset
359 "movq (%1, %3), %%mm1 \n\t"
bbaf743f353f * cleanup for put_pixels_mmx
kabi
parents: 418
diff changeset
360 "movq %%mm0, (%2) \n\t"
bbaf743f353f * cleanup for put_pixels_mmx
kabi
parents: 418
diff changeset
361 "movq %%mm1, (%2, %3) \n\t"
bbaf743f353f * cleanup for put_pixels_mmx
kabi
parents: 418
diff changeset
362 "addl %%eax, %1 \n\t"
bbaf743f353f * cleanup for put_pixels_mmx
kabi
parents: 418
diff changeset
363 "addl %%eax, %2 \n\t"
bbaf743f353f * cleanup for put_pixels_mmx
kabi
parents: 418
diff changeset
/* four lines done; repeat until the line counter hits zero */
364 "subl $4, %0 \n\t"
bbaf743f353f * cleanup for put_pixels_mmx
kabi
parents: 418
diff changeset
365 "jnz 1b \n\t"
bbaf743f353f * cleanup for put_pixels_mmx
kabi
parents: 418
diff changeset
/* h and both pointers are modified by the asm, hence read-write operands */
366 : "+g"(h), "+r" (pixels), "+r" (block)
bbaf743f353f * cleanup for put_pixels_mmx
kabi
parents: 418
diff changeset
367 : "r"(line_size)
bbaf743f353f * cleanup for put_pixels_mmx
kabi
parents: 418
diff changeset
368 : "%eax", "memory"
bbaf743f353f * cleanup for put_pixels_mmx
kabi
parents: 418
diff changeset
369 );
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
370 }
986e461dc072 Initial revision
glantau
parents:
diff changeset
371
296
c1a8a1b4a24b sizeof(s->block) isnt 64*6*2 anymore bugfix
michaelni
parents: 294
diff changeset
372 static void clear_blocks_mmx(DCTELEM *blocks)
c1a8a1b4a24b sizeof(s->block) isnt 64*6*2 anymore bugfix
michaelni
parents: 294
diff changeset
373 {
471
d7f65ea52aaa * reimplemented remaing avg_ pixel functions
kabi
parents: 448
diff changeset
374 __asm __volatile(
296
c1a8a1b4a24b sizeof(s->block) isnt 64*6*2 anymore bugfix
michaelni
parents: 294
diff changeset
375 "pxor %%mm7, %%mm7 \n\t"
c1a8a1b4a24b sizeof(s->block) isnt 64*6*2 anymore bugfix
michaelni
parents: 294
diff changeset
376 "movl $-128*6, %%eax \n\t"
c1a8a1b4a24b sizeof(s->block) isnt 64*6*2 anymore bugfix
michaelni
parents: 294
diff changeset
377 "1: \n\t"
c1a8a1b4a24b sizeof(s->block) isnt 64*6*2 anymore bugfix
michaelni
parents: 294
diff changeset
378 "movq %%mm7, (%0, %%eax) \n\t"
c1a8a1b4a24b sizeof(s->block) isnt 64*6*2 anymore bugfix
michaelni
parents: 294
diff changeset
379 "movq %%mm7, 8(%0, %%eax) \n\t"
c1a8a1b4a24b sizeof(s->block) isnt 64*6*2 anymore bugfix
michaelni
parents: 294
diff changeset
380 "movq %%mm7, 16(%0, %%eax) \n\t"
c1a8a1b4a24b sizeof(s->block) isnt 64*6*2 anymore bugfix
michaelni
parents: 294
diff changeset
381 "movq %%mm7, 24(%0, %%eax) \n\t"
c1a8a1b4a24b sizeof(s->block) isnt 64*6*2 anymore bugfix
michaelni
parents: 294
diff changeset
382 "addl $32, %%eax \n\t"
c1a8a1b4a24b sizeof(s->block) isnt 64*6*2 anymore bugfix
michaelni
parents: 294
diff changeset
383 " js 1b \n\t"
c1a8a1b4a24b sizeof(s->block) isnt 64*6*2 anymore bugfix
michaelni
parents: 294
diff changeset
384 : : "r" (((int)blocks)+128*6)
c1a8a1b4a24b sizeof(s->block) isnt 64*6*2 anymore bugfix
michaelni
parents: 294
diff changeset
385 : "%eax"
c1a8a1b4a24b sizeof(s->block) isnt 64*6*2 anymore bugfix
michaelni
parents: 294
diff changeset
386 );
c1a8a1b4a24b sizeof(s->block) isnt 64*6*2 anymore bugfix
michaelni
parents: 294
diff changeset
387 }
c1a8a1b4a24b sizeof(s->block) isnt 64*6*2 anymore bugfix
michaelni
parents: 294
diff changeset
388
393
bf164fce2c14 removed debug function
glantau
parents: 387
diff changeset
/* Disabled no-op stub. The (equally disabled) "for speed testing" section at
   the end of dsputil_init_mmx() assigns it to the DSP function pointers. */
389 #if 0
247
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
390 static void just_return() { return; }
393
bf164fce2c14 removed debug function
glantau
parents: 387
diff changeset
391 #endif
247
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
392
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
393 void dsputil_init_mmx(void)
986e461dc072 Initial revision
glantau
parents:
diff changeset
394 {
986e461dc072 Initial revision
glantau
parents:
diff changeset
395 mm_flags = mm_support();
188
5d56c2f7e712 print cpu flags
uid46427
parents: 174
diff changeset
396 #if 1
5d56c2f7e712 print cpu flags
uid46427
parents: 174
diff changeset
397 printf("libavcodec: CPU flags:");
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
398 if (mm_flags & MM_MMX)
986e461dc072 Initial revision
glantau
parents:
diff changeset
399 printf(" mmx");
986e461dc072 Initial revision
glantau
parents:
diff changeset
400 if (mm_flags & MM_MMXEXT)
986e461dc072 Initial revision
glantau
parents:
diff changeset
401 printf(" mmxext");
986e461dc072 Initial revision
glantau
parents:
diff changeset
402 if (mm_flags & MM_3DNOW)
986e461dc072 Initial revision
glantau
parents:
diff changeset
403 printf(" 3dnow");
986e461dc072 Initial revision
glantau
parents:
diff changeset
404 if (mm_flags & MM_SSE)
986e461dc072 Initial revision
glantau
parents:
diff changeset
405 printf(" sse");
986e461dc072 Initial revision
glantau
parents:
diff changeset
406 if (mm_flags & MM_SSE2)
986e461dc072 Initial revision
glantau
parents:
diff changeset
407 printf(" sse2");
986e461dc072 Initial revision
glantau
parents:
diff changeset
408 printf("\n");
986e461dc072 Initial revision
glantau
parents:
diff changeset
409 #endif
986e461dc072 Initial revision
glantau
parents:
diff changeset
410
986e461dc072 Initial revision
glantau
parents:
diff changeset
411 if (mm_flags & MM_MMX) {
986e461dc072 Initial revision
glantau
parents:
diff changeset
412 get_pixels = get_pixels_mmx;
324
9c6f056f0e41 fixed mpeg4 time stuff on encoding
michaelni
parents: 296
diff changeset
413 diff_pixels = diff_pixels_mmx;
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
414 put_pixels_clamped = put_pixels_clamped_mmx;
986e461dc072 Initial revision
glantau
parents:
diff changeset
415 add_pixels_clamped = add_pixels_clamped_mmx;
296
c1a8a1b4a24b sizeof(s->block) isnt 64*6*2 anymore bugfix
michaelni
parents: 294
diff changeset
416 clear_blocks= clear_blocks_mmx;
415
1c3f42442fba * added simple test main - see comments about how to
kabi
parents: 402
diff changeset
417
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
418 pix_abs16x16 = pix_abs16x16_mmx;
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
419 pix_abs16x16_x2 = pix_abs16x16_x2_mmx;
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
420 pix_abs16x16_y2 = pix_abs16x16_y2_mmx;
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
421 pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx;
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
422 pix_abs8x8 = pix_abs8x8_mmx;
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
423 pix_abs8x8_x2 = pix_abs8x8_x2_mmx;
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
424 pix_abs8x8_y2 = pix_abs8x8_y2_mmx;
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
425 pix_abs8x8_xy2= pix_abs8x8_xy2_mmx;
574
c9b17c1a02e0 pix_abs mmx2 isnt bit-exact ...
michaelni
parents: 471
diff changeset
426
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
427 put_pixels_tab[0] = put_pixels_mmx;
986e461dc072 Initial revision
glantau
parents:
diff changeset
428 put_pixels_tab[1] = put_pixels_x2_mmx;
986e461dc072 Initial revision
glantau
parents:
diff changeset
429 put_pixels_tab[2] = put_pixels_y2_mmx;
986e461dc072 Initial revision
glantau
parents:
diff changeset
430 put_pixels_tab[3] = put_pixels_xy2_mmx;
986e461dc072 Initial revision
glantau
parents:
diff changeset
431
986e461dc072 Initial revision
glantau
parents:
diff changeset
432 put_no_rnd_pixels_tab[0] = put_pixels_mmx;
986e461dc072 Initial revision
glantau
parents:
diff changeset
433 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx;
986e461dc072 Initial revision
glantau
parents:
diff changeset
434 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx;
986e461dc072 Initial revision
glantau
parents:
diff changeset
435 put_no_rnd_pixels_tab[3] = put_no_rnd_pixels_xy2_mmx;
415
1c3f42442fba * added simple test main - see comments about how to
kabi
parents: 402
diff changeset
436
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
437 avg_pixels_tab[0] = avg_pixels_mmx;
986e461dc072 Initial revision
glantau
parents:
diff changeset
438 avg_pixels_tab[1] = avg_pixels_x2_mmx;
986e461dc072 Initial revision
glantau
parents:
diff changeset
439 avg_pixels_tab[2] = avg_pixels_y2_mmx;
986e461dc072 Initial revision
glantau
parents:
diff changeset
440 avg_pixels_tab[3] = avg_pixels_xy2_mmx;
986e461dc072 Initial revision
glantau
parents:
diff changeset
441
986e461dc072 Initial revision
glantau
parents:
diff changeset
442 avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels_mmx;
986e461dc072 Initial revision
glantau
parents:
diff changeset
443 avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels_x2_mmx;
986e461dc072 Initial revision
glantau
parents:
diff changeset
444 avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels_y2_mmx;
986e461dc072 Initial revision
glantau
parents:
diff changeset
445 avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels_xy2_mmx;
386
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
446
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
447 if (mm_flags & MM_MMXEXT) {
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
448 pix_abs16x16 = pix_abs16x16_mmx2;
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
449 pix_abs16x16_x2 = pix_abs16x16_x2_mmx2;
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
450 pix_abs16x16_y2 = pix_abs16x16_y2_mmx2;
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
451 pix_abs16x16_xy2= pix_abs16x16_xy2_mmx2;
415
1c3f42442fba * added simple test main - see comments about how to
kabi
parents: 402
diff changeset
452
294
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
453 pix_abs8x8 = pix_abs8x8_mmx2;
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
454 pix_abs8x8_x2 = pix_abs8x8_x2_mmx2;
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
455 pix_abs8x8_y2 = pix_abs8x8_y2_mmx2;
944632089814 4MV motion estimation (not finished yet)
michaelni
parents: 247
diff changeset
456 pix_abs8x8_xy2= pix_abs8x8_xy2_mmx2;
386
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
457
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
458 put_pixels_tab[1] = put_pixels_x2_mmx2;
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
459 put_pixels_tab[2] = put_pixels_y2_mmx2;
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
460 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx2;
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
461 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx2;
415
1c3f42442fba * added simple test main - see comments about how to
kabi
parents: 402
diff changeset
462
386
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
463 avg_pixels_tab[0] = avg_pixels_mmx2;
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
464 avg_pixels_tab[1] = avg_pixels_x2_mmx2;
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
465 avg_pixels_tab[2] = avg_pixels_y2_mmx2;
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
466 avg_pixels_tab[3] = avg_pixels_xy2_mmx2;
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
467 } else if (mm_flags & MM_3DNOW) {
986e461dc072 Initial revision
glantau
parents:
diff changeset
468 put_pixels_tab[1] = put_pixels_x2_3dnow;
986e461dc072 Initial revision
glantau
parents:
diff changeset
469 put_pixels_tab[2] = put_pixels_y2_3dnow;
386
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
470 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_3dnow;
f49629bab18d hopefully faster mmx2&3dnow MC
michaelni
parents: 342
diff changeset
471 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_3dnow;
393
bf164fce2c14 removed debug function
glantau
parents: 387
diff changeset
472
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
473 avg_pixels_tab[0] = avg_pixels_3dnow;
986e461dc072 Initial revision
glantau
parents:
diff changeset
474 avg_pixels_tab[1] = avg_pixels_x2_3dnow;
986e461dc072 Initial revision
glantau
parents:
diff changeset
475 avg_pixels_tab[2] = avg_pixels_y2_3dnow;
986e461dc072 Initial revision
glantau
parents:
diff changeset
476 avg_pixels_tab[3] = avg_pixels_xy2_3dnow;
986e461dc072 Initial revision
glantau
parents:
diff changeset
477 }
19
82d4c9be9873 MMX/MMXEXT iDCT support, using external functions currently defined in libmpeg2
arpi_esp
parents: 8
diff changeset
478
42
8068c4bce9c1 added mmx idct
glantau
parents: 19
diff changeset
479 /* idct */
8068c4bce9c1 added mmx idct
glantau
parents: 19
diff changeset
480 if (mm_flags & MM_MMXEXT) {
8068c4bce9c1 added mmx idct
glantau
parents: 19
diff changeset
481 ff_idct = ff_mmxext_idct;
8068c4bce9c1 added mmx idct
glantau
parents: 19
diff changeset
482 } else {
8068c4bce9c1 added mmx idct
glantau
parents: 19
diff changeset
483 ff_idct = ff_mmx_idct;
8068c4bce9c1 added mmx idct
glantau
parents: 19
diff changeset
484 }
174
ac5075a55488 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents: 151
diff changeset
485 #ifdef SIMPLE_IDCT
ac5075a55488 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents: 151
diff changeset
486 // ff_idct = simple_idct;
ac5075a55488 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents: 151
diff changeset
487 ff_idct = simple_idct_mmx;
ac5075a55488 new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents: 151
diff changeset
488 #endif
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
489 }
247
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
490
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
491 #if 0
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
492 // for speed testing
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
493 get_pixels = just_return;
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
494 put_pixels_clamped = just_return;
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
495 add_pixels_clamped = just_return;
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
496
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
497 pix_abs16x16 = just_return;
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
498 pix_abs16x16_x2 = just_return;
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
499 pix_abs16x16_y2 = just_return;
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
500 pix_abs16x16_xy2 = just_return;
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
501
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
502 put_pixels_tab[0] = just_return;
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
503 put_pixels_tab[1] = just_return;
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
504 put_pixels_tab[2] = just_return;
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
505 put_pixels_tab[3] = just_return;
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
506
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
507 put_no_rnd_pixels_tab[0] = just_return;
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
508 put_no_rnd_pixels_tab[1] = just_return;
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
509 put_no_rnd_pixels_tab[2] = just_return;
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
510 put_no_rnd_pixels_tab[3] = just_return;
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
511
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
512 avg_pixels_tab[0] = just_return;
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
513 avg_pixels_tab[1] = just_return;
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
514 avg_pixels_tab[2] = just_return;
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
515 avg_pixels_tab[3] = just_return;
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
516
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
517 avg_no_rnd_pixels_tab[0] = just_return;
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
518 avg_no_rnd_pixels_tab[1] = just_return;
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
519 avg_no_rnd_pixels_tab[2] = just_return;
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
520 avg_no_rnd_pixels_tab[3] = just_return;
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
521
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
522 //av_fdct = just_return;
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
523 //ff_idct = just_return;
6f48cacd9ed9 * some modifications to allow gcc to compile same code for -fPIC
kabi
parents: 188
diff changeset
524 #endif
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
525 }
402
92d143c2d5a8 removed unused code
glantau
parents: 393
diff changeset
526
92d143c2d5a8 removed unused code
glantau
parents: 393
diff changeset
527 /* remove any non bit exact operation (testing purpose). NOTE that
92d143c2d5a8 removed unused code
glantau
parents: 393
diff changeset
528 this function should be kept as small as possible because it is
92d143c2d5a8 removed unused code
glantau
parents: 393
diff changeset
529 always difficult to test automatically non bit exact cases. */
92d143c2d5a8 removed unused code
glantau
parents: 393
diff changeset
530 void dsputil_set_bit_exact_mmx(void)
92d143c2d5a8 removed unused code
glantau
parents: 393
diff changeset
531 {
92d143c2d5a8 removed unused code
glantau
parents: 393
diff changeset
532 if (mm_flags & MM_MMX) {
92d143c2d5a8 removed unused code
glantau
parents: 393
diff changeset
533 if (mm_flags & MM_MMXEXT) {
92d143c2d5a8 removed unused code
glantau
parents: 393
diff changeset
534 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx;
92d143c2d5a8 removed unused code
glantau
parents: 393
diff changeset
535 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx;
92d143c2d5a8 removed unused code
glantau
parents: 393
diff changeset
536 avg_pixels_tab[3] = avg_pixels_xy2_mmx;
574
c9b17c1a02e0 pix_abs mmx2 isnt bit-exact ...
michaelni
parents: 471
diff changeset
537
c9b17c1a02e0 pix_abs mmx2 isnt bit-exact ...
michaelni
parents: 471
diff changeset
538 pix_abs16x16_x2 = pix_abs16x16_x2_mmx;
c9b17c1a02e0 pix_abs mmx2 isnt bit-exact ...
michaelni
parents: 471
diff changeset
539 pix_abs16x16_y2 = pix_abs16x16_y2_mmx;
c9b17c1a02e0 pix_abs mmx2 isnt bit-exact ...
michaelni
parents: 471
diff changeset
540 pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx;
c9b17c1a02e0 pix_abs mmx2 isnt bit-exact ...
michaelni
parents: 471
diff changeset
541 pix_abs8x8_x2 = pix_abs8x8_x2_mmx;
c9b17c1a02e0 pix_abs mmx2 isnt bit-exact ...
michaelni
parents: 471
diff changeset
542 pix_abs8x8_y2 = pix_abs8x8_y2_mmx;
c9b17c1a02e0 pix_abs mmx2 isnt bit-exact ...
michaelni
parents: 471
diff changeset
543 pix_abs8x8_xy2= pix_abs8x8_xy2_mmx;
402
92d143c2d5a8 removed unused code
glantau
parents: 393
diff changeset
544 } else if (mm_flags & MM_3DNOW) {
92d143c2d5a8 removed unused code
glantau
parents: 393
diff changeset
545 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx;
92d143c2d5a8 removed unused code
glantau
parents: 393
diff changeset
546 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx;
92d143c2d5a8 removed unused code
glantau
parents: 393
diff changeset
547 avg_pixels_tab[3] = avg_pixels_xy2_mmx;
92d143c2d5a8 removed unused code
glantau
parents: 393
diff changeset
548 }
92d143c2d5a8 removed unused code
glantau
parents: 393
diff changeset
549 }
92d143c2d5a8 removed unused code
glantau
parents: 393
diff changeset
550 }