annotate x86/dsputil_mmx.c @ 12454:f4355cd85faa libavcodec

Port latest x264 deblock asm (before they moved to using NV12 as internal format), LGPL'ed with permission from Jason and Loren. This includes mmx2 code, so remove inline asm from h264dsp_mmx.c accordingly.
author rbultje
date Fri, 03 Sep 2010 16:52:46 +0000
parents 3941687b4fa9
children a5ddb39627fd
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1 /*
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2 * MMX optimized DSP utils
8629
04423b2f6e0b cosmetics: Remove pointless period after copyright statement non-sentences.
diego
parents: 8596
diff changeset
3 * Copyright (c) 2000, 2001 Fabrice Bellard
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
5 *
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
6 * This file is part of FFmpeg.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
7 *
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
8 * FFmpeg is free software; you can redistribute it and/or
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
9 * modify it under the terms of the GNU Lesser General Public
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
10 * License as published by the Free Software Foundation; either
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
11 * version 2.1 of the License, or (at your option) any later version.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
12 *
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
13 * FFmpeg is distributed in the hope that it will be useful,
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
16 * Lesser General Public License for more details.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
17 *
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
18 * You should have received a copy of the GNU Lesser General Public
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
19 * License along with FFmpeg; if not, write to the Free Software
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
21 *
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
22 * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
23 */
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
24
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
25 #include "libavutil/x86_cpu.h"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
26 #include "libavcodec/dsputil.h"
11499
2a4dc3c0b012 Move H264 dsputil functions into their own struct
mru
parents: 11485
diff changeset
27 #include "libavcodec/h264dsp.h"
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
28 #include "libavcodec/mpegvideo.h"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
29 #include "libavcodec/simple_idct.h"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
30 #include "dsputil_mmx.h"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
31 #include "idct_xvid.h"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
32
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
33 //#undef NDEBUG
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
34 //#include <assert.h>
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
35
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
36 /* pixel operations */
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 11118
diff changeset
37 DECLARE_ALIGNED(8, const uint64_t, ff_bone) = 0x0101010101010101ULL;
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 11118
diff changeset
38 DECLARE_ALIGNED(8, const uint64_t, ff_wtwo) = 0x0002000200020002ULL;
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
39
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 11118
diff changeset
40 DECLARE_ALIGNED(16, const uint64_t, ff_pdw_80000000)[2] =
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
41 {0x8000000080000000ULL, 0x8000000080000000ULL};
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
42
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 11118
diff changeset
43 DECLARE_ALIGNED(8, const uint64_t, ff_pw_3 ) = 0x0003000300030003ULL;
12143
fa452b243aa6 Make ff_pw_4 128 bits
conrad
parents: 12086
diff changeset
44 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_4 ) = {0x0004000400040004ULL, 0x0004000400040004ULL};
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 11118
diff changeset
45 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_5 ) = {0x0005000500050005ULL, 0x0005000500050005ULL};
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 11118
diff changeset
46 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8 ) = {0x0008000800080008ULL, 0x0008000800080008ULL};
12205
d38e8565ba05 VP8 MBedge loopfilter MMX/MMX2/SSE2 functions for both luma (width=16)
rbultje
parents: 12168
diff changeset
47 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_9 ) = {0x0009000900090009ULL, 0x0009000900090009ULL};
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 11118
diff changeset
48 DECLARE_ALIGNED(8, const uint64_t, ff_pw_15 ) = 0x000F000F000F000FULL;
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 11118
diff changeset
49 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_16 ) = {0x0010001000100010ULL, 0x0010001000100010ULL};
12205
d38e8565ba05 VP8 MBedge loopfilter MMX/MMX2/SSE2 functions for both luma (width=16)
rbultje
parents: 12168
diff changeset
50 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_18 ) = {0x0012001200120012ULL, 0x0012001200120012ULL};
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 11118
diff changeset
51 DECLARE_ALIGNED(8, const uint64_t, ff_pw_20 ) = 0x0014001400140014ULL;
12205
d38e8565ba05 VP8 MBedge loopfilter MMX/MMX2/SSE2 functions for both luma (width=16)
rbultje
parents: 12168
diff changeset
52 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_27 ) = {0x001B001B001B001BULL, 0x001B001B001B001BULL};
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 11118
diff changeset
53 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_28 ) = {0x001C001C001C001CULL, 0x001C001C001C001CULL};
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 11118
diff changeset
54 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_32 ) = {0x0020002000200020ULL, 0x0020002000200020ULL};
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 11118
diff changeset
55 DECLARE_ALIGNED(8, const uint64_t, ff_pw_42 ) = 0x002A002A002A002AULL;
12206
fe243bb5ef61 Move ff_pw_* from vc1dsp_mmx.c to dsputil_mmx.c
conrad
parents: 12205
diff changeset
56 DECLARE_ALIGNED(8, const uint64_t, ff_pw_53 ) = 0x0035003500350035ULL;
12205
d38e8565ba05 VP8 MBedge loopfilter MMX/MMX2/SSE2 functions for both luma (width=16)
rbultje
parents: 12168
diff changeset
57 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_63 ) = {0x003F003F003F003FULL, 0x003F003F003F003FULL};
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 11118
diff changeset
58 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_64 ) = {0x0040004000400040ULL, 0x0040004000400040ULL};
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 11118
diff changeset
59 DECLARE_ALIGNED(8, const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL;
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 11118
diff changeset
60 DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL;
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 11118
diff changeset
61 DECLARE_ALIGNED(8, const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL;
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
62
12454
f4355cd85faa Port latest x264 deblock asm (before they moved to using NV12 as internal
rbultje
parents: 12450
diff changeset
63 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_0 ) = {0x0000000000000000ULL, 0x0000000000000000ULL};
12168
b246b214c2e9 VP8 H/V inner loopfilter MMX/MMXEXT/SSE2 optimizations.
rbultje
parents: 12143
diff changeset
64 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_1 ) = {0x0101010101010101ULL, 0x0101010101010101ULL};
11951
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents: 11826
diff changeset
65 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_3 ) = {0x0303030303030303ULL, 0x0303030303030303ULL};
12086
d780ae746855 Simple H/V loopfilter for VP8 in MMX, MMX2 and SSE2 (yay for yasm macros).
rbultje
parents: 11981
diff changeset
66 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_4 ) = {0x0404040404040404ULL, 0x0404040404040404ULL};
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 11118
diff changeset
67 DECLARE_ALIGNED(8, const uint64_t, ff_pb_7 ) = 0x0707070707070707ULL;
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 11118
diff changeset
68 DECLARE_ALIGNED(8, const uint64_t, ff_pb_1F ) = 0x1F1F1F1F1F1F1F1FULL;
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 11118
diff changeset
69 DECLARE_ALIGNED(8, const uint64_t, ff_pb_3F ) = 0x3F3F3F3F3F3F3F3FULL;
12086
d780ae746855 Simple H/V loopfilter for VP8 in MMX, MMX2 and SSE2 (yay for yasm macros).
rbultje
parents: 11981
diff changeset
70 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_80 ) = {0x8080808080808080ULL, 0x8080808080808080ULL};
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 11118
diff changeset
71 DECLARE_ALIGNED(8, const uint64_t, ff_pb_81 ) = 0x8181818181818181ULL;
12454
f4355cd85faa Port latest x264 deblock asm (before they moved to using NV12 as internal
rbultje
parents: 12450
diff changeset
72 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_A1 ) = {0xA1A1A1A1A1A1A1A1ULL, 0xA1A1A1A1A1A1A1A1ULL};
12086
d780ae746855 Simple H/V loopfilter for VP8 in MMX, MMX2 and SSE2 (yay for yasm macros).
rbultje
parents: 11981
diff changeset
73 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_F8 ) = {0xF8F8F8F8F8F8F8F8ULL, 0xF8F8F8F8F8F8F8F8ULL};
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 11118
diff changeset
74 DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL;
12086
d780ae746855 Simple H/V loopfilter for VP8 in MMX, MMX2 and SSE2 (yay for yasm macros).
rbultje
parents: 11981
diff changeset
75 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_FE ) = {0xFEFEFEFEFEFEFEFEULL, 0xFEFEFEFEFEFEFEFEULL};
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
76
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 11118
diff changeset
77 DECLARE_ALIGNED(16, const double, ff_pd_1)[2] = { 1.0, 1.0 };
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 11118
diff changeset
78 DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 };
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
79
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
80 #define JUMPALIGN() __asm__ volatile (ASMALIGN(3)::)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
81 #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%" #regd ", %%" #regd ::)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
82
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
83 #define MOVQ_BFE(regd) \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
84 __asm__ volatile ( \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
85 "pcmpeqd %%" #regd ", %%" #regd " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
86 "paddb %%" #regd ", %%" #regd " \n\t" ::)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
87
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
88 #ifndef PIC
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
89 #define MOVQ_BONE(regd) __asm__ volatile ("movq %0, %%" #regd " \n\t" ::"m"(ff_bone))
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
90 #define MOVQ_WTWO(regd) __asm__ volatile ("movq %0, %%" #regd " \n\t" ::"m"(ff_wtwo))
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
91 #else
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
92 // for shared library it's better to use this way for accessing constants
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
93 // pcmpeqd -> -1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
94 #define MOVQ_BONE(regd) \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
95 __asm__ volatile ( \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
96 "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
97 "psrlw $15, %%" #regd " \n\t" \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
98 "packuswb %%" #regd ", %%" #regd " \n\t" ::)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
99
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
100 #define MOVQ_WTWO(regd) \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
101 __asm__ volatile ( \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
102 "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
103 "psrlw $15, %%" #regd " \n\t" \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
104 "psllw $1, %%" #regd " \n\t"::)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
105
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
106 #endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
107
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
108 // using regr as temporary and for the output result
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
109 // first argument is unmodifed and second is trashed
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
110 // regfe is supposed to contain 0xfefefefefefefefe
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
111 #define PAVGB_MMX_NO_RND(rega, regb, regr, regfe) \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
112 "movq " #rega ", " #regr " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
113 "pand " #regb ", " #regr " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
114 "pxor " #rega ", " #regb " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
115 "pand " #regfe "," #regb " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
116 "psrlq $1, " #regb " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
117 "paddb " #regb ", " #regr " \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
118
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
119 #define PAVGB_MMX(rega, regb, regr, regfe) \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
120 "movq " #rega ", " #regr " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
121 "por " #regb ", " #regr " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
122 "pxor " #rega ", " #regb " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
123 "pand " #regfe "," #regb " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
124 "psrlq $1, " #regb " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
125 "psubb " #regb ", " #regr " \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
126
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
127 // mm6 is supposed to contain 0xfefefefefefefefe
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
128 #define PAVGBP_MMX_NO_RND(rega, regb, regr, regc, regd, regp) \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
129 "movq " #rega ", " #regr " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
130 "movq " #regc ", " #regp " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
131 "pand " #regb ", " #regr " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
132 "pand " #regd ", " #regp " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
133 "pxor " #rega ", " #regb " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
134 "pxor " #regc ", " #regd " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
135 "pand %%mm6, " #regb " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
136 "pand %%mm6, " #regd " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
137 "psrlq $1, " #regb " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
138 "psrlq $1, " #regd " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
139 "paddb " #regb ", " #regr " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
140 "paddb " #regd ", " #regp " \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
141
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
142 #define PAVGBP_MMX(rega, regb, regr, regc, regd, regp) \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
143 "movq " #rega ", " #regr " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
144 "movq " #regc ", " #regp " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
145 "por " #regb ", " #regr " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
146 "por " #regd ", " #regp " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
147 "pxor " #rega ", " #regb " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
148 "pxor " #regc ", " #regd " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
149 "pand %%mm6, " #regb " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
150 "pand %%mm6, " #regd " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
151 "psrlq $1, " #regd " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
152 "psrlq $1, " #regb " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
153 "psubb " #regb ", " #regr " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
154 "psubb " #regd ", " #regp " \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
155
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
156 /***********************************/
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
157 /* MMX no rounding */
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
158 #define DEF(x, y) x ## _no_rnd_ ## y ##_mmx
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
159 #define SET_RND MOVQ_WONE
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
160 #define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX_NO_RND(a, b, c, d, e, f)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
161 #define PAVGB(a, b, c, e) PAVGB_MMX_NO_RND(a, b, c, e)
9445
41245484dc0b avg_ pixel functions need to use (dst+pix+1)>>1 to average with existing
conrad
parents: 9441
diff changeset
162 #define OP_AVG(a, b, c, e) PAVGB_MMX(a, b, c, e)
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
163
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
164 #include "dsputil_mmx_rnd_template.c"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
165
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
166 #undef DEF
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
167 #undef SET_RND
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
168 #undef PAVGBP
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
169 #undef PAVGB
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
170 /***********************************/
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
171 /* MMX rounding */
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
172
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
173 #define DEF(x, y) x ## _ ## y ##_mmx
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
174 #define SET_RND MOVQ_WTWO
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
175 #define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX(a, b, c, d, e, f)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
176 #define PAVGB(a, b, c, e) PAVGB_MMX(a, b, c, e)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
177
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
178 #include "dsputil_mmx_rnd_template.c"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
179
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
180 #undef DEF
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
181 #undef SET_RND
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
182 #undef PAVGBP
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
183 #undef PAVGB
9445
41245484dc0b avg_ pixel functions need to use (dst+pix+1)>>1 to average with existing
conrad
parents: 9441
diff changeset
184 #undef OP_AVG
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
185
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
186 /***********************************/
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
187 /* 3Dnow specific */
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
188
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
189 #define DEF(x) x ## _3dnow
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
190 #define PAVGB "pavgusb"
9445
41245484dc0b avg_ pixel functions need to use (dst+pix+1)>>1 to average with existing
conrad
parents: 9441
diff changeset
191 #define OP_AVG PAVGB
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
192
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
193 #include "dsputil_mmx_avg_template.c"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
194
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
195 #undef DEF
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
196 #undef PAVGB
9445
41245484dc0b avg_ pixel functions need to use (dst+pix+1)>>1 to average with existing
conrad
parents: 9441
diff changeset
197 #undef OP_AVG
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
198
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
199 /***********************************/
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
200 /* MMX2 specific */
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
201
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
202 #define DEF(x) x ## _mmx2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
203
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
204 /* Introduced only in MMX2 set */
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
205 #define PAVGB "pavgb"
9445
41245484dc0b avg_ pixel functions need to use (dst+pix+1)>>1 to average with existing
conrad
parents: 9441
diff changeset
206 #define OP_AVG PAVGB
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
207
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
208 #include "dsputil_mmx_avg_template.c"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
209
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
210 #undef DEF
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
211 #undef PAVGB
9445
41245484dc0b avg_ pixel functions need to use (dst+pix+1)>>1 to average with existing
conrad
parents: 9441
diff changeset
212 #undef OP_AVG
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
213
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
214 #define put_no_rnd_pixels16_mmx put_pixels16_mmx
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
215 #define put_no_rnd_pixels8_mmx put_pixels8_mmx
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
216 #define put_pixels16_mmx2 put_pixels16_mmx
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
217 #define put_pixels8_mmx2 put_pixels8_mmx
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
218 #define put_pixels4_mmx2 put_pixels4_mmx
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
219 #define put_no_rnd_pixels16_mmx2 put_no_rnd_pixels16_mmx
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
220 #define put_no_rnd_pixels8_mmx2 put_no_rnd_pixels8_mmx
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
221 #define put_pixels16_3dnow put_pixels16_mmx
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
222 #define put_pixels8_3dnow put_pixels8_mmx
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
223 #define put_pixels4_3dnow put_pixels4_mmx
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
224 #define put_no_rnd_pixels16_3dnow put_no_rnd_pixels16_mmx
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
225 #define put_no_rnd_pixels8_3dnow put_no_rnd_pixels8_mmx
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
226
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
227 /***********************************/
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
228 /* standard MMX */
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
229
12435
fe78a4548d12 Put ff_ prefix on non-static {put_signed,put,add}_pixels_clamped_mmx()
rbultje
parents: 12417
diff changeset
230 void ff_put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
231 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
232 const DCTELEM *p;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
233 uint8_t *pix;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
234
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
235 /* read the pixels */
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
236 p = block;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
237 pix = pixels;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
238 /* unrolled loop */
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
239 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
240 "movq %3, %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
241 "movq 8%3, %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
242 "movq 16%3, %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
243 "movq 24%3, %%mm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
244 "movq 32%3, %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
245 "movq 40%3, %%mm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
246 "movq 48%3, %%mm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
247 "movq 56%3, %%mm7 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
248 "packuswb %%mm1, %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
249 "packuswb %%mm3, %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
250 "packuswb %%mm5, %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
251 "packuswb %%mm7, %%mm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
252 "movq %%mm0, (%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
253 "movq %%mm2, (%0, %1) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
254 "movq %%mm4, (%0, %1, 2) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
255 "movq %%mm6, (%0, %2) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
256 ::"r" (pix), "r" ((x86_reg)line_size), "r" ((x86_reg)line_size*3), "m"(*p)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
257 :"memory");
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
258 pix += line_size*4;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
259 p += 32;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
260
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
261 // if here would be an exact copy of the code above
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
262 // compiler would generate some very strange code
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
263 // thus using "r"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
264 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
265 "movq (%3), %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
266 "movq 8(%3), %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
267 "movq 16(%3), %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
268 "movq 24(%3), %%mm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
269 "movq 32(%3), %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
270 "movq 40(%3), %%mm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
271 "movq 48(%3), %%mm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
272 "movq 56(%3), %%mm7 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
273 "packuswb %%mm1, %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
274 "packuswb %%mm3, %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
275 "packuswb %%mm5, %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
276 "packuswb %%mm7, %%mm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
277 "movq %%mm0, (%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
278 "movq %%mm2, (%0, %1) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
279 "movq %%mm4, (%0, %1, 2) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
280 "movq %%mm6, (%0, %2) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
281 ::"r" (pix), "r" ((x86_reg)line_size), "r" ((x86_reg)line_size*3), "r"(p)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
282 :"memory");
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
283 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
284
10961
34a65026fa06 Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents: 10766
diff changeset
285 DECLARE_ASM_CONST(8, uint8_t, ff_vector128)[8] =
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
286 { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
287
9337
a0d54042ea37 Rewrite put_signed_pixels_clamped_mmx() to eliminate mmx.h from dsputil_mmx.c.
alexc
parents: 8818
diff changeset
288 #define put_signed_pixels_clamped_mmx_half(off) \
a0d54042ea37 Rewrite put_signed_pixels_clamped_mmx() to eliminate mmx.h from dsputil_mmx.c.
alexc
parents: 8818
diff changeset
289 "movq "#off"(%2), %%mm1 \n\t"\
a0d54042ea37 Rewrite put_signed_pixels_clamped_mmx() to eliminate mmx.h from dsputil_mmx.c.
alexc
parents: 8818
diff changeset
290 "movq 16+"#off"(%2), %%mm2 \n\t"\
a0d54042ea37 Rewrite put_signed_pixels_clamped_mmx() to eliminate mmx.h from dsputil_mmx.c.
alexc
parents: 8818
diff changeset
291 "movq 32+"#off"(%2), %%mm3 \n\t"\
a0d54042ea37 Rewrite put_signed_pixels_clamped_mmx() to eliminate mmx.h from dsputil_mmx.c.
alexc
parents: 8818
diff changeset
292 "movq 48+"#off"(%2), %%mm4 \n\t"\
a0d54042ea37 Rewrite put_signed_pixels_clamped_mmx() to eliminate mmx.h from dsputil_mmx.c.
alexc
parents: 8818
diff changeset
293 "packsswb 8+"#off"(%2), %%mm1 \n\t"\
a0d54042ea37 Rewrite put_signed_pixels_clamped_mmx() to eliminate mmx.h from dsputil_mmx.c.
alexc
parents: 8818
diff changeset
294 "packsswb 24+"#off"(%2), %%mm2 \n\t"\
a0d54042ea37 Rewrite put_signed_pixels_clamped_mmx() to eliminate mmx.h from dsputil_mmx.c.
alexc
parents: 8818
diff changeset
295 "packsswb 40+"#off"(%2), %%mm3 \n\t"\
a0d54042ea37 Rewrite put_signed_pixels_clamped_mmx() to eliminate mmx.h from dsputil_mmx.c.
alexc
parents: 8818
diff changeset
296 "packsswb 56+"#off"(%2), %%mm4 \n\t"\
a0d54042ea37 Rewrite put_signed_pixels_clamped_mmx() to eliminate mmx.h from dsputil_mmx.c.
alexc
parents: 8818
diff changeset
297 "paddb %%mm0, %%mm1 \n\t"\
a0d54042ea37 Rewrite put_signed_pixels_clamped_mmx() to eliminate mmx.h from dsputil_mmx.c.
alexc
parents: 8818
diff changeset
298 "paddb %%mm0, %%mm2 \n\t"\
a0d54042ea37 Rewrite put_signed_pixels_clamped_mmx() to eliminate mmx.h from dsputil_mmx.c.
alexc
parents: 8818
diff changeset
299 "paddb %%mm0, %%mm3 \n\t"\
a0d54042ea37 Rewrite put_signed_pixels_clamped_mmx() to eliminate mmx.h from dsputil_mmx.c.
alexc
parents: 8818
diff changeset
300 "paddb %%mm0, %%mm4 \n\t"\
a0d54042ea37 Rewrite put_signed_pixels_clamped_mmx() to eliminate mmx.h from dsputil_mmx.c.
alexc
parents: 8818
diff changeset
301 "movq %%mm1, (%0) \n\t"\
a0d54042ea37 Rewrite put_signed_pixels_clamped_mmx() to eliminate mmx.h from dsputil_mmx.c.
alexc
parents: 8818
diff changeset
302 "movq %%mm2, (%0, %3) \n\t"\
a0d54042ea37 Rewrite put_signed_pixels_clamped_mmx() to eliminate mmx.h from dsputil_mmx.c.
alexc
parents: 8818
diff changeset
303 "movq %%mm3, (%0, %3, 2) \n\t"\
a0d54042ea37 Rewrite put_signed_pixels_clamped_mmx() to eliminate mmx.h from dsputil_mmx.c.
alexc
parents: 8818
diff changeset
304 "movq %%mm4, (%0, %1) \n\t"
a0d54042ea37 Rewrite put_signed_pixels_clamped_mmx() to eliminate mmx.h from dsputil_mmx.c.
alexc
parents: 8818
diff changeset
305
12435
fe78a4548d12 Put ff_ prefix on non-static {put_signed,put,add}_pixels_clamped_mmx()
rbultje
parents: 12417
diff changeset
306 void ff_put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
307 {
9337
a0d54042ea37 Rewrite put_signed_pixels_clamped_mmx() to eliminate mmx.h from dsputil_mmx.c.
alexc
parents: 8818
diff changeset
308 x86_reg line_skip = line_size;
9341
06532529c428 Mark line_skip3 asm argument as output-only instead of using av_uninit.
reimar
parents: 9340
diff changeset
309 x86_reg line_skip3;
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
310
9337
a0d54042ea37 Rewrite put_signed_pixels_clamped_mmx() to eliminate mmx.h from dsputil_mmx.c.
alexc
parents: 8818
diff changeset
311 __asm__ volatile (
a0d54042ea37 Rewrite put_signed_pixels_clamped_mmx() to eliminate mmx.h from dsputil_mmx.c.
alexc
parents: 8818
diff changeset
312 "movq "MANGLE(ff_vector128)", %%mm0 \n\t"
a0d54042ea37 Rewrite put_signed_pixels_clamped_mmx() to eliminate mmx.h from dsputil_mmx.c.
alexc
parents: 8818
diff changeset
313 "lea (%3, %3, 2), %1 \n\t"
a0d54042ea37 Rewrite put_signed_pixels_clamped_mmx() to eliminate mmx.h from dsputil_mmx.c.
alexc
parents: 8818
diff changeset
314 put_signed_pixels_clamped_mmx_half(0)
a0d54042ea37 Rewrite put_signed_pixels_clamped_mmx() to eliminate mmx.h from dsputil_mmx.c.
alexc
parents: 8818
diff changeset
315 "lea (%0, %3, 4), %0 \n\t"
a0d54042ea37 Rewrite put_signed_pixels_clamped_mmx() to eliminate mmx.h from dsputil_mmx.c.
alexc
parents: 8818
diff changeset
316 put_signed_pixels_clamped_mmx_half(64)
9341
06532529c428 Mark line_skip3 asm argument as output-only instead of using av_uninit.
reimar
parents: 9340
diff changeset
317 :"+&r" (pixels), "=&r" (line_skip3)
9337
a0d54042ea37 Rewrite put_signed_pixels_clamped_mmx() to eliminate mmx.h from dsputil_mmx.c.
alexc
parents: 8818
diff changeset
318 :"r" (block), "r"(line_skip)
a0d54042ea37 Rewrite put_signed_pixels_clamped_mmx() to eliminate mmx.h from dsputil_mmx.c.
alexc
parents: 8818
diff changeset
319 :"memory");
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
320 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
321
12435
fe78a4548d12 Put ff_ prefix on non-static {put_signed,put,add}_pixels_clamped_mmx()
rbultje
parents: 12417
diff changeset
322 void ff_add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
323 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
324 const DCTELEM *p;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
325 uint8_t *pix;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
326 int i;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
327
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
328 /* read the pixels */
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
329 p = block;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
330 pix = pixels;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
331 MOVQ_ZERO(mm7);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
332 i = 4;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
333 do {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
334 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
335 "movq (%2), %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
336 "movq 8(%2), %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
337 "movq 16(%2), %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
338 "movq 24(%2), %%mm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
339 "movq %0, %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
340 "movq %1, %%mm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
341 "movq %%mm4, %%mm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
342 "punpcklbw %%mm7, %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
343 "punpckhbw %%mm7, %%mm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
344 "paddsw %%mm4, %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
345 "paddsw %%mm5, %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
346 "movq %%mm6, %%mm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
347 "punpcklbw %%mm7, %%mm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
348 "punpckhbw %%mm7, %%mm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
349 "paddsw %%mm6, %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
350 "paddsw %%mm5, %%mm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
351 "packuswb %%mm1, %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
352 "packuswb %%mm3, %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
353 "movq %%mm0, %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
354 "movq %%mm2, %1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
355 :"+m"(*pix), "+m"(*(pix+line_size))
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
356 :"r"(p)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
357 :"memory");
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
358 pix += line_size*2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
359 p += 16;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
360 } while (--i);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
361 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
362
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
363 static void put_pixels4_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
364 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
365 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
366 "lea (%3, %3), %%"REG_a" \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
367 ASMALIGN(3)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
368 "1: \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
369 "movd (%1), %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
370 "movd (%1, %3), %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
371 "movd %%mm0, (%2) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
372 "movd %%mm1, (%2, %3) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
373 "add %%"REG_a", %1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
374 "add %%"REG_a", %2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
375 "movd (%1), %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
376 "movd (%1, %3), %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
377 "movd %%mm0, (%2) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
378 "movd %%mm1, (%2, %3) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
379 "add %%"REG_a", %1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
380 "add %%"REG_a", %2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
381 "subl $4, %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
382 "jnz 1b \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
383 : "+g"(h), "+r" (pixels), "+r" (block)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
384 : "r"((x86_reg)line_size)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
385 : "%"REG_a, "memory"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
386 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
387 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
388
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
389 static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
390 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
391 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
392 "lea (%3, %3), %%"REG_a" \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
393 ASMALIGN(3)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
394 "1: \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
395 "movq (%1), %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
396 "movq (%1, %3), %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
397 "movq %%mm0, (%2) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
398 "movq %%mm1, (%2, %3) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
399 "add %%"REG_a", %1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
400 "add %%"REG_a", %2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
401 "movq (%1), %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
402 "movq (%1, %3), %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
403 "movq %%mm0, (%2) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
404 "movq %%mm1, (%2, %3) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
405 "add %%"REG_a", %1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
406 "add %%"REG_a", %2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
407 "subl $4, %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
408 "jnz 1b \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
409 : "+g"(h), "+r" (pixels), "+r" (block)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
410 : "r"((x86_reg)line_size)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
411 : "%"REG_a, "memory"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
412 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
413 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
414
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
415 static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
416 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
417 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
418 "lea (%3, %3), %%"REG_a" \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
419 ASMALIGN(3)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
420 "1: \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
421 "movq (%1), %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
422 "movq 8(%1), %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
423 "movq (%1, %3), %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
424 "movq 8(%1, %3), %%mm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
425 "movq %%mm0, (%2) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
426 "movq %%mm4, 8(%2) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
427 "movq %%mm1, (%2, %3) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
428 "movq %%mm5, 8(%2, %3) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
429 "add %%"REG_a", %1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
430 "add %%"REG_a", %2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
431 "movq (%1), %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
432 "movq 8(%1), %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
433 "movq (%1, %3), %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
434 "movq 8(%1, %3), %%mm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
435 "movq %%mm0, (%2) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
436 "movq %%mm4, 8(%2) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
437 "movq %%mm1, (%2, %3) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
438 "movq %%mm5, 8(%2, %3) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
439 "add %%"REG_a", %1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
440 "add %%"REG_a", %2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
441 "subl $4, %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
442 "jnz 1b \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
443 : "+g"(h), "+r" (pixels), "+r" (block)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
444 : "r"((x86_reg)line_size)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
445 : "%"REG_a, "memory"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
446 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
447 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
448
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
449 static void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
450 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
451 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
452 "1: \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
453 "movdqu (%1), %%xmm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
454 "movdqu (%1,%3), %%xmm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
455 "movdqu (%1,%3,2), %%xmm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
456 "movdqu (%1,%4), %%xmm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
457 "movdqa %%xmm0, (%2) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
458 "movdqa %%xmm1, (%2,%3) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
459 "movdqa %%xmm2, (%2,%3,2) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
460 "movdqa %%xmm3, (%2,%4) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
461 "subl $4, %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
462 "lea (%1,%3,4), %1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
463 "lea (%2,%3,4), %2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
464 "jnz 1b \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
465 : "+g"(h), "+r" (pixels), "+r" (block)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
466 : "r"((x86_reg)line_size), "r"((x86_reg)3L*line_size)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
467 : "memory"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
468 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
469 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
470
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
471 static void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
472 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
473 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
474 "1: \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
475 "movdqu (%1), %%xmm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
476 "movdqu (%1,%3), %%xmm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
477 "movdqu (%1,%3,2), %%xmm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
478 "movdqu (%1,%4), %%xmm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
479 "pavgb (%2), %%xmm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
480 "pavgb (%2,%3), %%xmm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
481 "pavgb (%2,%3,2), %%xmm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
482 "pavgb (%2,%4), %%xmm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
483 "movdqa %%xmm0, (%2) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
484 "movdqa %%xmm1, (%2,%3) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
485 "movdqa %%xmm2, (%2,%3,2) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
486 "movdqa %%xmm3, (%2,%4) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
487 "subl $4, %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
488 "lea (%1,%3,4), %1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
489 "lea (%2,%3,4), %2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
490 "jnz 1b \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
491 : "+g"(h), "+r" (pixels), "+r" (block)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
492 : "r"((x86_reg)line_size), "r"((x86_reg)3L*line_size)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
493 : "memory"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
494 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
495 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
496
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
497 #define CLEAR_BLOCKS(name,n) \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
498 static void name(DCTELEM *blocks)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
499 {\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
500 __asm__ volatile(\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
501 "pxor %%mm7, %%mm7 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
502 "mov %1, %%"REG_a" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
503 "1: \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
504 "movq %%mm7, (%0, %%"REG_a") \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
505 "movq %%mm7, 8(%0, %%"REG_a") \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
506 "movq %%mm7, 16(%0, %%"REG_a") \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
507 "movq %%mm7, 24(%0, %%"REG_a") \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
508 "add $32, %%"REG_a" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
509 " js 1b \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
510 : : "r" (((uint8_t *)blocks)+128*n),\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
511 "i" (-128*n)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
512 : "%"REG_a\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
513 );\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
514 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
515 CLEAR_BLOCKS(clear_blocks_mmx, 6)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
516 CLEAR_BLOCKS(clear_block_mmx, 1)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
517
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
518 static void clear_block_sse(DCTELEM *block)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
519 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
520 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
521 "xorps %%xmm0, %%xmm0 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
522 "movaps %%xmm0, (%0) \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
523 "movaps %%xmm0, 16(%0) \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
524 "movaps %%xmm0, 32(%0) \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
525 "movaps %%xmm0, 48(%0) \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
526 "movaps %%xmm0, 64(%0) \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
527 "movaps %%xmm0, 80(%0) \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
528 "movaps %%xmm0, 96(%0) \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
529 "movaps %%xmm0, 112(%0) \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
530 :: "r"(block)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
531 : "memory"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
532 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
533 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
534
9861
89270a3bc4a0 SSE version of clear_blocks
darkshikari
parents: 9445
diff changeset
535 static void clear_blocks_sse(DCTELEM *blocks)
89270a3bc4a0 SSE version of clear_blocks
darkshikari
parents: 9445
diff changeset
536 {\
89270a3bc4a0 SSE version of clear_blocks
darkshikari
parents: 9445
diff changeset
537 __asm__ volatile(
89270a3bc4a0 SSE version of clear_blocks
darkshikari
parents: 9445
diff changeset
538 "xorps %%xmm0, %%xmm0 \n"
89270a3bc4a0 SSE version of clear_blocks
darkshikari
parents: 9445
diff changeset
539 "mov %1, %%"REG_a" \n"
89270a3bc4a0 SSE version of clear_blocks
darkshikari
parents: 9445
diff changeset
540 "1: \n"
89270a3bc4a0 SSE version of clear_blocks
darkshikari
parents: 9445
diff changeset
541 "movaps %%xmm0, (%0, %%"REG_a") \n"
89270a3bc4a0 SSE version of clear_blocks
darkshikari
parents: 9445
diff changeset
542 "movaps %%xmm0, 16(%0, %%"REG_a") \n"
89270a3bc4a0 SSE version of clear_blocks
darkshikari
parents: 9445
diff changeset
543 "movaps %%xmm0, 32(%0, %%"REG_a") \n"
89270a3bc4a0 SSE version of clear_blocks
darkshikari
parents: 9445
diff changeset
544 "movaps %%xmm0, 48(%0, %%"REG_a") \n"
89270a3bc4a0 SSE version of clear_blocks
darkshikari
parents: 9445
diff changeset
545 "movaps %%xmm0, 64(%0, %%"REG_a") \n"
89270a3bc4a0 SSE version of clear_blocks
darkshikari
parents: 9445
diff changeset
546 "movaps %%xmm0, 80(%0, %%"REG_a") \n"
89270a3bc4a0 SSE version of clear_blocks
darkshikari
parents: 9445
diff changeset
547 "movaps %%xmm0, 96(%0, %%"REG_a") \n"
89270a3bc4a0 SSE version of clear_blocks
darkshikari
parents: 9445
diff changeset
548 "movaps %%xmm0, 112(%0, %%"REG_a") \n"
89270a3bc4a0 SSE version of clear_blocks
darkshikari
parents: 9445
diff changeset
549 "add $128, %%"REG_a" \n"
89270a3bc4a0 SSE version of clear_blocks
darkshikari
parents: 9445
diff changeset
550 " js 1b \n"
89270a3bc4a0 SSE version of clear_blocks
darkshikari
parents: 9445
diff changeset
551 : : "r" (((uint8_t *)blocks)+128*6),
89270a3bc4a0 SSE version of clear_blocks
darkshikari
parents: 9445
diff changeset
552 "i" (-128*6)
89270a3bc4a0 SSE version of clear_blocks
darkshikari
parents: 9445
diff changeset
553 : "%"REG_a
89270a3bc4a0 SSE version of clear_blocks
darkshikari
parents: 9445
diff changeset
554 );
89270a3bc4a0 SSE version of clear_blocks
darkshikari
parents: 9445
diff changeset
555 }
89270a3bc4a0 SSE version of clear_blocks
darkshikari
parents: 9445
diff changeset
556
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
557 static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
558 x86_reg i=0;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
559 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
560 "jmp 2f \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
561 "1: \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
562 "movq (%1, %0), %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
563 "movq (%2, %0), %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
564 "paddb %%mm0, %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
565 "movq %%mm1, (%2, %0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
566 "movq 8(%1, %0), %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
567 "movq 8(%2, %0), %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
568 "paddb %%mm0, %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
569 "movq %%mm1, 8(%2, %0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
570 "add $16, %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
571 "2: \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
572 "cmp %3, %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
573 " js 1b \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
574 : "+r" (i)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
575 : "r"(src), "r"(dst), "r"((x86_reg)w-15)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
576 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
577 for(; i<w; i++)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
578 dst[i+0] += src[i+0];
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
579 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
580
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
581 static void add_bytes_l2_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
582 x86_reg i=0;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
583 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
584 "jmp 2f \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
585 "1: \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
586 "movq (%2, %0), %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
587 "movq 8(%2, %0), %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
588 "paddb (%3, %0), %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
589 "paddb 8(%3, %0), %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
590 "movq %%mm0, (%1, %0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
591 "movq %%mm1, 8(%1, %0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
592 "add $16, %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
593 "2: \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
594 "cmp %4, %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
595 " js 1b \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
596 : "+r" (i)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
597 : "r"(dst), "r"(src1), "r"(src2), "r"((x86_reg)w-15)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
598 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
599 for(; i<w; i++)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
600 dst[i] = src1[i] + src2[i];
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
601 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
602
8798
a5c8210814d7 Add check whether the compiler/assembler supports 10 or more operands.
diego
parents: 8760
diff changeset
603 #if HAVE_7REGS && HAVE_TEN_OPERANDS
10431
546b7ebeaf07 huffyuv: add some const qualifiers
lorenm
parents: 10430
diff changeset
604 static void add_hfyu_median_prediction_cmov(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top) {
8760
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
605 x86_reg w2 = -w;
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
606 x86_reg x;
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
607 int l = *left & 0xff;
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
608 int tl = *left_top & 0xff;
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
609 int t;
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
610 __asm__ volatile(
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
611 "mov %7, %3 \n"
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
612 "1: \n"
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
613 "movzx (%3,%4), %2 \n"
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
614 "mov %2, %k3 \n"
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
615 "sub %b1, %b3 \n"
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
616 "add %b0, %b3 \n"
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
617 "mov %2, %1 \n"
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
618 "cmp %0, %2 \n"
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
619 "cmovg %0, %2 \n"
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
620 "cmovg %1, %0 \n"
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
621 "cmp %k3, %0 \n"
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
622 "cmovg %k3, %0 \n"
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
623 "mov %7, %3 \n"
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
624 "cmp %2, %0 \n"
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
625 "cmovl %2, %0 \n"
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
626 "add (%6,%4), %b0 \n"
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
627 "mov %b0, (%5,%4) \n"
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
628 "inc %4 \n"
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
629 "jl 1b \n"
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
630 :"+&q"(l), "+&q"(tl), "=&r"(t), "=&q"(x), "+&r"(w2)
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
631 :"r"(dst+w), "r"(diff+w), "rm"(top+w)
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
632 );
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
633 *left = l;
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
634 *left_top = tl;
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
635 }
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
636 #endif
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
637
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
638 #define H263_LOOP_FILTER \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
639 "pxor %%mm7, %%mm7 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
640 "movq %0, %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
641 "movq %0, %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
642 "movq %3, %%mm2 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
643 "movq %3, %%mm3 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
644 "punpcklbw %%mm7, %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
645 "punpckhbw %%mm7, %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
646 "punpcklbw %%mm7, %%mm2 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
647 "punpckhbw %%mm7, %%mm3 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
648 "psubw %%mm2, %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
649 "psubw %%mm3, %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
650 "movq %1, %%mm2 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
651 "movq %1, %%mm3 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
652 "movq %2, %%mm4 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
653 "movq %2, %%mm5 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
654 "punpcklbw %%mm7, %%mm2 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
655 "punpckhbw %%mm7, %%mm3 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
656 "punpcklbw %%mm7, %%mm4 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
657 "punpckhbw %%mm7, %%mm5 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
658 "psubw %%mm2, %%mm4 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
659 "psubw %%mm3, %%mm5 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
660 "psllw $2, %%mm4 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
661 "psllw $2, %%mm5 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
662 "paddw %%mm0, %%mm4 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
663 "paddw %%mm1, %%mm5 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
664 "pxor %%mm6, %%mm6 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
665 "pcmpgtw %%mm4, %%mm6 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
666 "pcmpgtw %%mm5, %%mm7 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
667 "pxor %%mm6, %%mm4 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
668 "pxor %%mm7, %%mm5 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
669 "psubw %%mm6, %%mm4 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
670 "psubw %%mm7, %%mm5 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
671 "psrlw $3, %%mm4 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
672 "psrlw $3, %%mm5 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
673 "packuswb %%mm5, %%mm4 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
674 "packsswb %%mm7, %%mm6 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
675 "pxor %%mm7, %%mm7 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
676 "movd %4, %%mm2 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
677 "punpcklbw %%mm2, %%mm2 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
678 "punpcklbw %%mm2, %%mm2 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
679 "punpcklbw %%mm2, %%mm2 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
680 "psubusb %%mm4, %%mm2 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
681 "movq %%mm2, %%mm3 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
682 "psubusb %%mm4, %%mm3 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
683 "psubb %%mm3, %%mm2 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
684 "movq %1, %%mm3 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
685 "movq %2, %%mm4 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
686 "pxor %%mm6, %%mm3 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
687 "pxor %%mm6, %%mm4 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
688 "paddusb %%mm2, %%mm3 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
689 "psubusb %%mm2, %%mm4 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
690 "pxor %%mm6, %%mm3 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
691 "pxor %%mm6, %%mm4 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
692 "paddusb %%mm2, %%mm2 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
693 "packsswb %%mm1, %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
694 "pcmpgtb %%mm0, %%mm7 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
695 "pxor %%mm7, %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
696 "psubb %%mm7, %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
697 "movq %%mm0, %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
698 "psubusb %%mm2, %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
699 "psubb %%mm0, %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
700 "pand %5, %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
701 "psrlw $2, %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
702 "pxor %%mm7, %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
703 "psubb %%mm7, %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
704 "movq %0, %%mm5 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
705 "movq %3, %%mm6 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
706 "psubb %%mm1, %%mm5 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
707 "paddb %%mm1, %%mm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
708
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
709 static void h263_v_loop_filter_mmx(uint8_t *src, int stride, int qscale){
10749
5cca4b6c459d Get rid of pointless CONFIG_ANY_H263 preprocessor definition.
diego
parents: 10645
diff changeset
710 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
711 const int strength= ff_h263_loop_filter_strength[qscale];
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
712
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
713 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
714
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
715 H263_LOOP_FILTER
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
716
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
717 "movq %%mm3, %1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
718 "movq %%mm4, %2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
719 "movq %%mm5, %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
720 "movq %%mm6, %3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
721 : "+m" (*(uint64_t*)(src - 2*stride)),
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
722 "+m" (*(uint64_t*)(src - 1*stride)),
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
723 "+m" (*(uint64_t*)(src + 0*stride)),
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
724 "+m" (*(uint64_t*)(src + 1*stride))
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
725 : "g" (2*strength), "m"(ff_pb_FC)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
726 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
727 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
728 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
729
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
730 static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){
10749
5cca4b6c459d Get rid of pointless CONFIG_ANY_H263 preprocessor definition.
diego
parents: 10645
diff changeset
731 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
732 const int strength= ff_h263_loop_filter_strength[qscale];
10961
34a65026fa06 Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents: 10766
diff changeset
733 DECLARE_ALIGNED(8, uint64_t, temp)[4];
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
734 uint8_t *btemp= (uint8_t*)temp;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
735
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
736 src -= 2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
737
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
738 transpose4x4(btemp , src , 8, stride);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
739 transpose4x4(btemp+4, src + 4*stride, 8, stride);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
740 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
741 H263_LOOP_FILTER // 5 3 4 6
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
742
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
743 : "+m" (temp[0]),
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
744 "+m" (temp[1]),
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
745 "+m" (temp[2]),
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
746 "+m" (temp[3])
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
747 : "g" (2*strength), "m"(ff_pb_FC)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
748 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
749
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
750 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
751 "movq %%mm5, %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
752 "movq %%mm4, %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
753 "punpcklbw %%mm3, %%mm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
754 "punpcklbw %%mm6, %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
755 "punpckhbw %%mm3, %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
756 "punpckhbw %%mm6, %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
757 "movq %%mm5, %%mm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
758 "movq %%mm1, %%mm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
759 "punpcklwd %%mm4, %%mm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
760 "punpcklwd %%mm0, %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
761 "punpckhwd %%mm4, %%mm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
762 "punpckhwd %%mm0, %%mm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
763 "movd %%mm5, (%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
764 "punpckhdq %%mm5, %%mm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
765 "movd %%mm5, (%0,%2) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
766 "movd %%mm3, (%0,%2,2) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
767 "punpckhdq %%mm3, %%mm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
768 "movd %%mm3, (%0,%3) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
769 "movd %%mm1, (%1) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
770 "punpckhdq %%mm1, %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
771 "movd %%mm1, (%1,%2) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
772 "movd %%mm6, (%1,%2,2) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
773 "punpckhdq %%mm6, %%mm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
774 "movd %%mm6, (%1,%3) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
775 :: "r" (src),
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
776 "r" (src + 4*stride),
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
777 "r" ((x86_reg) stride ),
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
778 "r" ((x86_reg)(3*stride))
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
779 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
780 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
781 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
782
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
783 /* draw the edges of width 'w' of an image of size width, height
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
784 this mmx version can only handle w==8 || w==16 */
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
785 static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
786 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
787 uint8_t *ptr, *last_line;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
788 int i;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
789
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
790 last_line = buf + (height - 1) * wrap;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
791 /* left and right */
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
792 ptr = buf;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
793 if(w==8)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
794 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
795 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
796 "1: \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
797 "movd (%0), %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
798 "punpcklbw %%mm0, %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
799 "punpcklwd %%mm0, %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
800 "punpckldq %%mm0, %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
801 "movq %%mm0, -8(%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
802 "movq -8(%0, %2), %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
803 "punpckhbw %%mm1, %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
804 "punpckhwd %%mm1, %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
805 "punpckhdq %%mm1, %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
806 "movq %%mm1, (%0, %2) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
807 "add %1, %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
808 "cmp %3, %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
809 " jb 1b \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
810 : "+r" (ptr)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
811 : "r" ((x86_reg)wrap), "r" ((x86_reg)width), "r" (ptr + wrap*height)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
812 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
813 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
814 else
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
815 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
816 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
817 "1: \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
818 "movd (%0), %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
819 "punpcklbw %%mm0, %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
820 "punpcklwd %%mm0, %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
821 "punpckldq %%mm0, %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
822 "movq %%mm0, -8(%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
823 "movq %%mm0, -16(%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
824 "movq -8(%0, %2), %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
825 "punpckhbw %%mm1, %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
826 "punpckhwd %%mm1, %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
827 "punpckhdq %%mm1, %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
828 "movq %%mm1, (%0, %2) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
829 "movq %%mm1, 8(%0, %2) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
830 "add %1, %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
831 "cmp %3, %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
832 " jb 1b \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
833 : "+r" (ptr)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
834 : "r" ((x86_reg)wrap), "r" ((x86_reg)width), "r" (ptr + wrap*height)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
835 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
836 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
837
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
838 for(i=0;i<w;i+=4) {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
839 /* top and bottom (and hopefully also the corners) */
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
840 ptr= buf - (i + 1) * wrap - w;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
841 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
842 "1: \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
843 "movq (%1, %0), %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
844 "movq %%mm0, (%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
845 "movq %%mm0, (%0, %2) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
846 "movq %%mm0, (%0, %2, 2) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
847 "movq %%mm0, (%0, %3) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
848 "add $8, %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
849 "cmp %4, %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
850 " jb 1b \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
851 : "+r" (ptr)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
852 : "r" ((x86_reg)buf - (x86_reg)ptr - w), "r" ((x86_reg)-wrap), "r" ((x86_reg)-wrap*3), "r" (ptr+width+2*w)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
853 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
854 ptr= last_line + (i + 1) * wrap - w;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
855 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
856 "1: \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
857 "movq (%1, %0), %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
858 "movq %%mm0, (%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
859 "movq %%mm0, (%0, %2) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
860 "movq %%mm0, (%0, %2, 2) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
861 "movq %%mm0, (%0, %3) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
862 "add $8, %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
863 "cmp %4, %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
864 " jb 1b \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
865 : "+r" (ptr)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
866 : "r" ((x86_reg)last_line - (x86_reg)ptr - w), "r" ((x86_reg)wrap), "r" ((x86_reg)wrap*3), "r" (ptr+width+2*w)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
867 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
868 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
869 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
870
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
871 #define PAETH(cpu, abs3)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
872 static void add_png_paeth_prediction_##cpu(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
873 {\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
874 x86_reg i = -bpp;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
875 x86_reg end = w-3;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
876 __asm__ volatile(\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
877 "pxor %%mm7, %%mm7 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
878 "movd (%1,%0), %%mm0 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
879 "movd (%2,%0), %%mm1 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
880 "punpcklbw %%mm7, %%mm0 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
881 "punpcklbw %%mm7, %%mm1 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
882 "add %4, %0 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
883 "1: \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
884 "movq %%mm1, %%mm2 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
885 "movd (%2,%0), %%mm1 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
886 "movq %%mm2, %%mm3 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
887 "punpcklbw %%mm7, %%mm1 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
888 "movq %%mm2, %%mm4 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
889 "psubw %%mm1, %%mm3 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
890 "psubw %%mm0, %%mm4 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
891 "movq %%mm3, %%mm5 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
892 "paddw %%mm4, %%mm5 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
893 abs3\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
894 "movq %%mm4, %%mm6 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
895 "pminsw %%mm5, %%mm6 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
896 "pcmpgtw %%mm6, %%mm3 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
897 "pcmpgtw %%mm5, %%mm4 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
898 "movq %%mm4, %%mm6 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
899 "pand %%mm3, %%mm4 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
900 "pandn %%mm3, %%mm6 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
901 "pandn %%mm0, %%mm3 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
902 "movd (%3,%0), %%mm0 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
903 "pand %%mm1, %%mm6 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
904 "pand %%mm4, %%mm2 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
905 "punpcklbw %%mm7, %%mm0 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
906 "movq %6, %%mm5 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
907 "paddw %%mm6, %%mm0 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
908 "paddw %%mm2, %%mm3 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
909 "paddw %%mm3, %%mm0 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
910 "pand %%mm5, %%mm0 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
911 "movq %%mm0, %%mm3 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
912 "packuswb %%mm3, %%mm3 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
913 "movd %%mm3, (%1,%0) \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
914 "add %4, %0 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
915 "cmp %5, %0 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
916 "jle 1b \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
917 :"+r"(i)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
918 :"r"(dst), "r"(top), "r"(src), "r"((x86_reg)bpp), "g"(end),\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
919 "m"(ff_pw_255)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
920 :"memory"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
921 );\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
922 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
923
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
924 #define ABS3_MMX2\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
925 "psubw %%mm5, %%mm7 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
926 "pmaxsw %%mm7, %%mm5 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
927 "pxor %%mm6, %%mm6 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
928 "pxor %%mm7, %%mm7 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
929 "psubw %%mm3, %%mm6 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
930 "psubw %%mm4, %%mm7 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
931 "pmaxsw %%mm6, %%mm3 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
932 "pmaxsw %%mm7, %%mm4 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
933 "pxor %%mm7, %%mm7 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
934
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
935 #define ABS3_SSSE3\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
936 "pabsw %%mm3, %%mm3 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
937 "pabsw %%mm4, %%mm4 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
938 "pabsw %%mm5, %%mm5 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
939
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
940 PAETH(mmx2, ABS3_MMX2)
8590
7a463923ecd1 Change semantic of CONFIG_*, HAVE_* and ARCH_*.
aurel
parents: 8527
diff changeset
941 #if HAVE_SSSE3
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
942 PAETH(ssse3, ABS3_SSSE3)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
943 #endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
944
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
945 #define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
946 "paddw " #m4 ", " #m3 " \n\t" /* x1 */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
947 "movq "MANGLE(ff_pw_20)", %%mm4 \n\t" /* 20 */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
948 "pmullw " #m3 ", %%mm4 \n\t" /* 20x1 */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
949 "movq "#in7", " #m3 " \n\t" /* d */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
950 "movq "#in0", %%mm5 \n\t" /* D */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
951 "paddw " #m3 ", %%mm5 \n\t" /* x4 */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
952 "psubw %%mm5, %%mm4 \n\t" /* 20x1 - x4 */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
953 "movq "#in1", %%mm5 \n\t" /* C */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
954 "movq "#in2", %%mm6 \n\t" /* B */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
955 "paddw " #m6 ", %%mm5 \n\t" /* x3 */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
956 "paddw " #m5 ", %%mm6 \n\t" /* x2 */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
957 "paddw %%mm6, %%mm6 \n\t" /* 2x2 */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
958 "psubw %%mm6, %%mm5 \n\t" /* -2x2 + x3 */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
959 "pmullw "MANGLE(ff_pw_3)", %%mm5 \n\t" /* -6x2 + 3x3 */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
960 "paddw " #rnd ", %%mm4 \n\t" /* x2 */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
961 "paddw %%mm4, %%mm5 \n\t" /* 20x1 - 6x2 + 3x3 - x4 */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
962 "psraw $5, %%mm5 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
963 "packuswb %%mm5, %%mm5 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
964 OP(%%mm5, out, %%mm7, d)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
965
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
966 #define QPEL_BASE(OPNAME, ROUNDER, RND, OP_MMX2, OP_3DNOW)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
967 static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
968 uint64_t temp;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
969 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
970 __asm__ volatile(\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
971 "pxor %%mm7, %%mm7 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
972 "1: \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
973 "movq (%0), %%mm0 \n\t" /* ABCDEFGH */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
974 "movq %%mm0, %%mm1 \n\t" /* ABCDEFGH */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
975 "movq %%mm0, %%mm2 \n\t" /* ABCDEFGH */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
976 "punpcklbw %%mm7, %%mm0 \n\t" /* 0A0B0C0D */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
977 "punpckhbw %%mm7, %%mm1 \n\t" /* 0E0F0G0H */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
978 "pshufw $0x90, %%mm0, %%mm5 \n\t" /* 0A0A0B0C */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
979 "pshufw $0x41, %%mm0, %%mm6 \n\t" /* 0B0A0A0B */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
980 "movq %%mm2, %%mm3 \n\t" /* ABCDEFGH */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
981 "movq %%mm2, %%mm4 \n\t" /* ABCDEFGH */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
982 "psllq $8, %%mm2 \n\t" /* 0ABCDEFG */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
983 "psllq $16, %%mm3 \n\t" /* 00ABCDEF */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
984 "psllq $24, %%mm4 \n\t" /* 000ABCDE */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
985 "punpckhbw %%mm7, %%mm2 \n\t" /* 0D0E0F0G */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
986 "punpckhbw %%mm7, %%mm3 \n\t" /* 0C0D0E0F */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
987 "punpckhbw %%mm7, %%mm4 \n\t" /* 0B0C0D0E */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
988 "paddw %%mm3, %%mm5 \n\t" /* b */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
989 "paddw %%mm2, %%mm6 \n\t" /* c */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
990 "paddw %%mm5, %%mm5 \n\t" /* 2b */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
991 "psubw %%mm5, %%mm6 \n\t" /* c - 2b */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
992 "pshufw $0x06, %%mm0, %%mm5 \n\t" /* 0C0B0A0A */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
993 "pmullw "MANGLE(ff_pw_3)", %%mm6 \n\t" /* 3c - 6b */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
994 "paddw %%mm4, %%mm0 \n\t" /* a */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
995 "paddw %%mm1, %%mm5 \n\t" /* d */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
996 "pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
997 "psubw %%mm5, %%mm0 \n\t" /* 20a - d */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
998 "paddw %6, %%mm6 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
999 "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1000 "psraw $5, %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1001 "movq %%mm0, %5 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1002 /* mm1=EFGH, mm2=DEFG, mm3=CDEF, mm4=BCDE, mm7=0 */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1003 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1004 "movq 5(%0), %%mm0 \n\t" /* FGHIJKLM */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1005 "movq %%mm0, %%mm5 \n\t" /* FGHIJKLM */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1006 "movq %%mm0, %%mm6 \n\t" /* FGHIJKLM */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1007 "psrlq $8, %%mm0 \n\t" /* GHIJKLM0 */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1008 "psrlq $16, %%mm5 \n\t" /* HIJKLM00 */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1009 "punpcklbw %%mm7, %%mm0 \n\t" /* 0G0H0I0J */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1010 "punpcklbw %%mm7, %%mm5 \n\t" /* 0H0I0J0K */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1011 "paddw %%mm0, %%mm2 \n\t" /* b */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1012 "paddw %%mm5, %%mm3 \n\t" /* c */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1013 "paddw %%mm2, %%mm2 \n\t" /* 2b */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1014 "psubw %%mm2, %%mm3 \n\t" /* c - 2b */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1015 "movq %%mm6, %%mm2 \n\t" /* FGHIJKLM */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1016 "psrlq $24, %%mm6 \n\t" /* IJKLM000 */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1017 "punpcklbw %%mm7, %%mm2 \n\t" /* 0F0G0H0I */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1018 "punpcklbw %%mm7, %%mm6 \n\t" /* 0I0J0K0L */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1019 "pmullw "MANGLE(ff_pw_3)", %%mm3 \n\t" /* 3c - 6b */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1020 "paddw %%mm2, %%mm1 \n\t" /* a */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1021 "paddw %%mm6, %%mm4 \n\t" /* d */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1022 "pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1023 "psubw %%mm4, %%mm3 \n\t" /* - 6b +3c - d */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1024 "paddw %6, %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1025 "paddw %%mm1, %%mm3 \n\t" /* 20a - 6b +3c - d */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1026 "psraw $5, %%mm3 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1027 "movq %5, %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1028 "packuswb %%mm3, %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1029 OP_MMX2(%%mm1, (%1),%%mm4, q)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1030 /* mm0= GHIJ, mm2=FGHI, mm5=HIJK, mm6=IJKL, mm7=0 */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1031 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1032 "movq 9(%0), %%mm1 \n\t" /* JKLMNOPQ */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1033 "movq %%mm1, %%mm4 \n\t" /* JKLMNOPQ */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1034 "movq %%mm1, %%mm3 \n\t" /* JKLMNOPQ */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1035 "psrlq $8, %%mm1 \n\t" /* KLMNOPQ0 */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1036 "psrlq $16, %%mm4 \n\t" /* LMNOPQ00 */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1037 "punpcklbw %%mm7, %%mm1 \n\t" /* 0K0L0M0N */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1038 "punpcklbw %%mm7, %%mm4 \n\t" /* 0L0M0N0O */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1039 "paddw %%mm1, %%mm5 \n\t" /* b */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1040 "paddw %%mm4, %%mm0 \n\t" /* c */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1041 "paddw %%mm5, %%mm5 \n\t" /* 2b */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1042 "psubw %%mm5, %%mm0 \n\t" /* c - 2b */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1043 "movq %%mm3, %%mm5 \n\t" /* JKLMNOPQ */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1044 "psrlq $24, %%mm3 \n\t" /* MNOPQ000 */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1045 "pmullw "MANGLE(ff_pw_3)", %%mm0 \n\t" /* 3c - 6b */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1046 "punpcklbw %%mm7, %%mm3 \n\t" /* 0M0N0O0P */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1047 "paddw %%mm3, %%mm2 \n\t" /* d */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1048 "psubw %%mm2, %%mm0 \n\t" /* -6b + 3c - d */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1049 "movq %%mm5, %%mm2 \n\t" /* JKLMNOPQ */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1050 "punpcklbw %%mm7, %%mm2 \n\t" /* 0J0K0L0M */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1051 "punpckhbw %%mm7, %%mm5 \n\t" /* 0N0O0P0Q */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1052 "paddw %%mm2, %%mm6 \n\t" /* a */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1053 "pmullw "MANGLE(ff_pw_20)", %%mm6 \n\t" /* 20a */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1054 "paddw %6, %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1055 "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1056 "psraw $5, %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1057 /* mm1=KLMN, mm2=JKLM, mm3=MNOP, mm4=LMNO, mm5=NOPQ mm7=0 */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1058 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1059 "paddw %%mm5, %%mm3 \n\t" /* a */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1060 "pshufw $0xF9, %%mm5, %%mm6 \n\t" /* 0O0P0Q0Q */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1061 "paddw %%mm4, %%mm6 \n\t" /* b */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1062 "pshufw $0xBE, %%mm5, %%mm4 \n\t" /* 0P0Q0Q0P */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1063 "pshufw $0x6F, %%mm5, %%mm5 \n\t" /* 0Q0Q0P0O */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1064 "paddw %%mm1, %%mm4 \n\t" /* c */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1065 "paddw %%mm2, %%mm5 \n\t" /* d */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1066 "paddw %%mm6, %%mm6 \n\t" /* 2b */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1067 "psubw %%mm6, %%mm4 \n\t" /* c - 2b */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1068 "pmullw "MANGLE(ff_pw_20)", %%mm3 \n\t" /* 20a */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1069 "pmullw "MANGLE(ff_pw_3)", %%mm4 \n\t" /* 3c - 6b */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1070 "psubw %%mm5, %%mm3 \n\t" /* -6b + 3c - d */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1071 "paddw %6, %%mm4 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1072 "paddw %%mm3, %%mm4 \n\t" /* 20a - 6b + 3c - d */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1073 "psraw $5, %%mm4 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1074 "packuswb %%mm4, %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1075 OP_MMX2(%%mm0, 8(%1), %%mm4, q)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1076 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1077 "add %3, %0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1078 "add %4, %1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1079 "decl %2 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1080 " jnz 1b \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1081 : "+a"(src), "+c"(dst), "+D"(h)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1082 : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1083 : "memory"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1084 );\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1085 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1086 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1087 static void OPNAME ## mpeg4_qpel16_h_lowpass_3dnow(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1088 int i;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1089 int16_t temp[16];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1090 /* quick HACK, XXX FIXME MUST be optimized */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1091 for(i=0; i<h; i++)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1092 {\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1093 temp[ 0]= (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1094 temp[ 1]= (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1095 temp[ 2]= (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1096 temp[ 3]= (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1097 temp[ 4]= (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1098 temp[ 5]= (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1099 temp[ 6]= (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1100 temp[ 7]= (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1101 temp[ 8]= (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1102 temp[ 9]= (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1103 temp[10]= (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1104 temp[11]= (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1105 temp[12]= (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1106 temp[13]= (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1107 temp[14]= (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1108 temp[15]= (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1109 __asm__ volatile(\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1110 "movq (%0), %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1111 "movq 8(%0), %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1112 "paddw %2, %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1113 "paddw %2, %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1114 "psraw $5, %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1115 "psraw $5, %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1116 "packuswb %%mm1, %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1117 OP_3DNOW(%%mm0, (%1), %%mm1, q)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1118 "movq 16(%0), %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1119 "movq 24(%0), %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1120 "paddw %2, %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1121 "paddw %2, %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1122 "psraw $5, %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1123 "psraw $5, %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1124 "packuswb %%mm1, %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1125 OP_3DNOW(%%mm0, 8(%1), %%mm1, q)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1126 :: "r"(temp), "r"(dst), "m"(ROUNDER)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1127 : "memory"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1128 );\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1129 dst+=dstStride;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1130 src+=srcStride;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1131 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1132 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1133 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1134 static void OPNAME ## mpeg4_qpel8_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1135 __asm__ volatile(\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1136 "pxor %%mm7, %%mm7 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1137 "1: \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1138 "movq (%0), %%mm0 \n\t" /* ABCDEFGH */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1139 "movq %%mm0, %%mm1 \n\t" /* ABCDEFGH */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1140 "movq %%mm0, %%mm2 \n\t" /* ABCDEFGH */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1141 "punpcklbw %%mm7, %%mm0 \n\t" /* 0A0B0C0D */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1142 "punpckhbw %%mm7, %%mm1 \n\t" /* 0E0F0G0H */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1143 "pshufw $0x90, %%mm0, %%mm5 \n\t" /* 0A0A0B0C */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1144 "pshufw $0x41, %%mm0, %%mm6 \n\t" /* 0B0A0A0B */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1145 "movq %%mm2, %%mm3 \n\t" /* ABCDEFGH */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1146 "movq %%mm2, %%mm4 \n\t" /* ABCDEFGH */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1147 "psllq $8, %%mm2 \n\t" /* 0ABCDEFG */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1148 "psllq $16, %%mm3 \n\t" /* 00ABCDEF */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1149 "psllq $24, %%mm4 \n\t" /* 000ABCDE */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1150 "punpckhbw %%mm7, %%mm2 \n\t" /* 0D0E0F0G */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1151 "punpckhbw %%mm7, %%mm3 \n\t" /* 0C0D0E0F */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1152 "punpckhbw %%mm7, %%mm4 \n\t" /* 0B0C0D0E */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1153 "paddw %%mm3, %%mm5 \n\t" /* b */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1154 "paddw %%mm2, %%mm6 \n\t" /* c */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1155 "paddw %%mm5, %%mm5 \n\t" /* 2b */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1156 "psubw %%mm5, %%mm6 \n\t" /* c - 2b */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1157 "pshufw $0x06, %%mm0, %%mm5 \n\t" /* 0C0B0A0A */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1158 "pmullw "MANGLE(ff_pw_3)", %%mm6 \n\t" /* 3c - 6b */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1159 "paddw %%mm4, %%mm0 \n\t" /* a */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1160 "paddw %%mm1, %%mm5 \n\t" /* d */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1161 "pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1162 "psubw %%mm5, %%mm0 \n\t" /* 20a - d */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1163 "paddw %5, %%mm6 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1164 "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1165 "psraw $5, %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1166 /* mm1=EFGH, mm2=DEFG, mm3=CDEF, mm4=BCDE, mm7=0 */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1167 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1168 "movd 5(%0), %%mm5 \n\t" /* FGHI */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1169 "punpcklbw %%mm7, %%mm5 \n\t" /* 0F0G0H0I */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1170 "pshufw $0xF9, %%mm5, %%mm6 \n\t" /* 0G0H0I0I */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1171 "paddw %%mm5, %%mm1 \n\t" /* a */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1172 "paddw %%mm6, %%mm2 \n\t" /* b */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1173 "pshufw $0xBE, %%mm5, %%mm6 \n\t" /* 0H0I0I0H */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1174 "pshufw $0x6F, %%mm5, %%mm5 \n\t" /* 0I0I0H0G */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1175 "paddw %%mm6, %%mm3 \n\t" /* c */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1176 "paddw %%mm5, %%mm4 \n\t" /* d */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1177 "paddw %%mm2, %%mm2 \n\t" /* 2b */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1178 "psubw %%mm2, %%mm3 \n\t" /* c - 2b */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1179 "pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1180 "pmullw "MANGLE(ff_pw_3)", %%mm3 \n\t" /* 3c - 6b */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1181 "psubw %%mm4, %%mm3 \n\t" /* -6b + 3c - d */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1182 "paddw %5, %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1183 "paddw %%mm1, %%mm3 \n\t" /* 20a - 6b + 3c - d */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1184 "psraw $5, %%mm3 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1185 "packuswb %%mm3, %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1186 OP_MMX2(%%mm0, (%1), %%mm4, q)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1187 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1188 "add %3, %0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1189 "add %4, %1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1190 "decl %2 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1191 " jnz 1b \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1192 : "+a"(src), "+c"(dst), "+d"(h)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1193 : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1194 : "memory"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1195 );\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1196 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1197 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1198 static void OPNAME ## mpeg4_qpel8_h_lowpass_3dnow(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1199 int i;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1200 int16_t temp[8];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1201 /* quick HACK, XXX FIXME MUST be optimized */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1202 for(i=0; i<h; i++)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1203 {\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1204 temp[ 0]= (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1205 temp[ 1]= (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1206 temp[ 2]= (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1207 temp[ 3]= (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1208 temp[ 4]= (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1209 temp[ 5]= (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 8]);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1210 temp[ 6]= (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 8])*3 - (src[ 3]+src[ 7]);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1211 temp[ 7]= (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 8])*6 + (src[ 5]+src[ 7])*3 - (src[ 4]+src[ 6]);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1212 __asm__ volatile(\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1213 "movq (%0), %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1214 "movq 8(%0), %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1215 "paddw %2, %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1216 "paddw %2, %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1217 "psraw $5, %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1218 "psraw $5, %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1219 "packuswb %%mm1, %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1220 OP_3DNOW(%%mm0, (%1), %%mm1, q)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1221 :: "r"(temp), "r"(dst), "m"(ROUNDER)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1222 :"memory"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1223 );\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1224 dst+=dstStride;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1225 src+=srcStride;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1226 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1227 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1228
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1229 #define QPEL_OP(OPNAME, ROUNDER, RND, OP, MMX)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1230 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1231 static void OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1232 uint64_t temp[17*4];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1233 uint64_t *temp_ptr= temp;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1234 int count= 17;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1235 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1236 /*FIXME unroll */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1237 __asm__ volatile(\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1238 "pxor %%mm7, %%mm7 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1239 "1: \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1240 "movq (%0), %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1241 "movq (%0), %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1242 "movq 8(%0), %%mm2 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1243 "movq 8(%0), %%mm3 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1244 "punpcklbw %%mm7, %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1245 "punpckhbw %%mm7, %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1246 "punpcklbw %%mm7, %%mm2 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1247 "punpckhbw %%mm7, %%mm3 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1248 "movq %%mm0, (%1) \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1249 "movq %%mm1, 17*8(%1) \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1250 "movq %%mm2, 2*17*8(%1) \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1251 "movq %%mm3, 3*17*8(%1) \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1252 "add $8, %1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1253 "add %3, %0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1254 "decl %2 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1255 " jnz 1b \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1256 : "+r" (src), "+r" (temp_ptr), "+r"(count)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1257 : "r" ((x86_reg)srcStride)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1258 : "memory"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1259 );\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1260 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1261 temp_ptr= temp;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1262 count=4;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1263 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1264 /*FIXME reorder for speed */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1265 __asm__ volatile(\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1266 /*"pxor %%mm7, %%mm7 \n\t"*/\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1267 "1: \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1268 "movq (%0), %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1269 "movq 8(%0), %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1270 "movq 16(%0), %%mm2 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1271 "movq 24(%0), %%mm3 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1272 QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1273 QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1274 "add %4, %1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1275 QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1276 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1277 QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1278 "add %4, %1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1279 QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1280 QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1281 "add %4, %1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1282 QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1283 QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1284 "add %4, %1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1285 QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1286 QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1287 "add %4, %1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1288 QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1289 QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1290 "add %4, %1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1291 QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 72(%0), 80(%0), 88(%0),128(%0), (%1), OP)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1292 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1293 QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1294 "add %4, %1 \n\t" \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1295 QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1296 QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1297 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1298 "add $136, %0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1299 "add %6, %1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1300 "decl %2 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1301 " jnz 1b \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1302 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1303 : "+r"(temp_ptr), "+r"(dst), "+g"(count)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1304 : "r"((x86_reg)dstStride), "r"(2*(x86_reg)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-14*(x86_reg)dstStride)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1305 :"memory"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1306 );\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1307 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1308 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1309 static void OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1310 uint64_t temp[9*2];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1311 uint64_t *temp_ptr= temp;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1312 int count= 9;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1313 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1314 /*FIXME unroll */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1315 __asm__ volatile(\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1316 "pxor %%mm7, %%mm7 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1317 "1: \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1318 "movq (%0), %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1319 "movq (%0), %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1320 "punpcklbw %%mm7, %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1321 "punpckhbw %%mm7, %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1322 "movq %%mm0, (%1) \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1323 "movq %%mm1, 9*8(%1) \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1324 "add $8, %1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1325 "add %3, %0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1326 "decl %2 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1327 " jnz 1b \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1328 : "+r" (src), "+r" (temp_ptr), "+r"(count)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1329 : "r" ((x86_reg)srcStride)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1330 : "memory"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1331 );\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1332 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1333 temp_ptr= temp;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1334 count=2;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1335 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1336 /*FIXME reorder for speed */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1337 __asm__ volatile(\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1338 /*"pxor %%mm7, %%mm7 \n\t"*/\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1339 "1: \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1340 "movq (%0), %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1341 "movq 8(%0), %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1342 "movq 16(%0), %%mm2 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1343 "movq 24(%0), %%mm3 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1344 QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1345 QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1346 "add %4, %1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1347 QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1348 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1349 QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1350 "add %4, %1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1351 QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1352 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1353 QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 64(%0), (%1, %3), OP)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1354 "add %4, %1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1355 QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 56(%0), (%1), OP)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1356 QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 48(%0), (%1, %3), OP)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1357 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1358 "add $72, %0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1359 "add %6, %1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1360 "decl %2 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1361 " jnz 1b \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1362 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1363 : "+r"(temp_ptr), "+r"(dst), "+g"(count)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1364 : "r"((x86_reg)dstStride), "r"(2*(x86_reg)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-6*(x86_reg)dstStride)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1365 : "memory"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1366 );\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1367 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1368 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1369 static void OPNAME ## qpel8_mc00_ ## MMX (uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1370 OPNAME ## pixels8_ ## MMX(dst, src, stride, 8);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1371 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1372 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1373 static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1374 uint64_t temp[8];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1375 uint8_t * const half= (uint8_t*)temp;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1376 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1377 OPNAME ## pixels8_l2_ ## MMX(dst, src, half, stride, stride, 8);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1378 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1379 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1380 static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1381 OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, stride, 8);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1382 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1383 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1384 static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1385 uint64_t temp[8];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1386 uint8_t * const half= (uint8_t*)temp;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1387 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1388 OPNAME ## pixels8_l2_ ## MMX(dst, src+1, half, stride, stride, 8);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1389 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1390 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1391 static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1392 uint64_t temp[8];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1393 uint8_t * const half= (uint8_t*)temp;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1394 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1395 OPNAME ## pixels8_l2_ ## MMX(dst, src, half, stride, stride, 8);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1396 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1397 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1398 static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1399 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, stride, stride);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1400 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1401 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1402 static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1403 uint64_t temp[8];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1404 uint8_t * const half= (uint8_t*)temp;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1405 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1406 OPNAME ## pixels8_l2_ ## MMX(dst, src+stride, half, stride, stride, 8);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1407 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1408 static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1409 uint64_t half[8 + 9];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1410 uint8_t * const halfH= ((uint8_t*)half) + 64;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1411 uint8_t * const halfHV= ((uint8_t*)half);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1412 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1413 put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, stride, 9);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1414 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1415 OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, stride, 8, 8);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1416 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1417 static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1418 uint64_t half[8 + 9];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1419 uint8_t * const halfH= ((uint8_t*)half) + 64;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1420 uint8_t * const halfHV= ((uint8_t*)half);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1421 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1422 put ## RND ## pixels8_l2_ ## MMX(halfH, src+1, halfH, 8, stride, 9);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1423 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1424 OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, stride, 8, 8);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1425 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1426 static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1427 uint64_t half[8 + 9];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1428 uint8_t * const halfH= ((uint8_t*)half) + 64;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1429 uint8_t * const halfHV= ((uint8_t*)half);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1430 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1431 put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, stride, 9);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1432 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1433 OPNAME ## pixels8_l2_ ## MMX(dst, halfH+8, halfHV, stride, 8, 8);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1434 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1435 static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1436 uint64_t half[8 + 9];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1437 uint8_t * const halfH= ((uint8_t*)half) + 64;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1438 uint8_t * const halfHV= ((uint8_t*)half);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1439 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1440 put ## RND ## pixels8_l2_ ## MMX(halfH, src+1, halfH, 8, stride, 9);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1441 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1442 OPNAME ## pixels8_l2_ ## MMX(dst, halfH+8, halfHV, stride, 8, 8);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1443 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1444 static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1445 uint64_t half[8 + 9];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1446 uint8_t * const halfH= ((uint8_t*)half) + 64;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1447 uint8_t * const halfHV= ((uint8_t*)half);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1448 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1449 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1450 OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, stride, 8, 8);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1451 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1452 static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1453 uint64_t half[8 + 9];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1454 uint8_t * const halfH= ((uint8_t*)half) + 64;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1455 uint8_t * const halfHV= ((uint8_t*)half);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1456 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1457 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1458 OPNAME ## pixels8_l2_ ## MMX(dst, halfH+8, halfHV, stride, 8, 8);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1459 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1460 static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1461 uint64_t half[8 + 9];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1462 uint8_t * const halfH= ((uint8_t*)half);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1463 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1464 put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, stride, 9);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1465 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1466 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1467 static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1468 uint64_t half[8 + 9];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1469 uint8_t * const halfH= ((uint8_t*)half);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1470 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1471 put ## RND ## pixels8_l2_ ## MMX(halfH, src+1, halfH, 8, stride, 9);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1472 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1473 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1474 static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1475 uint64_t half[9];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1476 uint8_t * const halfH= ((uint8_t*)half);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1477 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1478 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1479 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1480 static void OPNAME ## qpel16_mc00_ ## MMX (uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1481 OPNAME ## pixels16_ ## MMX(dst, src, stride, 16);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1482 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1483 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1484 static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1485 uint64_t temp[32];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1486 uint8_t * const half= (uint8_t*)temp;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1487 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, stride, 16);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1488 OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, stride, 16);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1489 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1490 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1491 static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1492 OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, stride, stride, 16);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1493 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1494 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1495 static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1496 uint64_t temp[32];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1497 uint8_t * const half= (uint8_t*)temp;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1498 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, stride, 16);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1499 OPNAME ## pixels16_l2_ ## MMX(dst, src+1, half, stride, stride, 16);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1500 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1501 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1502 static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1503 uint64_t temp[32];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1504 uint8_t * const half= (uint8_t*)temp;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1505 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1506 OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, stride, 16);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1507 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1508 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1509 static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1510 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, stride, stride);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1511 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1512 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1513 static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1514 uint64_t temp[32];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1515 uint8_t * const half= (uint8_t*)temp;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1516 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1517 OPNAME ## pixels16_l2_ ## MMX(dst, src+stride, half, stride, stride, 16);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1518 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1519 static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1520 uint64_t half[16*2 + 17*2];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1521 uint8_t * const halfH= ((uint8_t*)half) + 256;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1522 uint8_t * const halfHV= ((uint8_t*)half);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1523 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1524 put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, stride, 17);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1525 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1526 OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, stride, 16, 16);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1527 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1528 static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1529 uint64_t half[16*2 + 17*2];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1530 uint8_t * const halfH= ((uint8_t*)half) + 256;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1531 uint8_t * const halfHV= ((uint8_t*)half);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1532 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1533 put ## RND ## pixels16_l2_ ## MMX(halfH, src+1, halfH, 16, stride, 17);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1534 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1535 OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, stride, 16, 16);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1536 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1537 static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1538 uint64_t half[16*2 + 17*2];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1539 uint8_t * const halfH= ((uint8_t*)half) + 256;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1540 uint8_t * const halfHV= ((uint8_t*)half);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1541 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1542 put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, stride, 17);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1543 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1544 OPNAME ## pixels16_l2_ ## MMX(dst, halfH+16, halfHV, stride, 16, 16);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1545 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1546 static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1547 uint64_t half[16*2 + 17*2];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1548 uint8_t * const halfH= ((uint8_t*)half) + 256;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1549 uint8_t * const halfHV= ((uint8_t*)half);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1550 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1551 put ## RND ## pixels16_l2_ ## MMX(halfH, src+1, halfH, 16, stride, 17);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1552 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1553 OPNAME ## pixels16_l2_ ## MMX(dst, halfH+16, halfHV, stride, 16, 16);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1554 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1555 static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1556 uint64_t half[16*2 + 17*2];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1557 uint8_t * const halfH= ((uint8_t*)half) + 256;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1558 uint8_t * const halfHV= ((uint8_t*)half);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1559 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1560 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1561 OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, stride, 16, 16);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1562 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1563 static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1564 uint64_t half[16*2 + 17*2];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1565 uint8_t * const halfH= ((uint8_t*)half) + 256;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1566 uint8_t * const halfHV= ((uint8_t*)half);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1567 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1568 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1569 OPNAME ## pixels16_l2_ ## MMX(dst, halfH+16, halfHV, stride, 16, 16);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1570 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1571 static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1572 uint64_t half[17*2];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1573 uint8_t * const halfH= ((uint8_t*)half);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1574 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1575 put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, stride, 17);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1576 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1577 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1578 static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1579 uint64_t half[17*2];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1580 uint8_t * const halfH= ((uint8_t*)half);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1581 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1582 put ## RND ## pixels16_l2_ ## MMX(halfH, src+1, halfH, 16, stride, 17);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1583 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1584 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1585 static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1586 uint64_t half[17*2];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1587 uint8_t * const halfH= ((uint8_t*)half);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1588 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1589 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1590 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1591
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1592 #define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1593 #define AVG_3DNOW_OP(a,b,temp, size) \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1594 "mov" #size " " #b ", " #temp " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1595 "pavgusb " #temp ", " #a " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1596 "mov" #size " " #a ", " #b " \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1597 #define AVG_MMX2_OP(a,b,temp, size) \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1598 "mov" #size " " #b ", " #temp " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1599 "pavgb " #temp ", " #a " \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1600 "mov" #size " " #a ", " #b " \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1601
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1602 QPEL_BASE(put_ , ff_pw_16, _ , PUT_OP, PUT_OP)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1603 QPEL_BASE(avg_ , ff_pw_16, _ , AVG_MMX2_OP, AVG_3DNOW_OP)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1604 QPEL_BASE(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, PUT_OP)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1605 QPEL_OP(put_ , ff_pw_16, _ , PUT_OP, 3dnow)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1606 QPEL_OP(avg_ , ff_pw_16, _ , AVG_3DNOW_OP, 3dnow)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1607 QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, 3dnow)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1608 QPEL_OP(put_ , ff_pw_16, _ , PUT_OP, mmx2)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1609 QPEL_OP(avg_ , ff_pw_16, _ , AVG_MMX2_OP, mmx2)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1610 QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, mmx2)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1611
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1612 /***********************************/
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1613 /* bilinear qpel: not compliant to any spec, only for -lavdopts fast */
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1614
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1615 #define QPEL_2TAP_XY(OPNAME, SIZE, MMX, XY, HPEL)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1616 static void OPNAME ## 2tap_qpel ## SIZE ## _mc ## XY ## _ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1617 OPNAME ## pixels ## SIZE ## HPEL(dst, src, stride, SIZE);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1618 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1619 #define QPEL_2TAP_L3(OPNAME, SIZE, MMX, XY, S0, S1, S2)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1620 static void OPNAME ## 2tap_qpel ## SIZE ## _mc ## XY ## _ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1621 OPNAME ## 2tap_qpel ## SIZE ## _l3_ ## MMX(dst, src+S0, stride, SIZE, S1, S2);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1622 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1623
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1624 #define QPEL_2TAP(OPNAME, SIZE, MMX)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1625 QPEL_2TAP_XY(OPNAME, SIZE, MMX, 20, _x2_ ## MMX)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1626 QPEL_2TAP_XY(OPNAME, SIZE, MMX, 02, _y2_ ## MMX)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1627 QPEL_2TAP_XY(OPNAME, SIZE, MMX, 22, _xy2_mmx)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1628 static const qpel_mc_func OPNAME ## 2tap_qpel ## SIZE ## _mc00_ ## MMX =\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1629 OPNAME ## qpel ## SIZE ## _mc00_ ## MMX;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1630 static const qpel_mc_func OPNAME ## 2tap_qpel ## SIZE ## _mc21_ ## MMX =\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1631 OPNAME ## 2tap_qpel ## SIZE ## _mc20_ ## MMX;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1632 static const qpel_mc_func OPNAME ## 2tap_qpel ## SIZE ## _mc12_ ## MMX =\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1633 OPNAME ## 2tap_qpel ## SIZE ## _mc02_ ## MMX;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1634 static void OPNAME ## 2tap_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1635 OPNAME ## pixels ## SIZE ## _y2_ ## MMX(dst, src+1, stride, SIZE);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1636 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1637 static void OPNAME ## 2tap_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1638 OPNAME ## pixels ## SIZE ## _x2_ ## MMX(dst, src+stride, stride, SIZE);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1639 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1640 QPEL_2TAP_L3(OPNAME, SIZE, MMX, 10, 0, 1, 0)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1641 QPEL_2TAP_L3(OPNAME, SIZE, MMX, 30, 1, -1, 0)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1642 QPEL_2TAP_L3(OPNAME, SIZE, MMX, 01, 0, stride, 0)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1643 QPEL_2TAP_L3(OPNAME, SIZE, MMX, 03, stride, -stride, 0)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1644 QPEL_2TAP_L3(OPNAME, SIZE, MMX, 11, 0, stride, 1)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1645 QPEL_2TAP_L3(OPNAME, SIZE, MMX, 31, 1, stride, -1)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1646 QPEL_2TAP_L3(OPNAME, SIZE, MMX, 13, stride, -stride, 1)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1647 QPEL_2TAP_L3(OPNAME, SIZE, MMX, 33, stride+1, -stride, -1)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1648
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1649 QPEL_2TAP(put_, 16, mmx2)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1650 QPEL_2TAP(avg_, 16, mmx2)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1651 QPEL_2TAP(put_, 8, mmx2)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1652 QPEL_2TAP(avg_, 8, mmx2)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1653 QPEL_2TAP(put_, 16, 3dnow)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1654 QPEL_2TAP(avg_, 16, 3dnow)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1655 QPEL_2TAP(put_, 8, 3dnow)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1656 QPEL_2TAP(avg_, 8, 3dnow)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1657
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1658
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1659 #if 0
8527
f8bf438c6000 Add missing 'void' keyword to parameterless function declarations.
diego
parents: 8519
diff changeset
1660 static void just_return(void) { return; }
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1661 #endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1662
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1663 static void gmc_mmx(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1664 int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1665 const int w = 8;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1666 const int ix = ox>>(16+shift);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1667 const int iy = oy>>(16+shift);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1668 const int oxs = ox>>4;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1669 const int oys = oy>>4;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1670 const int dxxs = dxx>>4;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1671 const int dxys = dxy>>4;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1672 const int dyxs = dyx>>4;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1673 const int dyys = dyy>>4;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1674 const uint16_t r4[4] = {r,r,r,r};
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1675 const uint16_t dxy4[4] = {dxys,dxys,dxys,dxys};
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1676 const uint16_t dyy4[4] = {dyys,dyys,dyys,dyys};
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1677 const uint64_t shift2 = 2*shift;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1678 uint8_t edge_buf[(h+1)*stride];
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1679 int x, y;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1680
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1681 const int dxw = (dxx-(1<<(16+shift)))*(w-1);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1682 const int dyh = (dyy-(1<<(16+shift)))*(h-1);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1683 const int dxh = dxy*(h-1);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1684 const int dyw = dyx*(w-1);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1685 if( // non-constant fullpel offset (3% of blocks)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1686 ((ox^(ox+dxw)) | (ox^(ox+dxh)) | (ox^(ox+dxw+dxh)) |
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1687 (oy^(oy+dyw)) | (oy^(oy+dyh)) | (oy^(oy+dyw+dyh))) >> (16+shift)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1688 // uses more than 16 bits of subpel mv (only at huge resolution)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1689 || (dxx|dxy|dyx|dyy)&15 )
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1690 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1691 //FIXME could still use mmx for some of the rows
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1692 ff_gmc_c(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy, shift, r, width, height);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1693 return;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1694 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1695
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1696 src += ix + iy*stride;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1697 if( (unsigned)ix >= width-w ||
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1698 (unsigned)iy >= height-h )
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1699 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1700 ff_emulated_edge_mc(edge_buf, src, stride, w+1, h+1, ix, iy, width, height);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1701 src = edge_buf;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1702 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1703
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1704 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1705 "movd %0, %%mm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1706 "pxor %%mm7, %%mm7 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1707 "punpcklwd %%mm6, %%mm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1708 "punpcklwd %%mm6, %%mm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1709 :: "r"(1<<shift)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1710 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1711
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1712 for(x=0; x<w; x+=4){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1713 uint16_t dx4[4] = { oxs - dxys + dxxs*(x+0),
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1714 oxs - dxys + dxxs*(x+1),
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1715 oxs - dxys + dxxs*(x+2),
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1716 oxs - dxys + dxxs*(x+3) };
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1717 uint16_t dy4[4] = { oys - dyys + dyxs*(x+0),
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1718 oys - dyys + dyxs*(x+1),
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1719 oys - dyys + dyxs*(x+2),
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1720 oys - dyys + dyxs*(x+3) };
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1721
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1722 for(y=0; y<h; y++){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1723 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1724 "movq %0, %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1725 "movq %1, %%mm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1726 "paddw %2, %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1727 "paddw %3, %%mm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1728 "movq %%mm4, %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1729 "movq %%mm5, %1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1730 "psrlw $12, %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1731 "psrlw $12, %%mm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1732 : "+m"(*dx4), "+m"(*dy4)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1733 : "m"(*dxy4), "m"(*dyy4)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1734 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1735
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1736 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1737 "movq %%mm6, %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1738 "movq %%mm6, %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1739 "psubw %%mm4, %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1740 "psubw %%mm5, %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1741 "movq %%mm2, %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1742 "movq %%mm4, %%mm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1743 "pmullw %%mm1, %%mm0 \n\t" // (s-dx)*(s-dy)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1744 "pmullw %%mm5, %%mm3 \n\t" // dx*dy
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1745 "pmullw %%mm5, %%mm2 \n\t" // (s-dx)*dy
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1746 "pmullw %%mm4, %%mm1 \n\t" // dx*(s-dy)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1747
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1748 "movd %4, %%mm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1749 "movd %3, %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1750 "punpcklbw %%mm7, %%mm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1751 "punpcklbw %%mm7, %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1752 "pmullw %%mm5, %%mm3 \n\t" // src[1,1] * dx*dy
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1753 "pmullw %%mm4, %%mm2 \n\t" // src[0,1] * (s-dx)*dy
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1754
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1755 "movd %2, %%mm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1756 "movd %1, %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1757 "punpcklbw %%mm7, %%mm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1758 "punpcklbw %%mm7, %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1759 "pmullw %%mm5, %%mm1 \n\t" // src[1,0] * dx*(s-dy)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1760 "pmullw %%mm4, %%mm0 \n\t" // src[0,0] * (s-dx)*(s-dy)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1761 "paddw %5, %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1762 "paddw %%mm3, %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1763 "paddw %%mm1, %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1764 "paddw %%mm2, %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1765
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1766 "psrlw %6, %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1767 "packuswb %%mm0, %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1768 "movd %%mm0, %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1769
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1770 : "=m"(dst[x+y*stride])
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1771 : "m"(src[0]), "m"(src[1]),
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1772 "m"(src[stride]), "m"(src[stride+1]),
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1773 "m"(*r4), "m"(shift2)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1774 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1775 src += stride;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1776 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1777 src += 4-h*stride;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1778 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1779 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1780
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1781 #define PREFETCH(name, op) \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1782 static void name(void *mem, int stride, int h){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1783 const uint8_t *p= mem;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1784 do{\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1785 __asm__ volatile(#op" %0" :: "m"(*p));\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1786 p+= stride;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1787 }while(--h);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1788 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1789 PREFETCH(prefetch_mmx2, prefetcht0)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1790 PREFETCH(prefetch_3dnow, prefetch)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1791 #undef PREFETCH
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1792
12450
3941687b4fa9 Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents: 12439
diff changeset
1793 #include "h264_qpel_mmx.c"
12437
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1794
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1795 void ff_put_h264_chroma_mc8_mmx_rnd (uint8_t *dst, uint8_t *src,
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1796 int stride, int h, int x, int y);
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1797 void ff_put_vc1_chroma_mc8_mmx_nornd (uint8_t *dst, uint8_t *src,
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1798 int stride, int h, int x, int y);
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1799 void ff_put_rv40_chroma_mc8_mmx (uint8_t *dst, uint8_t *src,
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1800 int stride, int h, int x, int y);
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1801 void ff_avg_h264_chroma_mc8_mmx2_rnd (uint8_t *dst, uint8_t *src,
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1802 int stride, int h, int x, int y);
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1803 void ff_avg_vc1_chroma_mc8_mmx2_nornd (uint8_t *dst, uint8_t *src,
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1804 int stride, int h, int x, int y);
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1805 void ff_avg_rv40_chroma_mc8_mmx2 (uint8_t *dst, uint8_t *src,
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1806 int stride, int h, int x, int y);
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1807 void ff_avg_h264_chroma_mc8_3dnow_rnd (uint8_t *dst, uint8_t *src,
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1808 int stride, int h, int x, int y);
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1809 void ff_avg_vc1_chroma_mc8_3dnow_nornd(uint8_t *dst, uint8_t *src,
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1810 int stride, int h, int x, int y);
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1811 void ff_avg_rv40_chroma_mc8_3dnow (uint8_t *dst, uint8_t *src,
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1812 int stride, int h, int x, int y);
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1813
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1814 void ff_put_h264_chroma_mc4_mmx (uint8_t *dst, uint8_t *src,
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1815 int stride, int h, int x, int y);
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1816 void ff_put_rv40_chroma_mc4_mmx (uint8_t *dst, uint8_t *src,
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1817 int stride, int h, int x, int y);
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1818 void ff_avg_h264_chroma_mc4_mmx2 (uint8_t *dst, uint8_t *src,
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1819 int stride, int h, int x, int y);
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1820 void ff_avg_rv40_chroma_mc4_mmx2 (uint8_t *dst, uint8_t *src,
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1821 int stride, int h, int x, int y);
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1822 void ff_avg_h264_chroma_mc4_3dnow (uint8_t *dst, uint8_t *src,
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1823 int stride, int h, int x, int y);
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1824 void ff_avg_rv40_chroma_mc4_3dnow (uint8_t *dst, uint8_t *src,
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1825 int stride, int h, int x, int y);
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1826
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1827 void ff_put_h264_chroma_mc2_mmx2 (uint8_t *dst, uint8_t *src,
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1828 int stride, int h, int x, int y);
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1829 void ff_avg_h264_chroma_mc2_mmx2 (uint8_t *dst, uint8_t *src,
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1830 int stride, int h, int x, int y);
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1831
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1832 void ff_put_h264_chroma_mc8_ssse3_rnd (uint8_t *dst, uint8_t *src,
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1833 int stride, int h, int x, int y);
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1834 void ff_put_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src,
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1835 int stride, int h, int x, int y);
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1836 void ff_put_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src,
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1837 int stride, int h, int x, int y);
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1838
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1839 void ff_avg_h264_chroma_mc8_ssse3_rnd (uint8_t *dst, uint8_t *src,
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1840 int stride, int h, int x, int y);
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1841 void ff_avg_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src,
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1842 int stride, int h, int x, int y);
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1843 void ff_avg_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src,
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1844 int stride, int h, int x, int y);
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
1845
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1846
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1847 /* CAVS specific */
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1848 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1849 put_pixels8_mmx(dst, src, stride, 8);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1850 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1851 void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1852 avg_pixels8_mmx(dst, src, stride, 8);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1853 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1854 void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1855 put_pixels16_mmx(dst, src, stride, 16);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1856 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1857 void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1858 avg_pixels16_mmx(dst, src, stride, 16);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1859 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1860
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1861 /* VC1 specific */
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1862 void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1863 put_pixels8_mmx(dst, src, stride, 8);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1864 }
9441
e14cd3ac3806 VC1: extend MMX qpel MC to include MMX2 avg qpel
conrad
parents: 9440
diff changeset
1865 void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
e14cd3ac3806 VC1: extend MMX qpel MC to include MMX2 avg qpel
conrad
parents: 9440
diff changeset
1866 avg_pixels8_mmx2(dst, src, stride, 8);
e14cd3ac3806 VC1: extend MMX qpel MC to include MMX2 avg qpel
conrad
parents: 9440
diff changeset
1867 }
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1868
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1869 /* XXX: those functions should be suppressed ASAP when all IDCTs are
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1870 converted */
8590
7a463923ecd1 Change semantic of CONFIG_*, HAVE_* and ARCH_*.
aurel
parents: 8527
diff changeset
1871 #if CONFIG_GPL
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1872 static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1873 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1874 ff_mmx_idct (block);
12435
fe78a4548d12 Put ff_ prefix on non-static {put_signed,put,add}_pixels_clamped_mmx()
rbultje
parents: 12417
diff changeset
1875 ff_put_pixels_clamped_mmx(block, dest, line_size);
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1876 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1877 static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1878 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1879 ff_mmx_idct (block);
12435
fe78a4548d12 Put ff_ prefix on non-static {put_signed,put,add}_pixels_clamped_mmx()
rbultje
parents: 12417
diff changeset
1880 ff_add_pixels_clamped_mmx(block, dest, line_size);
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1881 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1882 static void ff_libmpeg2mmx2_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1883 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1884 ff_mmxext_idct (block);
12435
fe78a4548d12 Put ff_ prefix on non-static {put_signed,put,add}_pixels_clamped_mmx()
rbultje
parents: 12417
diff changeset
1885 ff_put_pixels_clamped_mmx(block, dest, line_size);
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1886 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1887 static void ff_libmpeg2mmx2_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1888 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1889 ff_mmxext_idct (block);
12435
fe78a4548d12 Put ff_ prefix on non-static {put_signed,put,add}_pixels_clamped_mmx()
rbultje
parents: 12417
diff changeset
1890 ff_add_pixels_clamped_mmx(block, dest, line_size);
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1891 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1892 #endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1893 static void ff_idct_xvid_mmx_put(uint8_t *dest, int line_size, DCTELEM *block)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1894 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1895 ff_idct_xvid_mmx (block);
12435
fe78a4548d12 Put ff_ prefix on non-static {put_signed,put,add}_pixels_clamped_mmx()
rbultje
parents: 12417
diff changeset
1896 ff_put_pixels_clamped_mmx(block, dest, line_size);
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1897 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1898 static void ff_idct_xvid_mmx_add(uint8_t *dest, int line_size, DCTELEM *block)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1899 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1900 ff_idct_xvid_mmx (block);
12435
fe78a4548d12 Put ff_ prefix on non-static {put_signed,put,add}_pixels_clamped_mmx()
rbultje
parents: 12417
diff changeset
1901 ff_add_pixels_clamped_mmx(block, dest, line_size);
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1902 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1903 static void ff_idct_xvid_mmx2_put(uint8_t *dest, int line_size, DCTELEM *block)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1904 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1905 ff_idct_xvid_mmx2 (block);
12435
fe78a4548d12 Put ff_ prefix on non-static {put_signed,put,add}_pixels_clamped_mmx()
rbultje
parents: 12417
diff changeset
1906 ff_put_pixels_clamped_mmx(block, dest, line_size);
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1907 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1908 static void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1909 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1910 ff_idct_xvid_mmx2 (block);
12435
fe78a4548d12 Put ff_ prefix on non-static {put_signed,put,add}_pixels_clamped_mmx()
rbultje
parents: 12417
diff changeset
1911 ff_add_pixels_clamped_mmx(block, dest, line_size);
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1912 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1913
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1914 static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1915 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1916 int i;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1917 __asm__ volatile("pxor %%mm7, %%mm7":);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1918 for(i=0; i<blocksize; i+=2) {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1919 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1920 "movq %0, %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1921 "movq %1, %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1922 "movq %%mm0, %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1923 "movq %%mm1, %%mm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1924 "pfcmpge %%mm7, %%mm2 \n\t" // m <= 0.0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1925 "pfcmpge %%mm7, %%mm3 \n\t" // a <= 0.0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1926 "pslld $31, %%mm2 \n\t" // keep only the sign bit
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1927 "pxor %%mm2, %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1928 "movq %%mm3, %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1929 "pand %%mm1, %%mm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1930 "pandn %%mm1, %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1931 "pfadd %%mm0, %%mm3 \n\t" // a = m + ((a<0) & (a ^ sign(m)))
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1932 "pfsub %%mm4, %%mm0 \n\t" // m = m + ((a>0) & (a ^ sign(m)))
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1933 "movq %%mm3, %1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1934 "movq %%mm0, %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1935 :"+m"(mag[i]), "+m"(ang[i])
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1936 ::"memory"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1937 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1938 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1939 __asm__ volatile("femms");
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1940 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1941 static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1942 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1943 int i;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1944
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1945 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1946 "movaps %0, %%xmm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1947 ::"m"(ff_pdw_80000000[0])
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1948 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1949 for(i=0; i<blocksize; i+=4) {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1950 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1951 "movaps %0, %%xmm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1952 "movaps %1, %%xmm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1953 "xorps %%xmm2, %%xmm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1954 "xorps %%xmm3, %%xmm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1955 "cmpleps %%xmm0, %%xmm2 \n\t" // m <= 0.0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1956 "cmpleps %%xmm1, %%xmm3 \n\t" // a <= 0.0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1957 "andps %%xmm5, %%xmm2 \n\t" // keep only the sign bit
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1958 "xorps %%xmm2, %%xmm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1959 "movaps %%xmm3, %%xmm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1960 "andps %%xmm1, %%xmm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1961 "andnps %%xmm1, %%xmm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1962 "addps %%xmm0, %%xmm3 \n\t" // a = m + ((a<0) & (a ^ sign(m)))
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1963 "subps %%xmm4, %%xmm0 \n\t" // m = m + ((a>0) & (a ^ sign(m)))
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1964 "movaps %%xmm3, %1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1965 "movaps %%xmm0, %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1966 :"+m"(mag[i]), "+m"(ang[i])
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1967 ::"memory"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1968 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1969 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1970 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1971
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1972 #define IF1(x) x
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1973 #define IF0(x)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1974
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1975 #define MIX5(mono,stereo)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1976 __asm__ volatile(\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1977 "movss 0(%2), %%xmm5 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1978 "movss 8(%2), %%xmm6 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1979 "movss 24(%2), %%xmm7 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1980 "shufps $0, %%xmm5, %%xmm5 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1981 "shufps $0, %%xmm6, %%xmm6 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1982 "shufps $0, %%xmm7, %%xmm7 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1983 "1: \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1984 "movaps (%0,%1), %%xmm0 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1985 "movaps 0x400(%0,%1), %%xmm1 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1986 "movaps 0x800(%0,%1), %%xmm2 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1987 "movaps 0xc00(%0,%1), %%xmm3 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1988 "movaps 0x1000(%0,%1), %%xmm4 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1989 "mulps %%xmm5, %%xmm0 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1990 "mulps %%xmm6, %%xmm1 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1991 "mulps %%xmm5, %%xmm2 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1992 "mulps %%xmm7, %%xmm3 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1993 "mulps %%xmm7, %%xmm4 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1994 stereo("addps %%xmm1, %%xmm0 \n")\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1995 "addps %%xmm1, %%xmm2 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1996 "addps %%xmm3, %%xmm0 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1997 "addps %%xmm4, %%xmm2 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1998 mono("addps %%xmm2, %%xmm0 \n")\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1999 "movaps %%xmm0, (%0,%1) \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2000 stereo("movaps %%xmm2, 0x400(%0,%1) \n")\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2001 "add $16, %0 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2002 "jl 1b \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2003 :"+&r"(i)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2004 :"r"(samples[0]+len), "r"(matrix)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2005 :"memory"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2006 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2007
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2008 #define MIX_MISC(stereo)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2009 __asm__ volatile(\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2010 "1: \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2011 "movaps (%3,%0), %%xmm0 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2012 stereo("movaps %%xmm0, %%xmm1 \n")\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2013 "mulps %%xmm6, %%xmm0 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2014 stereo("mulps %%xmm7, %%xmm1 \n")\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2015 "lea 1024(%3,%0), %1 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2016 "mov %5, %2 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2017 "2: \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2018 "movaps (%1), %%xmm2 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2019 stereo("movaps %%xmm2, %%xmm3 \n")\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2020 "mulps (%4,%2), %%xmm2 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2021 stereo("mulps 16(%4,%2), %%xmm3 \n")\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2022 "addps %%xmm2, %%xmm0 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2023 stereo("addps %%xmm3, %%xmm1 \n")\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2024 "add $1024, %1 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2025 "add $32, %2 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2026 "jl 2b \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2027 "movaps %%xmm0, (%3,%0) \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2028 stereo("movaps %%xmm1, 1024(%3,%0) \n")\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2029 "add $16, %0 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2030 "jl 1b \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2031 :"+&r"(i), "=&r"(j), "=&r"(k)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2032 :"r"(samples[0]+len), "r"(matrix_simd+in_ch), "g"((intptr_t)-32*(in_ch-1))\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2033 :"memory"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2034 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2035
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2036 static void ac3_downmix_sse(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2037 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2038 int (*matrix_cmp)[2] = (int(*)[2])matrix;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2039 intptr_t i,j,k;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2040
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2041 i = -len*sizeof(float);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2042 if(in_ch == 5 && out_ch == 2 && !(matrix_cmp[0][1]|matrix_cmp[2][0]|matrix_cmp[3][1]|matrix_cmp[4][0]|(matrix_cmp[1][0]^matrix_cmp[1][1])|(matrix_cmp[0][0]^matrix_cmp[2][1]))) {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2043 MIX5(IF0,IF1);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2044 } else if(in_ch == 5 && out_ch == 1 && matrix_cmp[0][0]==matrix_cmp[2][0] && matrix_cmp[3][0]==matrix_cmp[4][0]) {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2045 MIX5(IF1,IF0);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2046 } else {
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 11118
diff changeset
2047 DECLARE_ALIGNED(16, float, matrix_simd)[in_ch][2][4];
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2048 j = 2*in_ch*sizeof(float);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2049 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2050 "1: \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2051 "sub $8, %0 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2052 "movss (%2,%0), %%xmm6 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2053 "movss 4(%2,%0), %%xmm7 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2054 "shufps $0, %%xmm6, %%xmm6 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2055 "shufps $0, %%xmm7, %%xmm7 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2056 "movaps %%xmm6, (%1,%0,4) \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2057 "movaps %%xmm7, 16(%1,%0,4) \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2058 "jg 1b \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2059 :"+&r"(j)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2060 :"r"(matrix_simd), "r"(matrix)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2061 :"memory"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2062 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2063 if(out_ch == 2) {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2064 MIX_MISC(IF1);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2065 } else {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2066 MIX_MISC(IF0);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2067 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2068 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2069 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2070
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2071 static void vector_fmul_3dnow(float *dst, const float *src, int len){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2072 x86_reg i = (len-4)*4;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2073 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2074 "1: \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2075 "movq (%1,%0), %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2076 "movq 8(%1,%0), %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2077 "pfmul (%2,%0), %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2078 "pfmul 8(%2,%0), %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2079 "movq %%mm0, (%1,%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2080 "movq %%mm1, 8(%1,%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2081 "sub $16, %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2082 "jge 1b \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2083 "femms \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2084 :"+r"(i)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2085 :"r"(dst), "r"(src)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2086 :"memory"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2087 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2088 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2089 static void vector_fmul_sse(float *dst, const float *src, int len){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2090 x86_reg i = (len-8)*4;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2091 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2092 "1: \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2093 "movaps (%1,%0), %%xmm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2094 "movaps 16(%1,%0), %%xmm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2095 "mulps (%2,%0), %%xmm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2096 "mulps 16(%2,%0), %%xmm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2097 "movaps %%xmm0, (%1,%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2098 "movaps %%xmm1, 16(%1,%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2099 "sub $32, %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2100 "jge 1b \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2101 :"+r"(i)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2102 :"r"(dst), "r"(src)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2103 :"memory"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2104 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2105 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2106
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2107 static void vector_fmul_reverse_3dnow2(float *dst, const float *src0, const float *src1, int len){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2108 x86_reg i = len*4-16;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2109 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2110 "1: \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2111 "pswapd 8(%1), %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2112 "pswapd (%1), %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2113 "pfmul (%3,%0), %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2114 "pfmul 8(%3,%0), %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2115 "movq %%mm0, (%2,%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2116 "movq %%mm1, 8(%2,%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2117 "add $16, %1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2118 "sub $16, %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2119 "jge 1b \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2120 :"+r"(i), "+r"(src1)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2121 :"r"(dst), "r"(src0)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2122 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2123 __asm__ volatile("femms");
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2124 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2125 static void vector_fmul_reverse_sse(float *dst, const float *src0, const float *src1, int len){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2126 x86_reg i = len*4-32;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2127 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2128 "1: \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2129 "movaps 16(%1), %%xmm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2130 "movaps (%1), %%xmm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2131 "shufps $0x1b, %%xmm0, %%xmm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2132 "shufps $0x1b, %%xmm1, %%xmm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2133 "mulps (%3,%0), %%xmm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2134 "mulps 16(%3,%0), %%xmm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2135 "movaps %%xmm0, (%2,%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2136 "movaps %%xmm1, 16(%2,%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2137 "add $32, %1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2138 "sub $32, %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2139 "jge 1b \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2140 :"+r"(i), "+r"(src1)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2141 :"r"(dst), "r"(src0)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2142 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2143 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2144
10300
4d1b9ca628fc Drop unused args from vector_fmul_add_add, simpify code, and rename
mru
parents: 10107
diff changeset
2145 static void vector_fmul_add_3dnow(float *dst, const float *src0, const float *src1,
4d1b9ca628fc Drop unused args from vector_fmul_add_add, simpify code, and rename
mru
parents: 10107
diff changeset
2146 const float *src2, int len){
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2147 x86_reg i = (len-4)*4;
10301
02798c603744 cosmetics: fix indentation after previous commit
mru
parents: 10300
diff changeset
2148 __asm__ volatile(
02798c603744 cosmetics: fix indentation after previous commit
mru
parents: 10300
diff changeset
2149 "1: \n\t"
02798c603744 cosmetics: fix indentation after previous commit
mru
parents: 10300
diff changeset
2150 "movq (%2,%0), %%mm0 \n\t"
02798c603744 cosmetics: fix indentation after previous commit
mru
parents: 10300
diff changeset
2151 "movq 8(%2,%0), %%mm1 \n\t"
02798c603744 cosmetics: fix indentation after previous commit
mru
parents: 10300
diff changeset
2152 "pfmul (%3,%0), %%mm0 \n\t"
02798c603744 cosmetics: fix indentation after previous commit
mru
parents: 10300
diff changeset
2153 "pfmul 8(%3,%0), %%mm1 \n\t"
02798c603744 cosmetics: fix indentation after previous commit
mru
parents: 10300
diff changeset
2154 "pfadd (%4,%0), %%mm0 \n\t"
02798c603744 cosmetics: fix indentation after previous commit
mru
parents: 10300
diff changeset
2155 "pfadd 8(%4,%0), %%mm1 \n\t"
02798c603744 cosmetics: fix indentation after previous commit
mru
parents: 10300
diff changeset
2156 "movq %%mm0, (%1,%0) \n\t"
02798c603744 cosmetics: fix indentation after previous commit
mru
parents: 10300
diff changeset
2157 "movq %%mm1, 8(%1,%0) \n\t"
02798c603744 cosmetics: fix indentation after previous commit
mru
parents: 10300
diff changeset
2158 "sub $16, %0 \n\t"
02798c603744 cosmetics: fix indentation after previous commit
mru
parents: 10300
diff changeset
2159 "jge 1b \n\t"
02798c603744 cosmetics: fix indentation after previous commit
mru
parents: 10300
diff changeset
2160 :"+r"(i)
02798c603744 cosmetics: fix indentation after previous commit
mru
parents: 10300
diff changeset
2161 :"r"(dst), "r"(src0), "r"(src1), "r"(src2)
02798c603744 cosmetics: fix indentation after previous commit
mru
parents: 10300
diff changeset
2162 :"memory"
02798c603744 cosmetics: fix indentation after previous commit
mru
parents: 10300
diff changeset
2163 );
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2164 __asm__ volatile("femms");
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2165 }
10300
4d1b9ca628fc Drop unused args from vector_fmul_add_add, simpify code, and rename
mru
parents: 10107
diff changeset
2166 static void vector_fmul_add_sse(float *dst, const float *src0, const float *src1,
4d1b9ca628fc Drop unused args from vector_fmul_add_add, simpify code, and rename
mru
parents: 10107
diff changeset
2167 const float *src2, int len){
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2168 x86_reg i = (len-8)*4;
10301
02798c603744 cosmetics: fix indentation after previous commit
mru
parents: 10300
diff changeset
2169 __asm__ volatile(
02798c603744 cosmetics: fix indentation after previous commit
mru
parents: 10300
diff changeset
2170 "1: \n\t"
02798c603744 cosmetics: fix indentation after previous commit
mru
parents: 10300
diff changeset
2171 "movaps (%2,%0), %%xmm0 \n\t"
02798c603744 cosmetics: fix indentation after previous commit
mru
parents: 10300
diff changeset
2172 "movaps 16(%2,%0), %%xmm1 \n\t"
02798c603744 cosmetics: fix indentation after previous commit
mru
parents: 10300
diff changeset
2173 "mulps (%3,%0), %%xmm0 \n\t"
02798c603744 cosmetics: fix indentation after previous commit
mru
parents: 10300
diff changeset
2174 "mulps 16(%3,%0), %%xmm1 \n\t"
02798c603744 cosmetics: fix indentation after previous commit
mru
parents: 10300
diff changeset
2175 "addps (%4,%0), %%xmm0 \n\t"
02798c603744 cosmetics: fix indentation after previous commit
mru
parents: 10300
diff changeset
2176 "addps 16(%4,%0), %%xmm1 \n\t"
02798c603744 cosmetics: fix indentation after previous commit
mru
parents: 10300
diff changeset
2177 "movaps %%xmm0, (%1,%0) \n\t"
02798c603744 cosmetics: fix indentation after previous commit
mru
parents: 10300
diff changeset
2178 "movaps %%xmm1, 16(%1,%0) \n\t"
02798c603744 cosmetics: fix indentation after previous commit
mru
parents: 10300
diff changeset
2179 "sub $32, %0 \n\t"
02798c603744 cosmetics: fix indentation after previous commit
mru
parents: 10300
diff changeset
2180 "jge 1b \n\t"
02798c603744 cosmetics: fix indentation after previous commit
mru
parents: 10300
diff changeset
2181 :"+r"(i)
02798c603744 cosmetics: fix indentation after previous commit
mru
parents: 10300
diff changeset
2182 :"r"(dst), "r"(src0), "r"(src1), "r"(src2)
02798c603744 cosmetics: fix indentation after previous commit
mru
parents: 10300
diff changeset
2183 :"memory"
02798c603744 cosmetics: fix indentation after previous commit
mru
parents: 10300
diff changeset
2184 );
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2185 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2186
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2187 static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float *src1,
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2188 const float *win, float add_bias, int len){
8590
7a463923ecd1 Change semantic of CONFIG_*, HAVE_* and ARCH_*.
aurel
parents: 8527
diff changeset
2189 #if HAVE_6REGS
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2190 if(add_bias == 0){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2191 x86_reg i = -len*4;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2192 x86_reg j = len*4-8;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2193 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2194 "1: \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2195 "pswapd (%5,%1), %%mm1 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2196 "movq (%5,%0), %%mm0 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2197 "pswapd (%4,%1), %%mm5 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2198 "movq (%3,%0), %%mm4 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2199 "movq %%mm0, %%mm2 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2200 "movq %%mm1, %%mm3 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2201 "pfmul %%mm4, %%mm2 \n" // src0[len+i]*win[len+i]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2202 "pfmul %%mm5, %%mm3 \n" // src1[ j]*win[len+j]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2203 "pfmul %%mm4, %%mm1 \n" // src0[len+i]*win[len+j]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2204 "pfmul %%mm5, %%mm0 \n" // src1[ j]*win[len+i]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2205 "pfadd %%mm3, %%mm2 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2206 "pfsub %%mm0, %%mm1 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2207 "pswapd %%mm2, %%mm2 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2208 "movq %%mm1, (%2,%0) \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2209 "movq %%mm2, (%2,%1) \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2210 "sub $8, %1 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2211 "add $8, %0 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2212 "jl 1b \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2213 "femms \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2214 :"+r"(i), "+r"(j)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2215 :"r"(dst+len), "r"(src0+len), "r"(src1), "r"(win+len)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2216 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2217 }else
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2218 #endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2219 ff_vector_fmul_window_c(dst, src0, src1, win, add_bias, len);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2220 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2221
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2222 static void vector_fmul_window_sse(float *dst, const float *src0, const float *src1,
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2223 const float *win, float add_bias, int len){
8590
7a463923ecd1 Change semantic of CONFIG_*, HAVE_* and ARCH_*.
aurel
parents: 8527
diff changeset
2224 #if HAVE_6REGS
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2225 if(add_bias == 0){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2226 x86_reg i = -len*4;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2227 x86_reg j = len*4-16;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2228 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2229 "1: \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2230 "movaps (%5,%1), %%xmm1 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2231 "movaps (%5,%0), %%xmm0 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2232 "movaps (%4,%1), %%xmm5 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2233 "movaps (%3,%0), %%xmm4 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2234 "shufps $0x1b, %%xmm1, %%xmm1 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2235 "shufps $0x1b, %%xmm5, %%xmm5 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2236 "movaps %%xmm0, %%xmm2 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2237 "movaps %%xmm1, %%xmm3 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2238 "mulps %%xmm4, %%xmm2 \n" // src0[len+i]*win[len+i]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2239 "mulps %%xmm5, %%xmm3 \n" // src1[ j]*win[len+j]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2240 "mulps %%xmm4, %%xmm1 \n" // src0[len+i]*win[len+j]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2241 "mulps %%xmm5, %%xmm0 \n" // src1[ j]*win[len+i]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2242 "addps %%xmm3, %%xmm2 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2243 "subps %%xmm0, %%xmm1 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2244 "shufps $0x1b, %%xmm2, %%xmm2 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2245 "movaps %%xmm1, (%2,%0) \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2246 "movaps %%xmm2, (%2,%1) \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2247 "sub $16, %1 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2248 "add $16, %0 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2249 "jl 1b \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2250 :"+r"(i), "+r"(j)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2251 :"r"(dst+len), "r"(src0+len), "r"(src1), "r"(win+len)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2252 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2253 }else
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2254 #endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2255 ff_vector_fmul_window_c(dst, src0, src1, win, add_bias, len);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2256 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2257
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2258 static void int32_to_float_fmul_scalar_sse(float *dst, const int *src, float mul, int len)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2259 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2260 x86_reg i = -4*len;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2261 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2262 "movss %3, %%xmm4 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2263 "shufps $0, %%xmm4, %%xmm4 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2264 "1: \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2265 "cvtpi2ps (%2,%0), %%xmm0 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2266 "cvtpi2ps 8(%2,%0), %%xmm1 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2267 "cvtpi2ps 16(%2,%0), %%xmm2 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2268 "cvtpi2ps 24(%2,%0), %%xmm3 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2269 "movlhps %%xmm1, %%xmm0 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2270 "movlhps %%xmm3, %%xmm2 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2271 "mulps %%xmm4, %%xmm0 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2272 "mulps %%xmm4, %%xmm2 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2273 "movaps %%xmm0, (%1,%0) \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2274 "movaps %%xmm2, 16(%1,%0) \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2275 "add $32, %0 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2276 "jl 1b \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2277 :"+r"(i)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2278 :"r"(dst+len), "r"(src+len), "m"(mul)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2279 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2280 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2281
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2282 static void int32_to_float_fmul_scalar_sse2(float *dst, const int *src, float mul, int len)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2283 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2284 x86_reg i = -4*len;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2285 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2286 "movss %3, %%xmm4 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2287 "shufps $0, %%xmm4, %%xmm4 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2288 "1: \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2289 "cvtdq2ps (%2,%0), %%xmm0 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2290 "cvtdq2ps 16(%2,%0), %%xmm1 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2291 "mulps %%xmm4, %%xmm0 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2292 "mulps %%xmm4, %%xmm1 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2293 "movaps %%xmm0, (%1,%0) \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2294 "movaps %%xmm1, 16(%1,%0) \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2295 "add $32, %0 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2296 "jl 1b \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2297 :"+r"(i)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2298 :"r"(dst+len), "r"(src+len), "m"(mul)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2299 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2300 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2301
10105
7775f6627612 Mark parameter src of vector_clipf() as const
vitor
parents: 10104
diff changeset
2302 static void vector_clipf_sse(float *dst, const float *src, float min, float max,
10104
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2303 int len)
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2304 {
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2305 x86_reg i = (len-16)*4;
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2306 __asm__ volatile(
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2307 "movss %3, %%xmm4 \n"
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2308 "movss %4, %%xmm5 \n"
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2309 "shufps $0, %%xmm4, %%xmm4 \n"
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2310 "shufps $0, %%xmm5, %%xmm5 \n"
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2311 "1: \n\t"
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2312 "movaps (%2,%0), %%xmm0 \n\t" // 3/1 on intel
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2313 "movaps 16(%2,%0), %%xmm1 \n\t"
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2314 "movaps 32(%2,%0), %%xmm2 \n\t"
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2315 "movaps 48(%2,%0), %%xmm3 \n\t"
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2316 "maxps %%xmm4, %%xmm0 \n\t"
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2317 "maxps %%xmm4, %%xmm1 \n\t"
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2318 "maxps %%xmm4, %%xmm2 \n\t"
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2319 "maxps %%xmm4, %%xmm3 \n\t"
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2320 "minps %%xmm5, %%xmm0 \n\t"
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2321 "minps %%xmm5, %%xmm1 \n\t"
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2322 "minps %%xmm5, %%xmm2 \n\t"
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2323 "minps %%xmm5, %%xmm3 \n\t"
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2324 "movaps %%xmm0, (%1,%0) \n\t"
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2325 "movaps %%xmm1, 16(%1,%0) \n\t"
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2326 "movaps %%xmm2, 32(%1,%0) \n\t"
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2327 "movaps %%xmm3, 48(%1,%0) \n\t"
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2328 "sub $64, %0 \n\t"
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2329 "jge 1b \n\t"
10107
3b61bc6ce377 Mark "i" parameter of vector_clipf_sse() as early-clobber
vitor
parents: 10105
diff changeset
2330 :"+&r"(i)
10104
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2331 :"r"(dst), "r"(src), "m"(min), "m"(max)
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2332 :"memory"
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2333 );
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2334 }
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2335
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2336 static void float_to_int16_3dnow(int16_t *dst, const float *src, long len){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2337 x86_reg reglen = len;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2338 // not bit-exact: pf2id uses different rounding than C and SSE
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2339 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2340 "add %0 , %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2341 "lea (%2,%0,2) , %2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2342 "add %0 , %1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2343 "neg %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2344 "1: \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2345 "pf2id (%2,%0,2) , %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2346 "pf2id 8(%2,%0,2) , %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2347 "pf2id 16(%2,%0,2) , %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2348 "pf2id 24(%2,%0,2) , %%mm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2349 "packssdw %%mm1 , %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2350 "packssdw %%mm3 , %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2351 "movq %%mm0 , (%1,%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2352 "movq %%mm2 , 8(%1,%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2353 "add $16 , %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2354 " js 1b \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2355 "femms \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2356 :"+r"(reglen), "+r"(dst), "+r"(src)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2357 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2358 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2359 static void float_to_int16_sse(int16_t *dst, const float *src, long len){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2360 x86_reg reglen = len;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2361 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2362 "add %0 , %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2363 "lea (%2,%0,2) , %2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2364 "add %0 , %1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2365 "neg %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2366 "1: \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2367 "cvtps2pi (%2,%0,2) , %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2368 "cvtps2pi 8(%2,%0,2) , %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2369 "cvtps2pi 16(%2,%0,2) , %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2370 "cvtps2pi 24(%2,%0,2) , %%mm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2371 "packssdw %%mm1 , %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2372 "packssdw %%mm3 , %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2373 "movq %%mm0 , (%1,%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2374 "movq %%mm2 , 8(%1,%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2375 "add $16 , %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2376 " js 1b \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2377 "emms \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2378 :"+r"(reglen), "+r"(dst), "+r"(src)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2379 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2380 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2381
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2382 static void float_to_int16_sse2(int16_t *dst, const float *src, long len){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2383 x86_reg reglen = len;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2384 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2385 "add %0 , %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2386 "lea (%2,%0,2) , %2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2387 "add %0 , %1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2388 "neg %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2389 "1: \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2390 "cvtps2dq (%2,%0,2) , %%xmm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2391 "cvtps2dq 16(%2,%0,2) , %%xmm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2392 "packssdw %%xmm1 , %%xmm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2393 "movdqa %%xmm0 , (%1,%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2394 "add $16 , %0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2395 " js 1b \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2396 :"+r"(reglen), "+r"(dst), "+r"(src)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2397 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2398 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2399
12436
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents: 12435
diff changeset
2400 void ff_vp3_idct_mmx(int16_t *input_data);
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents: 12435
diff changeset
2401 void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block);
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents: 12435
diff changeset
2402 void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block);
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents: 12435
diff changeset
2403
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents: 12435
diff changeset
2404 void ff_vp3_idct_dc_add_mmx2(uint8_t *dest, int line_size, const DCTELEM *block);
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents: 12435
diff changeset
2405
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents: 12435
diff changeset
2406 void ff_vp3_v_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values);
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents: 12435
diff changeset
2407 void ff_vp3_h_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values);
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents: 12435
diff changeset
2408
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents: 12435
diff changeset
2409 void ff_vp3_idct_sse2(int16_t *input_data);
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents: 12435
diff changeset
2410 void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, DCTELEM *block);
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents: 12435
diff changeset
2411 void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, DCTELEM *block);
d6d0a43848b4 Move VP3 IDCT functions from inline ASM to YASM. This fixes part of the VP3/5/6
rbultje
parents: 12435
diff changeset
2412
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2413 void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2414 void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2415 void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len);
11981
d3551fcf4c1c Add const to some pointer parameters.
cehoyos
parents: 11951
diff changeset
2416 int32_t ff_scalarproduct_int16_mmx2(const int16_t *v1, const int16_t *v2, int order, int shift);
d3551fcf4c1c Add const to some pointer parameters.
cehoyos
parents: 11951
diff changeset
2417 int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, int order, int shift);
d3551fcf4c1c Add const to some pointer parameters.
cehoyos
parents: 11951
diff changeset
2418 int32_t ff_scalarproduct_and_madd_int16_mmx2(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul);
d3551fcf4c1c Add const to some pointer parameters.
cehoyos
parents: 11951
diff changeset
2419 int32_t ff_scalarproduct_and_madd_int16_sse2(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul);
d3551fcf4c1c Add const to some pointer parameters.
cehoyos
parents: 11951
diff changeset
2420 int32_t ff_scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul);
10431
546b7ebeaf07 huffyuv: add some const qualifiers
lorenm
parents: 10430
diff changeset
2421 void ff_add_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top);
546b7ebeaf07 huffyuv: add some const qualifiers
lorenm
parents: 10430
diff changeset
2422 int ff_add_hfyu_left_prediction_ssse3(uint8_t *dst, const uint8_t *src, int w, int left);
546b7ebeaf07 huffyuv: add some const qualifiers
lorenm
parents: 10430
diff changeset
2423 int ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src, int w, int left);
10645
6f958f237d7d r20739 broke compilation on systems without yasm
lorenm
parents: 10644
diff changeset
2424
12450
3941687b4fa9 Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) in h264_qpel_mmx.c,
rbultje
parents: 12439
diff changeset
2425 #if !HAVE_YASM
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2426 #define ff_float_to_int16_interleave6_sse(a,b,c) float_to_int16_interleave_misc_sse(a,b,c,6)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2427 #define ff_float_to_int16_interleave6_3dnow(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2428 #define ff_float_to_int16_interleave6_3dn2(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2429 #endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2430 #define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2431
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2432 #define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2433 /* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2434 static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 11118
diff changeset
2435 DECLARE_ALIGNED(16, int16_t, tmp)[len];\
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2436 int i,j,c;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2437 for(c=0; c<channels; c++){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2438 float_to_int16_##cpu(tmp, src[c], len);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2439 for(i=0, j=c; i<len; i++, j+=channels)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2440 dst[j] = tmp[i];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2441 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2442 }\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2443 \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2444 static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, long len, int channels){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2445 if(channels==1)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2446 float_to_int16_##cpu(dst, src[0], len);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2447 else if(channels==2){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2448 x86_reg reglen = len; \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2449 const float *src0 = src[0];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2450 const float *src1 = src[1];\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2451 __asm__ volatile(\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2452 "shl $2, %0 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2453 "add %0, %1 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2454 "add %0, %2 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2455 "add %0, %3 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2456 "neg %0 \n"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2457 body\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2458 :"+r"(reglen), "+r"(dst), "+r"(src0), "+r"(src1)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2459 );\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2460 }else if(channels==6){\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2461 ff_float_to_int16_interleave6_##cpu(dst, src, len);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2462 }else\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2463 float_to_int16_interleave_misc_##cpu(dst, src, len, channels);\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2464 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2465
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2466 FLOAT_TO_INT16_INTERLEAVE(3dnow,
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2467 "1: \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2468 "pf2id (%2,%0), %%mm0 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2469 "pf2id 8(%2,%0), %%mm1 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2470 "pf2id (%3,%0), %%mm2 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2471 "pf2id 8(%3,%0), %%mm3 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2472 "packssdw %%mm1, %%mm0 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2473 "packssdw %%mm3, %%mm2 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2474 "movq %%mm0, %%mm1 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2475 "punpcklwd %%mm2, %%mm0 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2476 "punpckhwd %%mm2, %%mm1 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2477 "movq %%mm0, (%1,%0)\n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2478 "movq %%mm1, 8(%1,%0)\n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2479 "add $16, %0 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2480 "js 1b \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2481 "femms \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2482 )
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2483
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2484 FLOAT_TO_INT16_INTERLEAVE(sse,
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2485 "1: \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2486 "cvtps2pi (%2,%0), %%mm0 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2487 "cvtps2pi 8(%2,%0), %%mm1 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2488 "cvtps2pi (%3,%0), %%mm2 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2489 "cvtps2pi 8(%3,%0), %%mm3 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2490 "packssdw %%mm1, %%mm0 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2491 "packssdw %%mm3, %%mm2 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2492 "movq %%mm0, %%mm1 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2493 "punpcklwd %%mm2, %%mm0 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2494 "punpckhwd %%mm2, %%mm1 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2495 "movq %%mm0, (%1,%0)\n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2496 "movq %%mm1, 8(%1,%0)\n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2497 "add $16, %0 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2498 "js 1b \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2499 "emms \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2500 )
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2501
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2502 FLOAT_TO_INT16_INTERLEAVE(sse2,
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2503 "1: \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2504 "cvtps2dq (%2,%0), %%xmm0 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2505 "cvtps2dq (%3,%0), %%xmm1 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2506 "packssdw %%xmm1, %%xmm0 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2507 "movhlps %%xmm0, %%xmm1 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2508 "punpcklwd %%xmm1, %%xmm0 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2509 "movdqa %%xmm0, (%1,%0) \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2510 "add $16, %0 \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2511 "js 1b \n"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2512 )
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2513
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2514 static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long len, int channels){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2515 if(channels==6)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2516 ff_float_to_int16_interleave6_3dn2(dst, src, len);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2517 else
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2518 float_to_int16_interleave_3dnow(dst, src, len, channels);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2519 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2520
10964
abb3b23bda35 Implement an sse version of scalarproduct_float().
alexc
parents: 10961
diff changeset
2521 float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
abb3b23bda35 Implement an sse version of scalarproduct_float().
alexc
parents: 10961
diff changeset
2522
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2523 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2524 {
12414
3fc4c625b6f3 Remove global mm_flags variable
mru
parents: 12366
diff changeset
2525 int mm_flags = mm_support();
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2526
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2527 if (avctx->dsp_mask) {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2528 if (avctx->dsp_mask & FF_MM_FORCE)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2529 mm_flags |= (avctx->dsp_mask & 0xffff);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2530 else
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2531 mm_flags &= ~(avctx->dsp_mask & 0xffff);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2532 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2533
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2534 #if 0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2535 av_log(avctx, AV_LOG_INFO, "libavcodec: CPU flags:");
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2536 if (mm_flags & FF_MM_MMX)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2537 av_log(avctx, AV_LOG_INFO, " mmx");
9342
7f594601d5e9 Rename FF_MM_MMXEXT to FF_MM_MMX2, for both clarity and consistency
stefano
parents: 9341
diff changeset
2538 if (mm_flags & FF_MM_MMX2)
7f594601d5e9 Rename FF_MM_MMXEXT to FF_MM_MMX2, for both clarity and consistency
stefano
parents: 9341
diff changeset
2539 av_log(avctx, AV_LOG_INFO, " mmx2");
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2540 if (mm_flags & FF_MM_3DNOW)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2541 av_log(avctx, AV_LOG_INFO, " 3dnow");
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2542 if (mm_flags & FF_MM_SSE)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2543 av_log(avctx, AV_LOG_INFO, " sse");
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2544 if (mm_flags & FF_MM_SSE2)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2545 av_log(avctx, AV_LOG_INFO, " sse2");
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2546 av_log(avctx, AV_LOG_INFO, "\n");
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2547 #endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2548
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2549 if (mm_flags & FF_MM_MMX) {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2550 const int idct_algo= avctx->idct_algo;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2551
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2552 if(avctx->lowres==0){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2553 if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2554 c->idct_put= ff_simple_idct_put_mmx;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2555 c->idct_add= ff_simple_idct_add_mmx;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2556 c->idct = ff_simple_idct_mmx;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2557 c->idct_permutation_type= FF_SIMPLE_IDCT_PERM;
8590
7a463923ecd1 Change semantic of CONFIG_*, HAVE_* and ARCH_*.
aurel
parents: 8527
diff changeset
2558 #if CONFIG_GPL
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2559 }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){
9342
7f594601d5e9 Rename FF_MM_MMXEXT to FF_MM_MMX2, for both clarity and consistency
stefano
parents: 9341
diff changeset
2560 if(mm_flags & FF_MM_MMX2){
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2561 c->idct_put= ff_libmpeg2mmx2_idct_put;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2562 c->idct_add= ff_libmpeg2mmx2_idct_add;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2563 c->idct = ff_mmxext_idct;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2564 }else{
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2565 c->idct_put= ff_libmpeg2mmx_idct_put;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2566 c->idct_add= ff_libmpeg2mmx_idct_add;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2567 c->idct = ff_mmx_idct;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2568 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2569 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2570 #endif
9975
d6d7e8d4a04d Do not redundantly check for both CONFIG_THEORA_DECODER and CONFIG_VP3_DECODER.
diego
parents: 9959
diff changeset
2571 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER) &&
12439
51fc247eed32 Fix compilation failure if yasm is disabled (missing vp3 symbols).
rbultje
parents: 12437
diff changeset
2572 idct_algo==FF_IDCT_VP3 && HAVE_YASM){
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2573 if(mm_flags & FF_MM_SSE2){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2574 c->idct_put= ff_vp3_idct_put_sse2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2575 c->idct_add= ff_vp3_idct_add_sse2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2576 c->idct = ff_vp3_idct_sse2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2577 c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2578 }else{
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2579 c->idct_put= ff_vp3_idct_put_mmx;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2580 c->idct_add= ff_vp3_idct_add_mmx;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2581 c->idct = ff_vp3_idct_mmx;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2582 c->idct_permutation_type= FF_PARTTRANS_IDCT_PERM;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2583 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2584 }else if(idct_algo==FF_IDCT_CAVS){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2585 c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2586 }else if(idct_algo==FF_IDCT_XVIDMMX){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2587 if(mm_flags & FF_MM_SSE2){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2588 c->idct_put= ff_idct_xvid_sse2_put;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2589 c->idct_add= ff_idct_xvid_sse2_add;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2590 c->idct = ff_idct_xvid_sse2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2591 c->idct_permutation_type= FF_SSE2_IDCT_PERM;
9342
7f594601d5e9 Rename FF_MM_MMXEXT to FF_MM_MMX2, for both clarity and consistency
stefano
parents: 9341
diff changeset
2592 }else if(mm_flags & FF_MM_MMX2){
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2593 c->idct_put= ff_idct_xvid_mmx2_put;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2594 c->idct_add= ff_idct_xvid_mmx2_add;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2595 c->idct = ff_idct_xvid_mmx2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2596 }else{
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2597 c->idct_put= ff_idct_xvid_mmx_put;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2598 c->idct_add= ff_idct_xvid_mmx_add;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2599 c->idct = ff_idct_xvid_mmx;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2600 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2601 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2602 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2603
12435
fe78a4548d12 Put ff_ prefix on non-static {put_signed,put,add}_pixels_clamped_mmx()
rbultje
parents: 12417
diff changeset
2604 c->put_pixels_clamped = ff_put_pixels_clamped_mmx;
fe78a4548d12 Put ff_ prefix on non-static {put_signed,put,add}_pixels_clamped_mmx()
rbultje
parents: 12417
diff changeset
2605 c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
fe78a4548d12 Put ff_ prefix on non-static {put_signed,put,add}_pixels_clamped_mmx()
rbultje
parents: 12417
diff changeset
2606 c->add_pixels_clamped = ff_add_pixels_clamped_mmx;
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2607 c->clear_block = clear_block_mmx;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2608 c->clear_blocks = clear_blocks_mmx;
10766
78c2be62260a Fix XvMC. XvMCCreateBlocks() may not allocate 16-byte aligned blocks,
gb
parents: 10749
diff changeset
2609 if ((mm_flags & FF_MM_SSE) &&
78c2be62260a Fix XvMC. XvMCCreateBlocks() may not allocate 16-byte aligned blocks,
gb
parents: 10749
diff changeset
2610 !(CONFIG_MPEG_XVMC_DECODER && avctx->xvmc_acceleration > 1)){
78c2be62260a Fix XvMC. XvMCCreateBlocks() may not allocate 16-byte aligned blocks,
gb
parents: 10749
diff changeset
2611 /* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */
9861
89270a3bc4a0 SSE version of clear_blocks
darkshikari
parents: 9445
diff changeset
2612 c->clear_block = clear_block_sse;
89270a3bc4a0 SSE version of clear_blocks
darkshikari
parents: 9445
diff changeset
2613 c->clear_blocks = clear_blocks_sse;
89270a3bc4a0 SSE version of clear_blocks
darkshikari
parents: 9445
diff changeset
2614 }
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2615
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2616 #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2617 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2618 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## SIZE ## _x2_ ## CPU; \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2619 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2620 c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2621
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2622 SET_HPEL_FUNCS(put, 0, 16, mmx);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2623 SET_HPEL_FUNCS(put_no_rnd, 0, 16, mmx);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2624 SET_HPEL_FUNCS(avg, 0, 16, mmx);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2625 SET_HPEL_FUNCS(avg_no_rnd, 0, 16, mmx);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2626 SET_HPEL_FUNCS(put, 1, 8, mmx);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2627 SET_HPEL_FUNCS(put_no_rnd, 1, 8, mmx);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2628 SET_HPEL_FUNCS(avg, 1, 8, mmx);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2629 SET_HPEL_FUNCS(avg_no_rnd, 1, 8, mmx);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2630
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2631 c->gmc= gmc_mmx;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2632
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2633 c->add_bytes= add_bytes_mmx;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2634 c->add_bytes_l2= add_bytes_l2_mmx;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2635
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2636 c->draw_edges = draw_edges_mmx;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2637
10749
5cca4b6c459d Get rid of pointless CONFIG_ANY_H263 preprocessor definition.
diego
parents: 10645
diff changeset
2638 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2639 c->h263_v_loop_filter= h263_v_loop_filter_mmx;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2640 c->h263_h_loop_filter= h263_h_loop_filter_mmx;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2641 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2642
12437
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
2643 #if HAVE_YASM
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
2644 c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_mmx_rnd;
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
2645 c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_mmx;
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
2646 c->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_mmx_nornd;
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
2647
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
2648 c->put_rv40_chroma_pixels_tab[0]= ff_put_rv40_chroma_mc8_mmx;
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
2649 c->put_rv40_chroma_pixels_tab[1]= ff_put_rv40_chroma_mc4_mmx;
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
2650 #endif
8519
cc64e1343397 Use H264 MMX chroma functions to accelerate RV40 decoding.
cehoyos
parents: 8510
diff changeset
2651
9342
7f594601d5e9 Rename FF_MM_MMXEXT to FF_MM_MMX2, for both clarity and consistency
stefano
parents: 9341
diff changeset
2652 if (mm_flags & FF_MM_MMX2) {
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2653 c->prefetch = prefetch_mmx2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2654
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2655 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2656 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2657
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2658 c->avg_pixels_tab[0][0] = avg_pixels16_mmx2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2659 c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2660 c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2661
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2662 c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2663 c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2664
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2665 c->avg_pixels_tab[1][0] = avg_pixels8_mmx2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2666 c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2667 c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2668
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2669 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2670 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2671 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2672 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2673 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2674 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2675 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2676
12439
51fc247eed32 Fix compilation failure if yasm is disabled (missing vp3 symbols).
rbultje
parents: 12437
diff changeset
2677 if (CONFIG_VP3_DECODER && HAVE_YASM) {
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2678 c->vp3_v_loop_filter= ff_vp3_v_loop_filter_mmx2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2679 c->vp3_h_loop_filter= ff_vp3_h_loop_filter_mmx2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2680 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2681 }
12439
51fc247eed32 Fix compilation failure if yasm is disabled (missing vp3 symbols).
rbultje
parents: 12437
diff changeset
2682 if (CONFIG_VP3_DECODER && HAVE_YASM) {
11637
f7281af560fe vp3: DC-only IDCT
conrad
parents: 11499
diff changeset
2683 c->vp3_idct_dc_add = ff_vp3_idct_dc_add_mmx2;
f7281af560fe vp3: DC-only IDCT
conrad
parents: 11499
diff changeset
2684 }
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2685
11826
11c5a87497d3 Add bitexact versions of put_no_rnd_pixels8 _x2 and _y2 for vp3/theora
conrad
parents: 11637
diff changeset
2686 if (CONFIG_VP3_DECODER
11c5a87497d3 Add bitexact versions of put_no_rnd_pixels8 _x2 and _y2 for vp3/theora
conrad
parents: 11637
diff changeset
2687 && (avctx->codec_id == CODEC_ID_VP3 || avctx->codec_id == CODEC_ID_THEORA)) {
11c5a87497d3 Add bitexact versions of put_no_rnd_pixels8 _x2 and _y2 for vp3/theora
conrad
parents: 11637
diff changeset
2688 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_mmx2;
11c5a87497d3 Add bitexact versions of put_no_rnd_pixels8 _x2 and _y2 for vp3/theora
conrad
parents: 11637
diff changeset
2689 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmx2;
11c5a87497d3 Add bitexact versions of put_no_rnd_pixels8 _x2 and _y2 for vp3/theora
conrad
parents: 11637
diff changeset
2690 }
11c5a87497d3 Add bitexact versions of put_no_rnd_pixels8 _x2 and _y2 for vp3/theora
conrad
parents: 11637
diff changeset
2691
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2692 #define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU) \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2693 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## SIZE ## _mc00_ ## CPU; \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2694 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## SIZE ## _mc10_ ## CPU; \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2695 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## SIZE ## _mc20_ ## CPU; \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2696 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## SIZE ## _mc30_ ## CPU; \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2697 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## SIZE ## _mc01_ ## CPU; \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2698 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## SIZE ## _mc11_ ## CPU; \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2699 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## SIZE ## _mc21_ ## CPU; \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2700 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## SIZE ## _mc31_ ## CPU; \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2701 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## SIZE ## _mc02_ ## CPU; \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2702 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## SIZE ## _mc12_ ## CPU; \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2703 c->PFX ## _pixels_tab[IDX][10] = PFX ## SIZE ## _mc22_ ## CPU; \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2704 c->PFX ## _pixels_tab[IDX][11] = PFX ## SIZE ## _mc32_ ## CPU; \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2705 c->PFX ## _pixels_tab[IDX][12] = PFX ## SIZE ## _mc03_ ## CPU; \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2706 c->PFX ## _pixels_tab[IDX][13] = PFX ## SIZE ## _mc13_ ## CPU; \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2707 c->PFX ## _pixels_tab[IDX][14] = PFX ## SIZE ## _mc23_ ## CPU; \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2708 c->PFX ## _pixels_tab[IDX][15] = PFX ## SIZE ## _mc33_ ## CPU
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2709
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2710 SET_QPEL_FUNCS(put_qpel, 0, 16, mmx2);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2711 SET_QPEL_FUNCS(put_qpel, 1, 8, mmx2);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2712 SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmx2);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2713 SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmx2);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2714 SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2715 SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2716
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2717 SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2718 SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2719 SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2720 SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2721 SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2722 SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2723
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2724 SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2725 SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2726 SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2727 SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2728
12437
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
2729 #if HAVE_YASM
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
2730 c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_mmx2;
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
2731 c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_mmx2;
8519
cc64e1343397 Use H264 MMX chroma functions to accelerate RV40 decoding.
cehoyos
parents: 8510
diff changeset
2732
12437
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
2733 c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_mmx2_nornd;
9440
daee921fb6bb VC1: add and use avg_no_rnd chroma MC functions
conrad
parents: 9439
diff changeset
2734
12437
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
2735 c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_mmx2_rnd;
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
2736 c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_mmx2;
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
2737 c->avg_h264_chroma_pixels_tab[2]= ff_avg_h264_chroma_mc2_mmx2;
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
2738 c->put_h264_chroma_pixels_tab[2]= ff_put_h264_chroma_mc2_mmx2;
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2739
8760
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
2740 c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2;
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
2741 #endif
8798
a5c8210814d7 Add check whether the compiler/assembler supports 10 or more operands.
diego
parents: 8760
diff changeset
2742 #if HAVE_7REGS && HAVE_TEN_OPERANDS
8760
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
2743 if( mm_flags&FF_MM_3DNOW )
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
2744 c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov;
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
2745 #endif
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8668
diff changeset
2746
9995
3141f69e3905 Do not check for both CONFIG_VC1_DECODER and CONFIG_WMV3_DECODER,
diego
parents: 9975
diff changeset
2747 if (CONFIG_VC1_DECODER)
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2748 ff_vc1dsp_init_mmx(c, avctx);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2749
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2750 c->add_png_paeth_prediction= add_png_paeth_prediction_mmx2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2751 } else if (mm_flags & FF_MM_3DNOW) {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2752 c->prefetch = prefetch_3dnow;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2753
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2754 c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2755 c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2756
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2757 c->avg_pixels_tab[0][0] = avg_pixels16_3dnow;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2758 c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2759 c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2760
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2761 c->put_pixels_tab[1][1] = put_pixels8_x2_3dnow;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2762 c->put_pixels_tab[1][2] = put_pixels8_y2_3dnow;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2763
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2764 c->avg_pixels_tab[1][0] = avg_pixels8_3dnow;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2765 c->avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2766 c->avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2767
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2768 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2769 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2770 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2771 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2772 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2773 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2774 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2775 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2776
11826
11c5a87497d3 Add bitexact versions of put_no_rnd_pixels8 _x2 and _y2 for vp3/theora
conrad
parents: 11637
diff changeset
2777 if (CONFIG_VP3_DECODER
11c5a87497d3 Add bitexact versions of put_no_rnd_pixels8 _x2 and _y2 for vp3/theora
conrad
parents: 11637
diff changeset
2778 && (avctx->codec_id == CODEC_ID_VP3 || avctx->codec_id == CODEC_ID_THEORA)) {
11c5a87497d3 Add bitexact versions of put_no_rnd_pixels8 _x2 and _y2 for vp3/theora
conrad
parents: 11637
diff changeset
2779 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_3dnow;
11c5a87497d3 Add bitexact versions of put_no_rnd_pixels8 _x2 and _y2 for vp3/theora
conrad
parents: 11637
diff changeset
2780 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_3dnow;
11c5a87497d3 Add bitexact versions of put_no_rnd_pixels8 _x2 and _y2 for vp3/theora
conrad
parents: 11637
diff changeset
2781 }
11c5a87497d3 Add bitexact versions of put_no_rnd_pixels8 _x2 and _y2 for vp3/theora
conrad
parents: 11637
diff changeset
2782
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2783 SET_QPEL_FUNCS(put_qpel, 0, 16, 3dnow);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2784 SET_QPEL_FUNCS(put_qpel, 1, 8, 3dnow);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2785 SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, 3dnow);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2786 SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, 3dnow);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2787 SET_QPEL_FUNCS(avg_qpel, 0, 16, 3dnow);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2788 SET_QPEL_FUNCS(avg_qpel, 1, 8, 3dnow);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2789
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2790 SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 3dnow);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2791 SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 3dnow);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2792 SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2793 SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2794 SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2795 SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2796
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2797 SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2798 SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2799 SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2800 SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2801
12437
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
2802 #if HAVE_YASM
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
2803 c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_3dnow_rnd;
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
2804 c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_3dnow;
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2805
12437
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
2806 c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_3dnow_nornd;
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
2807
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
2808 c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_3dnow;
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
2809 c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_3dnow;
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
2810 #endif
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2811 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2812
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2813
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2814 #define H264_QPEL_FUNCS(x, y, CPU)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2815 c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_##CPU;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2816 c->put_h264_qpel_pixels_tab[1][x+y*4] = put_h264_qpel8_mc##x##y##_##CPU;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2817 c->avg_h264_qpel_pixels_tab[0][x+y*4] = avg_h264_qpel16_mc##x##y##_##CPU;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2818 c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2819 if((mm_flags & FF_MM_SSE2) && !(mm_flags & FF_MM_3DNOW)){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2820 // these functions are slower than mmx on AMD, but faster on Intel
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2821 c->put_pixels_tab[0][0] = put_pixels16_sse2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2822 c->avg_pixels_tab[0][0] = avg_pixels16_sse2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2823 H264_QPEL_FUNCS(0, 0, sse2);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2824 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2825 if(mm_flags & FF_MM_SSE2){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2826 H264_QPEL_FUNCS(0, 1, sse2);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2827 H264_QPEL_FUNCS(0, 2, sse2);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2828 H264_QPEL_FUNCS(0, 3, sse2);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2829 H264_QPEL_FUNCS(1, 1, sse2);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2830 H264_QPEL_FUNCS(1, 2, sse2);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2831 H264_QPEL_FUNCS(1, 3, sse2);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2832 H264_QPEL_FUNCS(2, 1, sse2);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2833 H264_QPEL_FUNCS(2, 2, sse2);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2834 H264_QPEL_FUNCS(2, 3, sse2);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2835 H264_QPEL_FUNCS(3, 1, sse2);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2836 H264_QPEL_FUNCS(3, 2, sse2);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2837 H264_QPEL_FUNCS(3, 3, sse2);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2838 }
8590
7a463923ecd1 Change semantic of CONFIG_*, HAVE_* and ARCH_*.
aurel
parents: 8527
diff changeset
2839 #if HAVE_SSSE3
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2840 if(mm_flags & FF_MM_SSSE3){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2841 H264_QPEL_FUNCS(1, 0, ssse3);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2842 H264_QPEL_FUNCS(1, 1, ssse3);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2843 H264_QPEL_FUNCS(1, 2, ssse3);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2844 H264_QPEL_FUNCS(1, 3, ssse3);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2845 H264_QPEL_FUNCS(2, 0, ssse3);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2846 H264_QPEL_FUNCS(2, 1, ssse3);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2847 H264_QPEL_FUNCS(2, 2, ssse3);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2848 H264_QPEL_FUNCS(2, 3, ssse3);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2849 H264_QPEL_FUNCS(3, 0, ssse3);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2850 H264_QPEL_FUNCS(3, 1, ssse3);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2851 H264_QPEL_FUNCS(3, 2, ssse3);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2852 H264_QPEL_FUNCS(3, 3, ssse3);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2853 c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3;
10430
12c8175d6db5 simd add_hfyu_left_prediction
lorenm
parents: 10301
diff changeset
2854 #if HAVE_YASM
12437
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
2855 c->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_ssse3_nornd;
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
2856 c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_ssse3_nornd;
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
2857 c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_ssse3_rnd;
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
2858 c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_ssse3_rnd;
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
2859 c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_ssse3;
b242eb86ea9a Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
rbultje
parents: 12436
diff changeset
2860 c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_ssse3;
10430
12c8175d6db5 simd add_hfyu_left_prediction
lorenm
parents: 10301
diff changeset
2861 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3;
12c8175d6db5 simd add_hfyu_left_prediction
lorenm
parents: 10301
diff changeset
2862 if (mm_flags & FF_MM_SSE4) // not really sse4, just slow on Conroe
12c8175d6db5 simd add_hfyu_left_prediction
lorenm
parents: 10301
diff changeset
2863 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4;
12c8175d6db5 simd add_hfyu_left_prediction
lorenm
parents: 10301
diff changeset
2864 #endif
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2865 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2866 #endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2867
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2868 if(mm_flags & FF_MM_3DNOW){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2869 c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2870 c->vector_fmul = vector_fmul_3dnow;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2871 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2872 c->float_to_int16 = float_to_int16_3dnow;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2873 c->float_to_int16_interleave = float_to_int16_interleave_3dnow;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2874 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2875 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2876 if(mm_flags & FF_MM_3DNOWEXT){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2877 c->vector_fmul_reverse = vector_fmul_reverse_3dnow2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2878 c->vector_fmul_window = vector_fmul_window_3dnow2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2879 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2880 c->float_to_int16_interleave = float_to_int16_interleave_3dn2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2881 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2882 }
10633
66242b8fbd32 port ape dsp functions from sse2 to mmx
lorenm
parents: 10431
diff changeset
2883 if(mm_flags & FF_MM_MMX2){
66242b8fbd32 port ape dsp functions from sse2 to mmx
lorenm
parents: 10431
diff changeset
2884 #if HAVE_YASM
66242b8fbd32 port ape dsp functions from sse2 to mmx
lorenm
parents: 10431
diff changeset
2885 c->scalarproduct_int16 = ff_scalarproduct_int16_mmx2;
10644
5da7180afadf refactor and optimize scalarproduct
lorenm
parents: 10633
diff changeset
2886 c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmx2;
10633
66242b8fbd32 port ape dsp functions from sse2 to mmx
lorenm
parents: 10431
diff changeset
2887 #endif
66242b8fbd32 port ape dsp functions from sse2 to mmx
lorenm
parents: 10431
diff changeset
2888 }
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2889 if(mm_flags & FF_MM_SSE){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2890 c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2891 c->ac3_downmix = ac3_downmix_sse;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2892 c->vector_fmul = vector_fmul_sse;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2893 c->vector_fmul_reverse = vector_fmul_reverse_sse;
10300
4d1b9ca628fc Drop unused args from vector_fmul_add_add, simpify code, and rename
mru
parents: 10107
diff changeset
2894 c->vector_fmul_add = vector_fmul_add_sse;
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2895 c->vector_fmul_window = vector_fmul_window_sse;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2896 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse;
10104
0fa3d21b317e SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents: 9995
diff changeset
2897 c->vector_clipf = vector_clipf_sse;
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2898 c->float_to_int16 = float_to_int16_sse;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2899 c->float_to_int16_interleave = float_to_int16_interleave_sse;
10964
abb3b23bda35 Implement an sse version of scalarproduct_float().
alexc
parents: 10961
diff changeset
2900 #if HAVE_YASM
abb3b23bda35 Implement an sse version of scalarproduct_float().
alexc
parents: 10961
diff changeset
2901 c->scalarproduct_float = ff_scalarproduct_float_sse;
abb3b23bda35 Implement an sse version of scalarproduct_float().
alexc
parents: 10961
diff changeset
2902 #endif
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2903 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2904 if(mm_flags & FF_MM_3DNOW)
10300
4d1b9ca628fc Drop unused args from vector_fmul_add_add, simpify code, and rename
mru
parents: 10107
diff changeset
2905 c->vector_fmul_add = vector_fmul_add_3dnow; // faster than sse
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2906 if(mm_flags & FF_MM_SSE2){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2907 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2908 c->float_to_int16 = float_to_int16_sse2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2909 c->float_to_int16_interleave = float_to_int16_interleave_sse2;
10633
66242b8fbd32 port ape dsp functions from sse2 to mmx
lorenm
parents: 10431
diff changeset
2910 #if HAVE_YASM
66242b8fbd32 port ape dsp functions from sse2 to mmx
lorenm
parents: 10431
diff changeset
2911 c->scalarproduct_int16 = ff_scalarproduct_int16_sse2;
10644
5da7180afadf refactor and optimize scalarproduct
lorenm
parents: 10633
diff changeset
2912 c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2;
10633
66242b8fbd32 port ape dsp functions from sse2 to mmx
lorenm
parents: 10431
diff changeset
2913 #endif
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2914 }
10644
5da7180afadf refactor and optimize scalarproduct
lorenm
parents: 10633
diff changeset
2915 if((mm_flags & FF_MM_SSSE3) && !(mm_flags & (FF_MM_SSE42|FF_MM_3DNOW)) && HAVE_YASM) // cachesplit
5da7180afadf refactor and optimize scalarproduct
lorenm
parents: 10633
diff changeset
2916 c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3;
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2917 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2918
8596
68e959302527 replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents: 8590
diff changeset
2919 if (CONFIG_ENCODERS)
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2920 dsputilenc_init_mmx(c, avctx);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2921
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2922 #if 0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2923 // for speed testing
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2924 get_pixels = just_return;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2925 put_pixels_clamped = just_return;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2926 add_pixels_clamped = just_return;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2927
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2928 pix_abs16x16 = just_return;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2929 pix_abs16x16_x2 = just_return;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2930 pix_abs16x16_y2 = just_return;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2931 pix_abs16x16_xy2 = just_return;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2932
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2933 put_pixels_tab[0] = just_return;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2934 put_pixels_tab[1] = just_return;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2935 put_pixels_tab[2] = just_return;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2936 put_pixels_tab[3] = just_return;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2937
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2938 put_no_rnd_pixels_tab[0] = just_return;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2939 put_no_rnd_pixels_tab[1] = just_return;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2940 put_no_rnd_pixels_tab[2] = just_return;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2941 put_no_rnd_pixels_tab[3] = just_return;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2942
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2943 avg_pixels_tab[0] = just_return;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2944 avg_pixels_tab[1] = just_return;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2945 avg_pixels_tab[2] = just_return;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2946 avg_pixels_tab[3] = just_return;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2947
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2948 avg_no_rnd_pixels_tab[0] = just_return;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2949 avg_no_rnd_pixels_tab[1] = just_return;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2950 avg_no_rnd_pixels_tab[2] = just_return;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2951 avg_no_rnd_pixels_tab[3] = just_return;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2952
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2953 //av_fdct = just_return;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2954 //ff_idct = just_return;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2955 #endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2956 }