annotate i386/cavsdsp_mmx.c @ 3680:7690bafea6e0 libavcodec

Mark this file properly as LGPL as allowed by its author Leon van Stuivenberg. Clean up the outdated URLs in the header.
author diego
date Tue, 05 Sep 2006 13:50:36 +0000
parents 419409926166
children c8c591fe26f8
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3524
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
1 /*
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
2 * Chinese AVS video (AVS1-P2, JiZhun profile) decoder.
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
3 * Copyright (c) 2006 Stefan Gehrer <stefan.gehrer@gmx.de>
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
4 *
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
5 * MMX optimised DSP functions, based on H.264 optimisations by
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
6 * Michael Niedermayer and Loren Merritt
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
7 *
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
8 * This library is free software; you can redistribute it and/or
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
9 * modify it under the terms of the GNU Lesser General Public
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
10 * License as published by the Free Software Foundation; either
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
11 * version 2 of the License, or (at your option) any later version.
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
12 *
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
13 * This library is distributed in the hope that it will be useful,
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
16 * Lesser General Public License for more details.
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
17 *
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
18 * You should have received a copy of the GNU Lesser General Public
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
19 * License along with this library; if not, write to the Free Software
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
21 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
22
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
23 #include "../dsputil.h"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
24 #include "common.h"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
25
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
26 DECLARE_ALIGNED_8(static const uint64_t,ff_pw_4 ) = 0x0004000400040004ULL;
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
27 DECLARE_ALIGNED_8(static const uint64_t,ff_pw_5 ) = 0x0005000500050005ULL;
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
28 DECLARE_ALIGNED_8(static const uint64_t,ff_pw_7 ) = 0x0007000700070007ULL;
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
29 DECLARE_ALIGNED_8(static const uint64_t,ff_pw_42) = 0x002A002A002A002AULL;
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
30 DECLARE_ALIGNED_8(static const uint64_t,ff_pw_64) = 0x0040004000400040ULL;
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
31 DECLARE_ALIGNED_8(static const uint64_t,ff_pw_96) = 0x0060006000600060ULL;
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
32
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
33 /*****************************************************************************
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
34 *
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
35 * inverse transform
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
36 *
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
37 ****************************************************************************/
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
38
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * In-place sum/difference butterfly on two MMX registers (16-bit lanes).
 * With the incoming values called a and b, the three instructions leave
 *     a = a + b
 *     b = b - a      (2*b - (a + b))
 * No temporary register is needed. Expands to an asm string fragment.
 */
#define SUMSUB_BA( a, b ) \
    "paddw "#b", "#a" \n\t"\
    "paddw "#b", "#b" \n\t"\
    "psubw "#a", "#b" \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
43
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * Interleave step used to build transposes.
 *   a, b : source registers (b is left unchanged)
 *   t    : receives a copy of a, then the high-half interleave of a and b
 *   n    : punpck suffix selecting the element width (e.g. wd, dq)
 * On exit a holds punpckl<n>(a, b) and t holds punpckh<n>(a, b).
 * Expands to an asm string fragment.
 */
#define SBUTTERFLY(a,b,t,n)\
    "movq " #a ", " #t " \n\t" /* abcd */\
    "punpckl" #n " " #b ", " #a " \n\t" /* aebf */\
    "punpckh" #n " " #b ", " #t " \n\t" /* cgdh */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
48
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * Transpose a 4x4 matrix of 16-bit words held in registers a, b, c, d
 * (one row per register), using t as an extra register.
 * The transposed rows do NOT come back in a, b, c, d order: on exit they are
 * in a, d, t, c (rows 0..3); b holds an intermediate and is not a result.
 */
#define TRANSPOSE4(a,b,c,d,t)\
    SBUTTERFLY(a,b,t,wd) /* a=aebf t=cgdh */\
    SBUTTERFLY(c,d,b,wd) /* c=imjn b=kolp */\
    SBUTTERFLY(a,c,d,dq) /* a=aeim d=bfjn */\
    SBUTTERFLY(t,b,c,dq) /* t=cgko c=dhlp */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
54
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
55 static inline void cavs_idct8_1d(int16_t *block, uint64_t bias)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
56 {
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
57 asm volatile(
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
58 "movq 112(%0), %%mm4 \n\t" /* mm4 = src7 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
59 "movq 16(%0), %%mm5 \n\t" /* mm5 = src1 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
60 "movq 80(%0), %%mm2 \n\t" /* mm2 = src5 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
61 "movq 48(%0), %%mm7 \n\t" /* mm7 = src3 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
62 "movq %%mm4, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
63 "movq %%mm5, %%mm3 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
64 "movq %%mm2, %%mm6 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
65 "movq %%mm7, %%mm1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
66
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
67 "paddw %%mm4, %%mm4 \n\t" /* mm4 = 2*src7 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
68 "paddw %%mm3, %%mm3 \n\t" /* mm3 = 2*src1 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
69 "paddw %%mm6, %%mm6 \n\t" /* mm6 = 2*src5 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
70 "paddw %%mm1, %%mm1 \n\t" /* mm1 = 2*src3 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
71 "paddw %%mm4, %%mm0 \n\t" /* mm0 = 3*src7 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
72 "paddw %%mm3, %%mm5 \n\t" /* mm5 = 3*src1 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
73 "paddw %%mm6, %%mm2 \n\t" /* mm2 = 3*src5 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
74 "paddw %%mm1, %%mm7 \n\t" /* mm7 = 3*src3 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
75 "psubw %%mm4, %%mm5 \n\t" /* mm5 = 3*src1 - 2*src7 = a0 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
76 "paddw %%mm6, %%mm7 \n\t" /* mm7 = 3*src3 + 2*src5 = a1 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
77 "psubw %%mm2, %%mm1 \n\t" /* mm1 = 2*src3 - 3*src5 = a2 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
78 "paddw %%mm0, %%mm3 \n\t" /* mm3 = 2*src1 + 3*src7 = a3 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
79
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
80 "movq %%mm5, %%mm4 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
81 "movq %%mm7, %%mm6 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
82 "movq %%mm3, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
83 "movq %%mm1, %%mm2 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
84 SUMSUB_BA( %%mm7, %%mm5 ) /* mm7 = a0 + a1 mm5 = a0 - a1 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
85 "paddw %%mm3, %%mm7 \n\t" /* mm7 = a0 + a1 + a3 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
86 "paddw %%mm1, %%mm5 \n\t" /* mm5 = a0 - a1 + a2 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
87 "paddw %%mm7, %%mm7 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
88 "paddw %%mm5, %%mm5 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
89 "paddw %%mm6, %%mm7 \n\t" /* mm7 = b4 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
90 "paddw %%mm4, %%mm5 \n\t" /* mm5 = b5 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
91
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
92 SUMSUB_BA( %%mm1, %%mm3 ) /* mm1 = a3 + a2 mm3 = a3 - a2 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
93 "psubw %%mm1, %%mm4 \n\t" /* mm4 = a0 - a2 - a3 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
94 "movq %%mm4, %%mm1 \n\t" /* mm1 = a0 - a2 - a3 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
95 "psubw %%mm6, %%mm3 \n\t" /* mm3 = a3 - a2 - a1 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
96 "paddw %%mm1, %%mm1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
97 "paddw %%mm3, %%mm3 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
98 "psubw %%mm2, %%mm1 \n\t" /* mm1 = b7 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
99 "paddw %%mm0, %%mm3 \n\t" /* mm3 = b6 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
100
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
101 "movq 32(%0), %%mm2 \n\t" /* mm2 = src2 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
102 "movq 96(%0), %%mm6 \n\t" /* mm6 = src6 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
103 "movq %%mm2, %%mm4 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
104 "movq %%mm6, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
105 "psllw $2, %%mm4 \n\t" /* mm4 = 4*src2 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
106 "psllw $2, %%mm6 \n\t" /* mm6 = 4*src6 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
107 "paddw %%mm4, %%mm2 \n\t" /* mm2 = 5*src2 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
108 "paddw %%mm6, %%mm0 \n\t" /* mm0 = 5*src6 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
109 "paddw %%mm2, %%mm2 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
110 "paddw %%mm0, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
111 "psubw %%mm0, %%mm4 \n\t" /* mm4 = 4*src2 - 10*src6 = a7 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
112 "paddw %%mm2, %%mm6 \n\t" /* mm6 = 4*src6 + 10*src2 = a6 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
113
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
114 "movq (%0), %%mm2 \n\t" /* mm2 = src0 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
115 "movq 64(%0), %%mm0 \n\t" /* mm0 = src4 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
116 SUMSUB_BA( %%mm0, %%mm2 ) /* mm0 = src0+src4 mm2 = src0-src4 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
117 "psllw $3, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
118 "psllw $3, %%mm2 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
119 "paddw %1, %%mm0 \n\t" /* add rounding bias */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
120 "paddw %1, %%mm2 \n\t" /* add rounding bias */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
121
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
122 SUMSUB_BA( %%mm6, %%mm0 ) /* mm6 = a4 + a6 mm0 = a4 - a6 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
123 SUMSUB_BA( %%mm4, %%mm2 ) /* mm4 = a5 + a7 mm2 = a5 - a7 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
124 SUMSUB_BA( %%mm7, %%mm6 ) /* mm7 = dst0 mm6 = dst7 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
125 SUMSUB_BA( %%mm5, %%mm4 ) /* mm5 = dst1 mm4 = dst6 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
126 SUMSUB_BA( %%mm3, %%mm2 ) /* mm3 = dst2 mm2 = dst5 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
127 SUMSUB_BA( %%mm1, %%mm0 ) /* mm1 = dst3 mm0 = dst4 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
128 :: "r"(block), "m"(bias)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
129 );
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
130 }
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
131
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
132 static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
133 {
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
134 int i;
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
135 DECLARE_ALIGNED_8(int16_t, b2[64]);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
136
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
137 for(i=0; i<2; i++){
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
138 DECLARE_ALIGNED_8(uint64_t, tmp);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
139
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
140 cavs_idct8_1d(block+4*i, ff_pw_4);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
141
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
142 asm volatile(
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
143 "psraw $3, %%mm7 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
144 "psraw $3, %%mm6 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
145 "psraw $3, %%mm5 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
146 "psraw $3, %%mm4 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
147 "psraw $3, %%mm3 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
148 "psraw $3, %%mm2 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
149 "psraw $3, %%mm1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
150 "psraw $3, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
151 "movq %%mm7, %0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
152 TRANSPOSE4( %%mm0, %%mm2, %%mm4, %%mm6, %%mm7 )
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
153 "movq %%mm0, 8(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
154 "movq %%mm6, 24(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
155 "movq %%mm7, 40(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
156 "movq %%mm4, 56(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
157 "movq %0, %%mm7 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
158 TRANSPOSE4( %%mm7, %%mm5, %%mm3, %%mm1, %%mm0 )
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
159 "movq %%mm7, (%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
160 "movq %%mm1, 16(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
161 "movq %%mm0, 32(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
162 "movq %%mm3, 48(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
163 : "=m"(tmp)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
164 : "r"(b2+32*i)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
165 : "memory"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
166 );
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
167 }
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
168
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
169 for(i=0; i<2; i++){
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
170 cavs_idct8_1d(b2+4*i, ff_pw_64);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
171
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
172 asm volatile(
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
173 "psraw $7, %%mm7 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
174 "psraw $7, %%mm6 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
175 "psraw $7, %%mm5 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
176 "psraw $7, %%mm4 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
177 "psraw $7, %%mm3 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
178 "psraw $7, %%mm2 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
179 "psraw $7, %%mm1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
180 "psraw $7, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
181 "movq %%mm7, (%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
182 "movq %%mm5, 16(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
183 "movq %%mm3, 32(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
184 "movq %%mm1, 48(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
185 "movq %%mm0, 64(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
186 "movq %%mm2, 80(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
187 "movq %%mm4, 96(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
188 "movq %%mm6, 112(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
189 :: "r"(b2+4*i)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
190 : "memory"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
191 );
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
192 }
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
193
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
194 add_pixels_clamped_mmx(b2, dst, stride);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
195
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
196 /* clear block */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
197 asm volatile(
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
198 "pxor %%mm7, %%mm7 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
199 "movq %%mm7, (%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
200 "movq %%mm7, 8(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
201 "movq %%mm7, 16(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
202 "movq %%mm7, 24(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
203 "movq %%mm7, 32(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
204 "movq %%mm7, 40(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
205 "movq %%mm7, 48(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
206 "movq %%mm7, 56(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
207 "movq %%mm7, 64(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
208 "movq %%mm7, 72(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
209 "movq %%mm7, 80(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
210 "movq %%mm7, 88(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
211 "movq %%mm7, 96(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
212 "movq %%mm7, 104(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
213 "movq %%mm7, 112(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
214 "movq %%mm7, 120(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
215 :: "r" (block)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
216 );
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
217 }
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
218
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
219 /*****************************************************************************
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
220 *
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
221 * motion compensation
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
222 *
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
223 ****************************************************************************/
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
224
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * vertical filter [-1 -2 96 42 -7 0]
 *
 * One output row of the vertical quarter-pel filter. A..E are MMX registers
 * holding five consecutive source rows as 16-bit words; F receives the next
 * row (4 bytes loaded from (%0), widened to words) for use by later steps.
 * The value computed in mm6 is
 *     (C*%5 + D*%6 - 7*E - 2*B - A + %4) >> 7
 * which is then packed to unsigned bytes and handed to OP together with the
 * destination (%1); A is passed to OP as well, presumably as a scratch
 * register. %0 is advanced by %2 and %1 by %3.
 * B and E are scaled in place and shifted back afterwards, so they keep
 * their values as long as the intermediate left shift does not overflow
 * (assumes pixel-range inputs -- TODO confirm). mm6 and mm7 are clobbered;
 * mm7 is zero on exit.
 * NOTE(review): from the tap list, %5 and %6 are presumably 96 and 42 and %4
 * the rounding constant; the operand bindings live in the enclosing asm
 * statement, which is not part of this macro.
 */
#define QPEL_CAVSV1(A,B,C,D,E,F,OP) \
    "movd (%0), "#F" \n\t"\
    "movq "#C", %%mm6 \n\t"\
    "pmullw %5, %%mm6 \n\t"\
    "movq "#D", %%mm7 \n\t"\
    "pmullw %6, %%mm7 \n\t"\
    "psllw $3, "#E" \n\t"\
    "psubw "#E", %%mm6 \n\t"\
    "psraw $3, "#E" \n\t"\
    "paddw %%mm7, %%mm6 \n\t"\
    "paddw "#E", %%mm6 \n\t"\
    "paddw "#B", "#B" \n\t"\
    "pxor %%mm7, %%mm7 \n\t"\
    "add %2, %0 \n\t"\
    "punpcklbw %%mm7, "#F" \n\t"\
    "psubw "#B", %%mm6 \n\t"\
    "psraw $1, "#B" \n\t"\
    "psubw "#A", %%mm6 \n\t"\
    "paddw %4, %%mm6 \n\t"\
    "psraw $7, %%mm6 \n\t"\
    "packuswb %%mm6, %%mm6 \n\t"\
    OP(%%mm6, (%1), A, d) \
    "add %3, %1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
249
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * vertical filter [ 0 -1 5 5 -1 0]
 *
 * One output row of the vertical half-pel filter. B..E are MMX registers
 * holding four consecutive source rows as 16-bit words; F receives the next
 * row (4 bytes loaded from (%0), widened to words). The value computed in
 * mm6 is
 *     ((C + D)*%5 - B - E + %4) >> 3
 * which is packed to unsigned bytes and handed to OP together with the
 * destination (%1). A does not enter the arithmetic; it is only passed to
 * OP, presumably as a scratch register. %0 is advanced by %2 and %1 by %3.
 * Unlike the other filter variants this macro never clears mm7: it relies on
 * mm7 already being zero when the punpcklbw executes. mm6 is clobbered.
 * NOTE(review): from the tap list, %5 is presumably 5 and %4 the rounding
 * constant; the operand bindings live in the enclosing asm statement, which
 * is not part of this macro.
 */
#define QPEL_CAVSV2(A,B,C,D,E,F,OP) \
    "movd (%0), "#F" \n\t"\
    "movq "#C", %%mm6 \n\t"\
    "paddw "#D", %%mm6 \n\t"\
    "pmullw %5, %%mm6 \n\t"\
    "add %2, %0 \n\t"\
    "punpcklbw %%mm7, "#F" \n\t"\
    "psubw "#B", %%mm6 \n\t"\
    "psubw "#E", %%mm6 \n\t"\
    "paddw %4, %%mm6 \n\t"\
    "psraw $3, %%mm6 \n\t"\
    "packuswb %%mm6, %%mm6 \n\t"\
    OP(%%mm6, (%1), A, d) \
    "add %3, %1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
265
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * Vertical filter [ 0 -7 42 96 -2 -1 ] over the six row registers A..F
 * (one 16-bit lane per pixel).
 *
 * Asm operands, as bound by QPEL_CAVSVNUM: %0 source pointer, %1 destination
 * pointer, %2 source stride, %3 destination stride, %4 value added before
 * the shift, %5 multiplier applied to D, %6 multiplier applied to C.
 * The tap values in the title hold when %6 is 42 and %5 is 96, which is what
 * the ff_pw_42 / ff_pw_96 operands named by QPEL_CAVS suggest.
 *
 * One expansion loads the next source row into F (widened to words), stores
 * packuswb((%6*C + %5*D - 7*B - 2*E - F + %4) >> 7) through OP and advances
 * both pointers by their strides. mm6 is the accumulator. mm7 is borrowed as
 * a temporary and cleared again before it is used to widen F, so it is zero
 * on exit. B and E are scaled in place and restored by the matching shift.
 * A is not read by the filter; it is only handed to OP as a scratch register.
 */
#define QPEL_CAVSV3(A,B,C,D,E,F,OP) \
        /* fetch the incoming row, still as packed bytes */\
        "movd (%0), "#F" \n\t"\
        /* mm6 = C * %6, mm7 = D * %5 */\
        "movq "#C", %%mm6 \n\t"\
        "pmullw %6, %%mm6 \n\t"\
        "movq "#D", %%mm7 \n\t"\
        "pmullw %5, %%mm7 \n\t"\
        /* mm6 -= 8*B, with B shifted up and back down in place */\
        "psllw $3, "#B" \n\t"\
        "psubw "#B", %%mm6 \n\t"\
        "psraw $3, "#B" \n\t"\
        /* mm6 += D * %5, then += B, leaving a net -7*B */\
        "paddw %%mm7, %%mm6 \n\t"\
        "paddw "#B", %%mm6 \n\t"\
        /* E is doubled here and halved again after the subtraction below */\
        "paddw "#E", "#E" \n\t"\
        /* mm7 back to zero, step the source pointer, widen F */\
        "pxor %%mm7, %%mm7 \n\t"\
        "add %2, %0 \n\t"\
        "punpcklbw %%mm7, "#F" \n\t"\
        /* mm6 -= 2*E and mm6 -= F */\
        "psubw "#E", %%mm6 \n\t"\
        "psraw $1, "#E" \n\t"\
        "psubw "#F", %%mm6 \n\t"\
        /* add %4, scale down by 128 and saturate to unsigned bytes */\
        "paddw %4, %%mm6 \n\t"\
        "psraw $7, %%mm6 \n\t"\
        "packuswb %%mm6, %%mm6 \n\t"\
        /* write 4 pixels through OP and move to the next output row */\
        OP(%%mm6, (%1), A, d) \
        "add %3, %1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
290
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
291
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * Function body shared by the vertical filters. The expanding function must
 * provide dst, src, dstStride, srcStride and h.
 *
 *   VOP   one of the QPEL_CAVSV* row macros
 *   OP    store operator forwarded to VOP
 *   ADD   memory operand %4
 *   MUL1  memory operand %5
 *   MUL2  memory operand %6
 *
 * The block is handled in two passes (w), each covering a column strip that
 * is 4 bytes wide: rows are read with movd and src/dst move 4 bytes to the
 * right after a pass. src starts two rows above the first output row. Five
 * rows are preloaded into mm0..mm4 and widened to words; every VOP expansion
 * shown in this file then reads one further row, writes one output row and
 * the register arguments are rotated by one position for the next expansion.
 * The first asm statement emits 8 rows; a second one emits 8 more when h is
 * 16.
 *
 * The strides are cast to intptr_t so that the stride registers have the
 * same width as the pointer registers they are added to. The previous cast
 * to long yields a 32-bit operand on LLP64 targets, which does not match a
 * 64-bit pointer register in "add %2, %0".
 *
 * NOTE: the h==16 statement continues with the contents of mm0..mm5 and mm7
 * left behind by the first statement; nothing may touch the MMX registers
 * between the two.
 */
#define QPEL_CAVSVNUM(VOP,OP,ADD,MUL1,MUL2)\
    int w= 2;\
    src -= 2*srcStride;\
    \
    while(w--){\
      asm volatile(\
        /* mm7 = 0, used to widen bytes to words */\
        "pxor %%mm7, %%mm7 \n\t"\
        /* preload five consecutive source rows */\
        "movd (%0), %%mm0 \n\t"\
        "add %2, %0 \n\t"\
        "movd (%0), %%mm1 \n\t"\
        "add %2, %0 \n\t"\
        "movd (%0), %%mm2 \n\t"\
        "add %2, %0 \n\t"\
        "movd (%0), %%mm3 \n\t"\
        "add %2, %0 \n\t"\
        "movd (%0), %%mm4 \n\t"\
        "add %2, %0 \n\t"\
        "punpcklbw %%mm7, %%mm0 \n\t"\
        "punpcklbw %%mm7, %%mm1 \n\t"\
        "punpcklbw %%mm7, %%mm2 \n\t"\
        "punpcklbw %%mm7, %%mm3 \n\t"\
        "punpcklbw %%mm7, %%mm4 \n\t"\
        /* output rows 0..7 */\
        VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
        VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
        VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
        VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
        VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
        VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
        VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
        VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
        \
        : "+a"(src), "+c"(dst)\
        : "S"((intptr_t)srcStride), "D"((intptr_t)dstStride), "m"(ADD), "m"(MUL1), "m"(MUL2)\
        : "memory"\
      );\
      if(h==16){\
        asm volatile(\
          /* output rows 8..15, continuing the register rotation */\
          VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
          VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
          VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
          VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
          VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
          VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
          VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
          VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
          \
          : "+a"(src), "+c"(dst)\
          : "S"((intptr_t)srcStride), "D"((intptr_t)dstStride), "m"(ADD), "m"(MUL1), "m"(MUL2)\
          : "memory"\
        );\
      }\
      /* rewind the h+5 source rows / h output rows and step 4 bytes right */\
      src += 4-(h+5)*srcStride;\
      dst += 4-h*dstStride;\
    }
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
346
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * Emits the static filter functions for one combination of store operator
 * and instruction-set suffix.
 *
 *   OPNAME  name prefix of the generated functions (the file uses put_/avg_)
 *   OP      store operator macro, called as OP(result, destination, temp, size)
 *   MMX     name suffix of the generated functions (the file uses 3dnow/mmx2)
 *
 * Generated functions:
 *   <OPNAME>cavs_qpel8_h_<MMX>           8 rows of 8 pixels, horizontal filter
 *   <OPNAME>cavs_qpel16_h_<MMX>          four calls of the 8x8 version
 *   <OPNAME>cavs_qpel8or16_v{1,2,3}_<MMX>  QPEL_CAVSVNUM body with row count h
 *   <OPNAME>cavs_qpel8_v{1,2,3}_<MMX>    one strip call with h = 8
 *   <OPNAME>cavs_qpel16_v{1,2,3}_<MMX>   two strip calls with h = 16
 *
 * Horizontal filter, per output pixel x of a row:
 *   packuswb((%5*(src[x] + src[x+1]) - src[x-1] - src[x+2] + %6) >> 3)
 * with %5 = ff_pw_5 and %6 = ff_pw_4 as memory operands. The low and the
 * high four pixels are processed in separate registers and packed together
 * before the store.
 *
 * The strides are cast to intptr_t so that the stride registers have the
 * same width as the pointer registers they are added to. The previous cast
 * to long yields a 32-bit operand on LLP64 targets, which does not match a
 * 64-bit pointer register in "add %3, %0".
 */
#define QPEL_CAVS(OPNAME, OP, MMX)\
static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    int h=8;\
    asm volatile(\
        /* mm7 = 0 for widening, mm6 = multiplier (%5) */\
        "pxor %%mm7, %%mm7 \n\t"\
        "movq %5, %%mm6 \n\t"\
        "1: \n\t"\
        /* inner taps: mm0/mm1 = (src[x] + src[x+1]) * mm6, low/high half */\
        "movq (%0), %%mm0 \n\t"\
        "movq 1(%0), %%mm2 \n\t"\
        "movq %%mm0, %%mm1 \n\t"\
        "movq %%mm2, %%mm3 \n\t"\
        "punpcklbw %%mm7, %%mm0 \n\t"\
        "punpckhbw %%mm7, %%mm1 \n\t"\
        "punpcklbw %%mm7, %%mm2 \n\t"\
        "punpckhbw %%mm7, %%mm3 \n\t"\
        "paddw %%mm2, %%mm0 \n\t"\
        "paddw %%mm3, %%mm1 \n\t"\
        "pmullw %%mm6, %%mm0 \n\t"\
        "pmullw %%mm6, %%mm1 \n\t"\
        /* outer taps: mm2/mm5 = src[x-1] + src[x+2], low/high half */\
        "movq -1(%0), %%mm2 \n\t"\
        "movq 2(%0), %%mm4 \n\t"\
        "movq %%mm2, %%mm3 \n\t"\
        "movq %%mm4, %%mm5 \n\t"\
        "punpcklbw %%mm7, %%mm2 \n\t"\
        "punpckhbw %%mm7, %%mm3 \n\t"\
        "punpcklbw %%mm7, %%mm4 \n\t"\
        "punpckhbw %%mm7, %%mm5 \n\t"\
        "paddw %%mm4, %%mm2 \n\t"\
        "paddw %%mm3, %%mm5 \n\t"\
        "psubw %%mm2, %%mm0 \n\t"\
        "psubw %%mm5, %%mm1 \n\t"\
        /* add %6, scale down by 8, pack both halves with saturation */\
        "movq %6, %%mm5 \n\t"\
        "paddw %%mm5, %%mm0 \n\t"\
        "paddw %%mm5, %%mm1 \n\t"\
        "psraw $3, %%mm0 \n\t"\
        "psraw $3, %%mm1 \n\t"\
        "packuswb %%mm1, %%mm0 \n\t"\
        /* store 8 pixels; mm5 is free again and serves as OP's temp */\
        OP(%%mm0, (%1),%%mm5, q) \
        /* next row, until the counter in memory reaches zero */\
        "add %3, %0 \n\t"\
        "add %4, %1 \n\t"\
        "decl %2 \n\t"\
        " jnz 1b \n\t"\
        : "+a"(src), "+c"(dst), "+m"(h)\
        : "d"((intptr_t)srcStride), "S"((intptr_t)dstStride), "m"(ff_pw_5), "m"(ff_pw_4)\
        : "memory"\
    );\
}\
\
static inline void OPNAME ## cavs_qpel8or16_v1_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
    QPEL_CAVSVNUM(QPEL_CAVSV1,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
}\
\
static inline void OPNAME ## cavs_qpel8or16_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
    QPEL_CAVSVNUM(QPEL_CAVSV2,OP,ff_pw_4,ff_pw_5,ff_pw_5) \
}\
\
static inline void OPNAME ## cavs_qpel8or16_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
    QPEL_CAVSVNUM(QPEL_CAVSV3,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
}\
\
static void OPNAME ## cavs_qpel8_v1_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 8);\
}\
static void OPNAME ## cavs_qpel16_v1_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    /* left and right 8-pixel strips, 16 rows each */\
    OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 16);\
    OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
}\
\
static void OPNAME ## cavs_qpel8_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 8);\
}\
static void OPNAME ## cavs_qpel16_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    /* left and right 8-pixel strips, 16 rows each */\
    OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 16);\
    OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
}\
\
static void OPNAME ## cavs_qpel8_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 8);\
}\
static void OPNAME ## cavs_qpel16_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    /* left and right 8-pixel strips, 16 rows each */\
    OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 16);\
    OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
}\
\
static void OPNAME ## cavs_qpel16_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    /* upper two 8x8 quadrants */\
    OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\
    OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
    /* lower two 8x8 quadrants */\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\
    OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
}
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
439
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * Emits the three-argument (dst, src, stride) entry points for one
 * prefix / block size / suffix combination. Each one forwards to a filter
 * generated by QPEL_CAVS and passes the single stride for both the
 * destination and the source:
 *
 *   ..._mc20_<MMX>  ->  <OPNAME>cavs_qpel<SIZE>_h_<MMX>
 *   ..._mc01_<MMX>  ->  <OPNAME>cavs_qpel<SIZE>_v1_<MMX>
 *   ..._mc02_<MMX>  ->  <OPNAME>cavs_qpel<SIZE>_v2_<MMX>
 *   ..._mc03_<MMX>  ->  <OPNAME>cavs_qpel<SIZE>_v3_<MMX>
 */
#define CAVS_MC(OPNAME, SIZE, MMX) \
static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride)\
{\
    OPNAME ## cavs_qpel ## SIZE ## _h_ ## MMX(dst, src, stride, stride);\
}\
\
static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride)\
{\
    OPNAME ## cavs_qpel ## SIZE ## _v1_ ## MMX(dst, src, stride, stride);\
}\
\
static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride)\
{\
    OPNAME ## cavs_qpel ## SIZE ## _v2_ ## MMX(dst, src, stride, stride);\
}\
\
static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride)\
{\
    OPNAME ## cavs_qpel ## SIZE ## _v3_ ## MMX(dst, src, stride, stride);\
}
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
456
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * Store operator "put": emits a single mov<size> from a to b.
 * temp is accepted so that all store operators share one signature; it is
 * not used here.
 */
#define PUT_OP(a,b,temp, size) \
        "mov" #size " " #a ", " #b " \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * Store operator "avg" built on pavgusb: reads b into temp, averages it
 * into a, then writes a back to b. Both temp and a are overwritten.
 */
#define AVG_3DNOW_OP(a,b,temp, size) \
        /* temp = current contents of b */\
        "mov" #size " " #b ", " #temp " \n\t"\
        /* a = byte-wise average of a and temp */\
        "pavgusb " #temp ", " #a " \n\t"\
        /* b = a */\
        "mov" #size " " #a ", " #b " \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * Store operator "avg" built on pavgb: reads b into temp, averages it
 * into a, then writes a back to b. Both temp and a are overwritten.
 */
#define AVG_MMX2_OP(a,b,temp, size) \
        /* temp = current contents of b */\
        "mov" #size " " #b ", " #temp " \n\t"\
        /* a = byte-wise average of a and temp */\
        "pavgb " #temp ", " #a " \n\t"\
        /* b = a */\
        "mov" #size " " #a ", " #b " \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
466
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
467 QPEL_CAVS(put_, PUT_OP, 3dnow)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
468 QPEL_CAVS(avg_, AVG_3DNOW_OP, 3dnow)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
469 QPEL_CAVS(put_, PUT_OP, mmx2)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
470 QPEL_CAVS(avg_, AVG_MMX2_OP, mmx2)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
471
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
472 CAVS_MC(put_, 8, 3dnow)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
473 CAVS_MC(put_, 16,3dnow)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
474 CAVS_MC(avg_, 8, 3dnow)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
475 CAVS_MC(avg_, 16,3dnow)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
476 CAVS_MC(put_, 8, mmx2)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
477 CAVS_MC(put_, 16,mmx2)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
478 CAVS_MC(avg_, 8, mmx2)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
479 CAVS_MC(avg_, 16,mmx2)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
480
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * mc00 entry points. They are only declared here; no definition is generated
 * by the macros above. Both init functions below install them.
 */
void ff_put_cavs_qpel8_mc00_mmx2 (uint8_t *dst, uint8_t *src, int stride);
void ff_avg_cavs_qpel8_mc00_mmx2 (uint8_t *dst, uint8_t *src, int stride);
void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
485
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
486 void ff_cavsdsp_init_mmx2(DSPContext* c, AVCodecContext *avctx) {
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
487 #define dspfunc(PFX, IDX, NUM) \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
488 c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx2; \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
489 c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_mmx2; \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
490 c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_mmx2; \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
491 c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_mmx2; \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
492 c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_mmx2; \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
493
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
494 dspfunc(put_cavs_qpel, 0, 16);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
495 dspfunc(put_cavs_qpel, 1, 8);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
496 dspfunc(avg_cavs_qpel, 0, 16);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
497 dspfunc(avg_cavs_qpel, 1, 8);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
498 #undef dspfunc
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
499 c->cavs_idct8_add = cavs_idct8_add_mmx;
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
500 }
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
501
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
502 void ff_cavsdsp_init_3dnow(DSPContext* c, AVCodecContext *avctx) {
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
503 #define dspfunc(PFX, IDX, NUM) \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
504 c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx2; \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
505 c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_3dnow; \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
506 c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_3dnow; \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
507 c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_3dnow; \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
508 c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_3dnow; \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
509
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
510 dspfunc(put_cavs_qpel, 0, 16);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
511 dspfunc(put_cavs_qpel, 1, 8);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
512 dspfunc(avg_cavs_qpel, 0, 16);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
513 dspfunc(avg_cavs_qpel, 1, 8);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
514 #undef dspfunc
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
515 c->cavs_idct8_add = cavs_idct8_add_mmx;
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
516 }