annotate i386/cavsdsp_mmx.c @ 4889:beeb03aad909 libavcodec

patch so that the deprecated items show up correctly when building doxygen docs patch by mark cox melbournemark plus ffmpeg minus devel chez gmail dot com
author benoit
date Wed, 02 May 2007 09:13:47 +0000
parents c8c591fe26f8
children d5ba514e3f4a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3524
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
1 /*
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
2 * Chinese AVS video (AVS1-P2, JiZhun profile) decoder.
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
3 * Copyright (c) 2006 Stefan Gehrer <stefan.gehrer@gmx.de>
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
4 *
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
5 * MMX optimised DSP functions, based on H.264 optimisations by
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
6 * Michael Niedermayer and Loren Merritt
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
7 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3524
diff changeset
8 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3524
diff changeset
9 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3524
diff changeset
10 * FFmpeg is free software; you can redistribute it and/or
3524
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
11 * modify it under the terms of the GNU Lesser General Public
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
12 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3524
diff changeset
13 * version 2.1 of the License, or (at your option) any later version.
3524
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
14 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3524
diff changeset
15 * FFmpeg is distributed in the hope that it will be useful,
3524
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
18 * Lesser General Public License for more details.
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
19 *
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
20 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3524
diff changeset
21 * License along with FFmpeg; if not, write to the Free Software
3524
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
23 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
24
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
25 #include "../dsputil.h"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
26 #include "common.h"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
27
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
28 DECLARE_ALIGNED_8(static const uint64_t,ff_pw_4 ) = 0x0004000400040004ULL;
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
29 DECLARE_ALIGNED_8(static const uint64_t,ff_pw_5 ) = 0x0005000500050005ULL;
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
30 DECLARE_ALIGNED_8(static const uint64_t,ff_pw_7 ) = 0x0007000700070007ULL;
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
31 DECLARE_ALIGNED_8(static const uint64_t,ff_pw_42) = 0x002A002A002A002AULL;
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
32 DECLARE_ALIGNED_8(static const uint64_t,ff_pw_64) = 0x0040004000400040ULL;
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
33 DECLARE_ALIGNED_8(static const uint64_t,ff_pw_96) = 0x0060006000600060ULL;
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
34
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
35 /*****************************************************************************
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
36 *
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
37 * inverse transform
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
38 *
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
39 ****************************************************************************/
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
40
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * In-place butterfly on two MMX registers holding packed 16-bit words.
 * Expands to three instructions (AT&T operand order, destination last):
 *   acc += oth;  oth += oth;  oth -= acc;
 * so afterwards the first argument holds (acc + oth) and the second holds
 * (oth - acc), both in terms of the values on entry.
 */
#define SUMSUB_BA( acc, oth ) \
    "paddw "#oth", "#acc" \n\t" \
    "paddw "#oth", "#oth" \n\t" \
    "psubw "#acc", "#oth" \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
45
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * Interleave step used by TRANSPOSE4.
 *   lo  : first source register, overwritten with the low-half interleave
 *   src : second source register, left unchanged
 *   hi  : receives a copy of lo, then the high-half interleave
 *   sfx : punpck suffix selecting the element width (e.g. wd, dq)
 * The letter comments follow elements for lo = abcd and src = efgh.
 */
#define SBUTTERFLY(lo,src,hi,sfx) \
    "movq " #lo ", " #hi " \n\t"              /* hi = abcd */ \
    "punpckl" #sfx " " #src ", " #lo " \n\t"  /* lo = aebf */ \
    "punpckh" #sfx " " #src ", " #hi " \n\t"  /* hi = cgdh */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
50
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * Transpose a 4x4 matrix of 16-bit words held in four MMX registers.
 * Input rows:  r0 = abcd, r1 = efgh, r2 = ijkl, r3 = mnop; tmp is scratch.
 * Output rows end up, in order, in r0, r3, tmp, r2 (r1 is clobbered),
 * so callers must store the registers in that order.
 */
#define TRANSPOSE4(r0,r1,r2,r3,tmp) \
    SBUTTERFLY(r0,r1,tmp,wd)  /* r0  = aebf   tmp = cgdh */ \
    SBUTTERFLY(r2,r3,r1,wd)   /* r2  = imjn   r1  = kolp */ \
    SBUTTERFLY(r0,r2,r3,dq)   /* r0  = aeim   r3  = bfjn */ \
    SBUTTERFLY(tmp,r1,r2,dq)  /* tmp = cgko   r2  = dhlp */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
56
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
57 static inline void cavs_idct8_1d(int16_t *block, uint64_t bias)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
58 {
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
59 asm volatile(
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
60 "movq 112(%0), %%mm4 \n\t" /* mm4 = src7 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
61 "movq 16(%0), %%mm5 \n\t" /* mm5 = src1 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
62 "movq 80(%0), %%mm2 \n\t" /* mm2 = src5 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
63 "movq 48(%0), %%mm7 \n\t" /* mm7 = src3 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
64 "movq %%mm4, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
65 "movq %%mm5, %%mm3 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
66 "movq %%mm2, %%mm6 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
67 "movq %%mm7, %%mm1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
68
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
69 "paddw %%mm4, %%mm4 \n\t" /* mm4 = 2*src7 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
70 "paddw %%mm3, %%mm3 \n\t" /* mm3 = 2*src1 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
71 "paddw %%mm6, %%mm6 \n\t" /* mm6 = 2*src5 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
72 "paddw %%mm1, %%mm1 \n\t" /* mm1 = 2*src3 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
73 "paddw %%mm4, %%mm0 \n\t" /* mm0 = 3*src7 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
74 "paddw %%mm3, %%mm5 \n\t" /* mm5 = 3*src1 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
75 "paddw %%mm6, %%mm2 \n\t" /* mm2 = 3*src5 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
76 "paddw %%mm1, %%mm7 \n\t" /* mm7 = 3*src3 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
77 "psubw %%mm4, %%mm5 \n\t" /* mm5 = 3*src1 - 2*src7 = a0 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
78 "paddw %%mm6, %%mm7 \n\t" /* mm7 = 3*src3 + 2*src5 = a1 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
79 "psubw %%mm2, %%mm1 \n\t" /* mm1 = 2*src3 - 3*src5 = a2 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
80 "paddw %%mm0, %%mm3 \n\t" /* mm3 = 2*src1 + 3*src7 = a3 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
81
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
82 "movq %%mm5, %%mm4 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
83 "movq %%mm7, %%mm6 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
84 "movq %%mm3, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
85 "movq %%mm1, %%mm2 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
86 SUMSUB_BA( %%mm7, %%mm5 ) /* mm7 = a0 + a1 mm5 = a0 - a1 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
87 "paddw %%mm3, %%mm7 \n\t" /* mm7 = a0 + a1 + a3 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
88 "paddw %%mm1, %%mm5 \n\t" /* mm5 = a0 - a1 + a2 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
89 "paddw %%mm7, %%mm7 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
90 "paddw %%mm5, %%mm5 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
91 "paddw %%mm6, %%mm7 \n\t" /* mm7 = b4 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
92 "paddw %%mm4, %%mm5 \n\t" /* mm5 = b5 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
93
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
94 SUMSUB_BA( %%mm1, %%mm3 ) /* mm1 = a3 + a2 mm3 = a3 - a2 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
95 "psubw %%mm1, %%mm4 \n\t" /* mm4 = a0 - a2 - a3 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
96 "movq %%mm4, %%mm1 \n\t" /* mm1 = a0 - a2 - a3 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
97 "psubw %%mm6, %%mm3 \n\t" /* mm3 = a3 - a2 - a1 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
98 "paddw %%mm1, %%mm1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
99 "paddw %%mm3, %%mm3 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
100 "psubw %%mm2, %%mm1 \n\t" /* mm1 = b7 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
101 "paddw %%mm0, %%mm3 \n\t" /* mm3 = b6 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
102
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
103 "movq 32(%0), %%mm2 \n\t" /* mm2 = src2 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
104 "movq 96(%0), %%mm6 \n\t" /* mm6 = src6 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
105 "movq %%mm2, %%mm4 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
106 "movq %%mm6, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
107 "psllw $2, %%mm4 \n\t" /* mm4 = 4*src2 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
108 "psllw $2, %%mm6 \n\t" /* mm6 = 4*src6 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
109 "paddw %%mm4, %%mm2 \n\t" /* mm2 = 5*src2 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
110 "paddw %%mm6, %%mm0 \n\t" /* mm0 = 5*src6 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
111 "paddw %%mm2, %%mm2 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
112 "paddw %%mm0, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
113 "psubw %%mm0, %%mm4 \n\t" /* mm4 = 4*src2 - 10*src6 = a7 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
114 "paddw %%mm2, %%mm6 \n\t" /* mm6 = 4*src6 + 10*src2 = a6 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
115
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
116 "movq (%0), %%mm2 \n\t" /* mm2 = src0 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
117 "movq 64(%0), %%mm0 \n\t" /* mm0 = src4 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
118 SUMSUB_BA( %%mm0, %%mm2 ) /* mm0 = src0+src4 mm2 = src0-src4 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
119 "psllw $3, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
120 "psllw $3, %%mm2 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
121 "paddw %1, %%mm0 \n\t" /* add rounding bias */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
122 "paddw %1, %%mm2 \n\t" /* add rounding bias */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
123
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
124 SUMSUB_BA( %%mm6, %%mm0 ) /* mm6 = a4 + a6 mm0 = a4 - a6 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
125 SUMSUB_BA( %%mm4, %%mm2 ) /* mm4 = a5 + a7 mm2 = a5 - a7 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
126 SUMSUB_BA( %%mm7, %%mm6 ) /* mm7 = dst0 mm6 = dst7 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
127 SUMSUB_BA( %%mm5, %%mm4 ) /* mm5 = dst1 mm4 = dst6 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
128 SUMSUB_BA( %%mm3, %%mm2 ) /* mm3 = dst2 mm2 = dst5 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
129 SUMSUB_BA( %%mm1, %%mm0 ) /* mm1 = dst3 mm0 = dst4 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
130 :: "r"(block), "m"(bias)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
131 );
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
132 }
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
133
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
134 static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
135 {
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
136 int i;
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
137 DECLARE_ALIGNED_8(int16_t, b2[64]);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
138
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
139 for(i=0; i<2; i++){
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
140 DECLARE_ALIGNED_8(uint64_t, tmp);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
141
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
142 cavs_idct8_1d(block+4*i, ff_pw_4);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
143
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
144 asm volatile(
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
145 "psraw $3, %%mm7 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
146 "psraw $3, %%mm6 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
147 "psraw $3, %%mm5 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
148 "psraw $3, %%mm4 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
149 "psraw $3, %%mm3 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
150 "psraw $3, %%mm2 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
151 "psraw $3, %%mm1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
152 "psraw $3, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
153 "movq %%mm7, %0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
154 TRANSPOSE4( %%mm0, %%mm2, %%mm4, %%mm6, %%mm7 )
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
155 "movq %%mm0, 8(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
156 "movq %%mm6, 24(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
157 "movq %%mm7, 40(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
158 "movq %%mm4, 56(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
159 "movq %0, %%mm7 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
160 TRANSPOSE4( %%mm7, %%mm5, %%mm3, %%mm1, %%mm0 )
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
161 "movq %%mm7, (%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
162 "movq %%mm1, 16(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
163 "movq %%mm0, 32(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
164 "movq %%mm3, 48(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
165 : "=m"(tmp)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
166 : "r"(b2+32*i)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
167 : "memory"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
168 );
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
169 }
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
170
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
171 for(i=0; i<2; i++){
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
172 cavs_idct8_1d(b2+4*i, ff_pw_64);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
173
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
174 asm volatile(
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
175 "psraw $7, %%mm7 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
176 "psraw $7, %%mm6 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
177 "psraw $7, %%mm5 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
178 "psraw $7, %%mm4 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
179 "psraw $7, %%mm3 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
180 "psraw $7, %%mm2 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
181 "psraw $7, %%mm1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
182 "psraw $7, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
183 "movq %%mm7, (%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
184 "movq %%mm5, 16(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
185 "movq %%mm3, 32(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
186 "movq %%mm1, 48(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
187 "movq %%mm0, 64(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
188 "movq %%mm2, 80(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
189 "movq %%mm4, 96(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
190 "movq %%mm6, 112(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
191 :: "r"(b2+4*i)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
192 : "memory"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
193 );
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
194 }
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
195
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
196 add_pixels_clamped_mmx(b2, dst, stride);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
197
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
198 /* clear block */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
199 asm volatile(
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
200 "pxor %%mm7, %%mm7 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
201 "movq %%mm7, (%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
202 "movq %%mm7, 8(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
203 "movq %%mm7, 16(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
204 "movq %%mm7, 24(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
205 "movq %%mm7, 32(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
206 "movq %%mm7, 40(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
207 "movq %%mm7, 48(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
208 "movq %%mm7, 56(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
209 "movq %%mm7, 64(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
210 "movq %%mm7, 72(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
211 "movq %%mm7, 80(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
212 "movq %%mm7, 88(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
213 "movq %%mm7, 96(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
214 "movq %%mm7, 104(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
215 "movq %%mm7, 112(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
216 "movq %%mm7, 120(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
217 :: "r" (block)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
218 );
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
219 }
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
220
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
221 /*****************************************************************************
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
222 *
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
223 * motion compensation
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
224 *
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
225 ****************************************************************************/
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
226
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/* vertical filter [-1 -2 96 42 -7 0]
 *
 * One output row of the vertical qpel filter. TA..TF name the MMX registers
 * holding six consecutive source rows as packed words; OP is the store
 * macro. Operands %0..%6 belong to the enclosing asm statement: %0 is the
 * source pointer (advanced by %2), %1 the destination pointer (advanced by
 * %3), %5 and %6 multiply TC and TD, %4 is added before the >>7.
 * NOTE(review): from the tap list %5/%6/%4 are presumably ff_pw_96,
 * ff_pw_42 and ff_pw_64 -- confirm at the expansion site.
 * mm6 and mm7 are used as scratch; mm7 is zero afterwards. TB and TE are
 * scaled temporarily and shifted back; TA is handed to OP as third argument.
 */
#define QPEL_CAVSV1(TA,TB,TC,TD,TE,TF,OP) \
    "movd (%0), "#TF" \n\t"          /* TF = next four source bytes   */ \
    "movq "#TC", %%mm6 \n\t" \
    "pmullw %5, %%mm6 \n\t"          /* mm6  = %5 * TC                */ \
    "movq "#TD", %%mm7 \n\t" \
    "pmullw %6, %%mm7 \n\t"          /* mm7  = %6 * TD                */ \
    "psllw $3, "#TE" \n\t"           /* TE  *= 8                      */ \
    "psubw "#TE", %%mm6 \n\t"        /* mm6 -= 8*TE                   */ \
    "psraw $3, "#TE" \n\t"           /* TE shifted back               */ \
    "paddw %%mm7, %%mm6 \n\t"        /* mm6 += %6 * TD                */ \
    "paddw "#TE", %%mm6 \n\t"        /* mm6 += TE  (net -7*TE)        */ \
    "paddw "#TB", "#TB" \n\t"        /* TB  *= 2                      */ \
    "pxor %%mm7, %%mm7 \n\t"         /* mm7  = 0                      */ \
    "add %2, %0 \n\t"                /* advance source pointer        */ \
    "punpcklbw %%mm7, "#TF" \n\t"    /* widen TF bytes to words       */ \
    "psubw "#TB", %%mm6 \n\t"        /* mm6 -= 2*TB                   */ \
    "psraw $1, "#TB" \n\t"           /* TB shifted back               */ \
    "psubw "#TA", %%mm6 \n\t"        /* mm6 -= TA                     */ \
    "paddw %4, %%mm6 \n\t"           /* add rounding term             */ \
    "psraw $7, %%mm6 \n\t" \
    "packuswb %%mm6, %%mm6 \n\t"     /* saturate words to bytes       */ \
    OP(%%mm6, (%1), TA, d) \
    "add %3, %1 \n\t"                /* advance destination pointer   */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
251
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/* vertical filter [ 0 -1 5 5 -1 0]
 *
 * One output row of the vertical half-sample filter. TB..TE are the four
 * contributing rows (packed words), TF receives the next source row, TA is
 * only forwarded to OP. Operands %0..%5 belong to the enclosing asm
 * statement: %0 source pointer (advanced by %2), %1 destination pointer
 * (advanced by %3), %5 multiplies (TC + TD), %4 is added before the >>3.
 * Unlike QPEL_CAVSV1 this macro does not clear %%mm7 itself, so the
 * byte-to-word widening of TF relies on %%mm7 already being zero.
 */
#define QPEL_CAVSV2(TA,TB,TC,TD,TE,TF,OP) \
    "movd (%0), "#TF" \n\t"          /* TF = next four source bytes   */ \
    "movq "#TC", %%mm6 \n\t" \
    "paddw "#TD", %%mm6 \n\t"        /* mm6  = TC + TD                */ \
    "pmullw %5, %%mm6 \n\t"          /* mm6 *= %5                     */ \
    "add %2, %0 \n\t"                /* advance source pointer        */ \
    "punpcklbw %%mm7, "#TF" \n\t"    /* widen TF bytes to words       */ \
    "psubw "#TB", %%mm6 \n\t"        /* mm6 -= TB                     */ \
    "psubw "#TE", %%mm6 \n\t"        /* mm6 -= TE                     */ \
    "paddw %4, %%mm6 \n\t"           /* add rounding term             */ \
    "psraw $3, %%mm6 \n\t" \
    "packuswb %%mm6, %%mm6 \n\t"     /* saturate words to bytes       */ \
    OP(%%mm6, (%1), TA, d) \
    "add %3, %1 \n\t"                /* advance destination pointer   */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
267
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * Asm-text fragment producing one output row of the vertical filter below.
 *
 * Meant to be expanded inside the asm statement of QPEL_CAVSVNUM, whose
 * operands are: %0 = src, %1 = dst, %2 = srcStride, %3 = dstStride,
 * %4 = ADD (rounding term), %5 = MUL1, %6 = MUL2.
 *
 * A..F name six MMX registers. B..E hold already-unpacked rows; F receives
 * the next source row (4 bytes, unpacked to words). A is only handed to OP
 * as its scratch register, so its previous contents may be overwritten.
 *
 * Result in mm6: (C * %6 + D * %5 - 8*B + B - 2*E - F + %4) >> 7, packed
 * with unsigned saturation and written through OP to (%1).
 *
 * B and E are scaled in place (shift left by 3 / doubled) for the
 * subtraction and shifted back afterwards so later rows can reuse them.
 * mm7 is borrowed as scratch for D * %5 and cleared again before F is
 * unpacked. src and dst are each advanced by one row.
 */
268 /* vertical filter [ 0 -7 42 96 -2 -1] */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
269 #define QPEL_CAVSV3(A,B,C,D,E,F,OP) \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
270 "movd (%0), "#F" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
271 "movq "#C", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
272 "pmullw %6, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
273 "movq "#D", %%mm7 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
274 "pmullw %5, %%mm7 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
275 "psllw $3, "#B" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
276 "psubw "#B", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
277 "psraw $3, "#B" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
278 "paddw %%mm7, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
279 "paddw "#B", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
280 "paddw "#E", "#E" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
281 "pxor %%mm7, %%mm7 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
282 "add %2, %0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
283 "punpcklbw %%mm7, "#F" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
284 "psubw "#E", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
285 "psraw $1, "#E" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
286 "psubw "#F", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
287 "paddw %4, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
288 "psraw $7, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
289 "packuswb %%mm6, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
290 OP(%%mm6, (%1), A, d) \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
291 "add %3, %1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
292
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
293
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * Function-body template shared by the vertical qpel filters.
 *
 * Expands inside a function that has dst, src, dstStride, srcStride and h
 * in scope. VOP is the per-row filter macro (QPEL_CAVSV1/2/3) and OP the
 * store macro. Asm operands: %0 = src and %1 = dst (both read-write),
 * %2 = srcStride, %3 = dstStride, %4 = ADD, %5 = MUL1, %6 = MUL2 (the last
 * three as memory operands).
 *
 * src is first moved up two rows. The loop then runs twice, each pass
 * handling one 4-byte-wide column (movd loads, "d"-sized stores): five
 * rows are loaded and unpacked to words in mm0-mm4, then eight VOP
 * expansions emit eight output rows while the six register roles rotate
 * by one position per row.
 *
 * When h == 16 a second asm statement emits eight more rows. It continues
 * the rotation and therefore depends on the MMX registers still holding
 * what the first statement left in them; those registers are not declared
 * as outputs or clobbers.
 *
 * After a column, src and dst move 4 bytes right and back up by the rows
 * consumed (h+5 source rows, h destination rows). The first asm always
 * emits 8 rows and only h == 16 adds more, so this rewind is exact only
 * for h of 8 or 16.
 */
294 #define QPEL_CAVSVNUM(VOP,OP,ADD,MUL1,MUL2)\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
295 int w= 2;\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
296 src -= 2*srcStride;\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
297 \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
298 while(w--){\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
299 asm volatile(\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
300 "pxor %%mm7, %%mm7 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
301 "movd (%0), %%mm0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
302 "add %2, %0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
303 "movd (%0), %%mm1 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
304 "add %2, %0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
305 "movd (%0), %%mm2 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
306 "add %2, %0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
307 "movd (%0), %%mm3 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
308 "add %2, %0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
309 "movd (%0), %%mm4 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
310 "add %2, %0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
311 "punpcklbw %%mm7, %%mm0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
312 "punpcklbw %%mm7, %%mm1 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
313 "punpcklbw %%mm7, %%mm2 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
314 "punpcklbw %%mm7, %%mm3 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
315 "punpcklbw %%mm7, %%mm4 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
316 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
317 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
318 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
319 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
320 VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
321 VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
322 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
323 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
324 \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
325 : "+a"(src), "+c"(dst)\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
326 : "S"((long)srcStride), "D"((long)dstStride), "m"(ADD), "m"(MUL1), "m"(MUL2)\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
327 : "memory"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
328 );\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
329 if(h==16){\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
330 asm volatile(\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
331 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
332 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
333 VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
334 VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
335 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
336 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
337 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
338 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
339 \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
340 : "+a"(src), "+c"(dst)\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
341 : "S"((long)srcStride), "D"((long)dstStride), "m"(ADD), "m"(MUL1), "m"(MUL2)\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
342 : "memory"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
343 );\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
344 }\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
345 src += 4-(h+5)*srcStride;\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
346 dst += 4-h*dstStride;\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
347 }
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
348
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * Generates the static qpel filter functions for one instruction-set
 * flavour. OPNAME is the function-name prefix, OP the store macro used to
 * write results, MMX the function-name suffix.
 *
 * Functions produced (names shown without the token pasting):
 *   <OPNAME>cavs_qpel8_h_<MMX>          8 rows of 8 pixels, horizontal filter
 *   <OPNAME>cavs_qpel8or16_v{1,2,3}_<MMX>
 *                                       vertical filters, body supplied by
 *                                       QPEL_CAVSVNUM with QPEL_CAVSV1/2/3;
 *                                       v1 and v3 pass ff_pw_64, ff_pw_96,
 *                                       ff_pw_42, v2 passes ff_pw_4, ff_pw_5,
 *                                       ff_pw_5
 *   <OPNAME>cavs_qpel8_v{1,2,3}_<MMX>   call the 8or16 variant with h = 8
 *   <OPNAME>cavs_qpel16_v{1,2,3}_<MMX>  call the 8or16 variant twice with
 *                                       h = 16, at dst/src and dst+8/src+8
 *   <OPNAME>cavs_qpel16_h_<MMX>         calls the 8x8 horizontal function four
 *                                       times, once per 8x8 quadrant
 *
 * Horizontal filter, per output byte x of a row:
 *   ((src[x] + src[x+1]) * ff_pw_5 - (src[x-1] + src[x+2]) + ff_pw_4) >> 3
 * packed with unsigned saturation (packuswb) and written through OP.
 * Each row is read with 8-byte loads at offsets -1, 0, 1 and 2 from src.
 * The row counter h lives in memory ("+m") and is decremented with decl;
 * asm operands are %0 = src, %1 = dst, %2 = h, %3 = srcStride,
 * %4 = dstStride, %5 = ff_pw_5, %6 = ff_pw_4.
 * NOTE(review): ff_pw_* are defined outside this part of the file; they are
 * presumably packed 16-bit constants of the named value -- confirm.
 */
349 #define QPEL_CAVS(OPNAME, OP, MMX)\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
350 static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
351 int h=8;\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
352 asm volatile(\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
353 "pxor %%mm7, %%mm7 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
354 "movq %5, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
355 "1: \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
356 "movq (%0), %%mm0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
357 "movq 1(%0), %%mm2 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
358 "movq %%mm0, %%mm1 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
359 "movq %%mm2, %%mm3 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
360 "punpcklbw %%mm7, %%mm0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
361 "punpckhbw %%mm7, %%mm1 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
362 "punpcklbw %%mm7, %%mm2 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
363 "punpckhbw %%mm7, %%mm3 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
364 "paddw %%mm2, %%mm0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
365 "paddw %%mm3, %%mm1 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
366 "pmullw %%mm6, %%mm0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
367 "pmullw %%mm6, %%mm1 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
368 "movq -1(%0), %%mm2 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
369 "movq 2(%0), %%mm4 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
370 "movq %%mm2, %%mm3 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
371 "movq %%mm4, %%mm5 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
372 "punpcklbw %%mm7, %%mm2 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
373 "punpckhbw %%mm7, %%mm3 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
374 "punpcklbw %%mm7, %%mm4 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
375 "punpckhbw %%mm7, %%mm5 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
376 "paddw %%mm4, %%mm2 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
377 "paddw %%mm3, %%mm5 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
378 "psubw %%mm2, %%mm0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
379 "psubw %%mm5, %%mm1 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
380 "movq %6, %%mm5 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
381 "paddw %%mm5, %%mm0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
382 "paddw %%mm5, %%mm1 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
383 "psraw $3, %%mm0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
384 "psraw $3, %%mm1 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
385 "packuswb %%mm1, %%mm0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
386 OP(%%mm0, (%1),%%mm5, q) \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
387 "add %3, %0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
388 "add %4, %1 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
389 "decl %2 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
390 " jnz 1b \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
391 : "+a"(src), "+c"(dst), "+m"(h)\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
392 : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_4)\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
393 : "memory"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
394 );\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
395 }\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
396 \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
397 static inline void OPNAME ## cavs_qpel8or16_v1_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
398 QPEL_CAVSVNUM(QPEL_CAVSV1,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
399 }\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
400 \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
401 static inline void OPNAME ## cavs_qpel8or16_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
402 QPEL_CAVSVNUM(QPEL_CAVSV2,OP,ff_pw_4,ff_pw_5,ff_pw_5) \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
403 }\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
404 \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
405 static inline void OPNAME ## cavs_qpel8or16_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
406 QPEL_CAVSVNUM(QPEL_CAVSV3,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
407 }\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
408 \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
409 static void OPNAME ## cavs_qpel8_v1_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
410 OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 8);\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
411 }\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
412 static void OPNAME ## cavs_qpel16_v1_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
413 OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 16);\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
414 OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
415 }\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
416 \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
417 static void OPNAME ## cavs_qpel8_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
418 OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 8);\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
419 }\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
420 static void OPNAME ## cavs_qpel16_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
421 OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 16);\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
422 OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
423 }\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
424 \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
425 static void OPNAME ## cavs_qpel8_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
426 OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 8);\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
427 }\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
428 static void OPNAME ## cavs_qpel16_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
429 OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 16);\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
430 OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
431 }\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
432 \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
433 static void OPNAME ## cavs_qpel16_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
434 OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
435 OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
436 src += 8*srcStride;\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
437 dst += 8*dstStride;\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
438 OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
439 OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
440 }\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
441
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * Generates the three-argument (dst, src, stride) entry points for one
 * OPNAME / block SIZE / MMX-suffix combination. Each wrapper forwards to
 * the matching QPEL_CAVS function, passing stride as both the destination
 * and the source stride:
 *   _mc20_ -> _h_   (horizontal filter)
 *   _mc01_ -> _v1_  (vertical filter 1)
 *   _mc02_ -> _v2_  (vertical filter 2)
 *   _mc03_ -> _v3_  (vertical filter 3)
 */
442 #define CAVS_MC(OPNAME, SIZE, MMX) \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
443 static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
444 OPNAME ## cavs_qpel ## SIZE ## _h_ ## MMX(dst, src, stride, stride);\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
445 }\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
446 \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
447 static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
448 OPNAME ## cavs_qpel ## SIZE ## _v1_ ## MMX(dst, src, stride, stride);\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
449 }\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
450 \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
451 static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
452 OPNAME ## cavs_qpel ## SIZE ## _v2_ ## MMX(dst, src, stride, stride);\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
453 }\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
454 \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
455 static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
456 OPNAME ## cavs_qpel ## SIZE ## _v3_ ## MMX(dst, src, stride, stride);\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
457 }\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
458
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/* Store op "put": emits a single mov<size> from a to b; temp is unused. */
459 #define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * Store op "avg" for 3DNow!: loads the current contents of b into temp,
 * combines it into a with pavgusb (byte-wise average), then writes a back
 * to b. Both a and temp are overwritten.
 */
460 #define AVG_3DNOW_OP(a,b,temp, size) \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
461 "mov" #size " " #b ", " #temp " \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
462 "pavgusb " #temp ", " #a " \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
463 "mov" #size " " #a ", " #b " \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * Store op "avg" for MMX2: same sequence as AVG_3DNOW_OP but using pavgb
 * for the byte-wise average. Both a and temp are overwritten.
 */
464 #define AVG_MMX2_OP(a,b,temp, size) \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
465 "mov" #size " " #b ", " #temp " \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
466 "pavgb " #temp ", " #a " \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
467 "mov" #size " " #a ", " #b " \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
468
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * Instantiate the filter functions for put/avg in both flavours; the avg
 * variants differ only in the store op (pavgusb vs. pavgb).
 */
469 QPEL_CAVS(put_, PUT_OP, 3dnow)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
470 QPEL_CAVS(avg_, AVG_3DNOW_OP, 3dnow)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
471 QPEL_CAVS(put_, PUT_OP, mmx2)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
472 QPEL_CAVS(avg_, AVG_MMX2_OP, mmx2)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
473
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * Instantiate the mc20/mc01/mc02/mc03 wrappers for every put/avg,
 * 8/16 and 3dnow/mmx2 combination.
 */
474 CAVS_MC(put_, 8, 3dnow)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
475 CAVS_MC(put_, 16,3dnow)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
476 CAVS_MC(avg_, 8, 3dnow)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
477 CAVS_MC(avg_, 16,3dnow)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
478 CAVS_MC(put_, 8, mmx2)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
479 CAVS_MC(put_, 16,mmx2)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
480 CAVS_MC(avg_, 8, mmx2)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
481 CAVS_MC(avg_, 16,mmx2)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
482
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * Prototypes for the mc00 entry points installed by both init functions.
 * Only declared here; no definition is visible in this part of the file.
 */
483 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
484 void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
485 void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
486 void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
487
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * Installs the MMX2 CAVS routines into a DSPContext.
 *
 * For both put_cavs_qpel_pixels_tab and avg_cavs_qpel_pixels_tab, row 0
 * receives the 16-pixel functions and row 1 the 8-pixel ones. Only entries
 * 0, 2, 4, 8 and 12 are written (mc00, mc20, mc01, mc02, mc03); all other
 * entries are left as they were. cavs_idct8_add is set to
 * cavs_idct8_add_mmx.
 *
 * c:     context whose function pointers are overwritten
 * avctx: not used by this function
 */
488 void ff_cavsdsp_init_mmx2(DSPContext* c, AVCodecContext *avctx) {
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
489 #define dspfunc(PFX, IDX, NUM) \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
490 c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx2; \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
491 c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_mmx2; \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
492 c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_mmx2; \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
493 c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_mmx2; \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
494 c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_mmx2; \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
495
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
496 dspfunc(put_cavs_qpel, 0, 16);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
497 dspfunc(put_cavs_qpel, 1, 8);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
498 dspfunc(avg_cavs_qpel, 0, 16);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
499 dspfunc(avg_cavs_qpel, 1, 8);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
500 #undef dspfunc
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
501 c->cavs_idct8_add = cavs_idct8_add_mmx;
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
502 }
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
503
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * Installs the 3DNow! CAVS routines into a DSPContext.
 *
 * Same table layout as ff_cavsdsp_init_mmx2: row 0 gets the 16-pixel
 * functions, row 1 the 8-pixel ones, and only entries 0, 2, 4, 8 and 12
 * are written. Entries 2, 4, 8 and 12 use the _3dnow functions; entry 0
 * (mc00) uses the functions carrying the _mmx2 suffix, since no _3dnow
 * mc00 is declared in this file. cavs_idct8_add is set to
 * cavs_idct8_add_mmx.
 * NOTE(review): confirm the *_mc00_mmx2 implementations use no instruction
 * that a 3DNow!-only CPU lacks; their definitions are not visible here.
 *
 * c:     context whose function pointers are overwritten
 * avctx: not used by this function
 */
504 void ff_cavsdsp_init_3dnow(DSPContext* c, AVCodecContext *avctx) {
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
505 #define dspfunc(PFX, IDX, NUM) \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
506 c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx2; \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
507 c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_3dnow; \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
508 c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_3dnow; \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
509 c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_3dnow; \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
510 c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_3dnow; \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
511
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
512 dspfunc(put_cavs_qpel, 0, 16);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
513 dspfunc(put_cavs_qpel, 1, 8);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
514 dspfunc(avg_cavs_qpel, 0, 16);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
515 dspfunc(avg_cavs_qpel, 1, 8);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
516 #undef dspfunc
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
517 c->cavs_idct8_add = cavs_idct8_add_mmx;
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
518 }