annotate i386/cavsdsp_mmx.c @ 6920:d02af7474bff libavcodec

Prevent 128*1<<trellis from becoming 0 and creating 0 sized arrays. fixes CID84 RUN2 CID85 RUN2 CID86 RUN2 CID87 RUN2 CID88 RUN2 CID89 RUN2 CID90 RUN2 CID91 RUN2 CID92 RUN2 CID93 RUN2 CID94 RUN2 CID95 RUN2 CID96 RUN2 CID97 RUN2 CID98 RUN2 CID99 RUN2 CID100 RUN2 CID101 RUN2 CID102 RUN2 CID103 RUN2 CID104 RUN2 CID105 RUN2 CID106 RUN2
author michael
date Wed, 28 May 2008 11:59:41 +0000
parents f7cbb7733146
children eebc7209c47f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3524
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
1 /*
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
2 * Chinese AVS video (AVS1-P2, JiZhun profile) decoder.
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
3 * Copyright (c) 2006 Stefan Gehrer <stefan.gehrer@gmx.de>
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
4 *
5967
15ed47af1838 Misc spelling fixes, prefer American over British English.
diego
parents: 5963
diff changeset
5 * MMX-optimized DSP functions, based on H.264 optimizations by
3524
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
6 * Michael Niedermayer and Loren Merritt
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
7 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3524
diff changeset
8 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3524
diff changeset
9 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3524
diff changeset
10 * FFmpeg is free software; you can redistribute it and/or
3524
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
11 * modify it under the terms of the GNU Lesser General Public
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
12 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3524
diff changeset
13 * version 2.1 of the License, or (at your option) any later version.
3524
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
14 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3524
diff changeset
15 * FFmpeg is distributed in the hope that it will be useful,
3524
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
18 * Lesser General Public License for more details.
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
19 *
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
20 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3524
diff changeset
21 * License along with FFmpeg; if not, write to the Free Software
5215
2b72f9bc4f06 license header consistency cosmetics
diego
parents: 5010
diff changeset
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
3524
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
23 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
24
6763
f7cbb7733146 Use full path for #includes from another directory.
diego
parents: 6755
diff changeset
25 #include "libavutil/common.h"
f7cbb7733146 Use full path for #includes from another directory.
diego
parents: 6755
diff changeset
26 #include "libavutil/x86_cpu.h"
f7cbb7733146 Use full path for #includes from another directory.
diego
parents: 6755
diff changeset
27 #include "libavcodec/dsputil.h"
5946
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents: 5215
diff changeset
28 #include "dsputil_mmx.h"
3524
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
29
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
30 /*****************************************************************************
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
31 *
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
32 * inverse transform
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
33 *
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
34 ****************************************************************************/
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
35
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * One-dimensional 8-point inverse transform, computed entirely in MMX
 * registers.
 *
 * block: 16-bit coefficients; row k is read with movq from byte offset 16*k
 *        (0, 16, ..., 112), i.e. four coefficients per row and per call.
 * bias:  rounding constant, added to (src0 + src4) << 3 and (src0 - src4) << 3.
 *
 * Nothing is stored to memory and the asm statement has no output operands.
 * Going by the inline comments on the last six SUMSUB_BA lines, the results
 * stay in registers as
 *     mm7 = dst0, mm5 = dst1, mm3 = dst2, mm1 = dst3,
 *     mm0 = dst4, mm2 = dst5, mm4 = dst6, mm6 = dst7
 * so the caller has to consume them in the asm statement that follows the call.
 *
 * SUMSUB_BA is not defined in this part of the file; judging from the inline
 * comments it leaves the sum in its first operand and the difference in its
 * second -- TODO confirm against its definition.
 *
 * NOTE(review): the asm reads memory through block but lists only the pointer
 * as a register input, with no memory clobber and no MMX register clobbers --
 * confirm the compiler cannot move stores to block across this statement.
 */
36 static inline void cavs_idct8_1d(int16_t *block, uint64_t bias)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
37 {
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
38 asm volatile(
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/* odd part: load src1, src3, src5, src7 and keep a copy of each */
39 "movq 112(%0), %%mm4 \n\t" /* mm4 = src7 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
40 "movq 16(%0), %%mm5 \n\t" /* mm5 = src1 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
41 "movq 80(%0), %%mm2 \n\t" /* mm2 = src5 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
42 "movq 48(%0), %%mm7 \n\t" /* mm7 = src3 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
43 "movq %%mm4, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
44 "movq %%mm5, %%mm3 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
45 "movq %%mm2, %%mm6 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
46 "movq %%mm7, %%mm1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
47
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/* multiplications by 2 and 3 are built from additions only */
48 "paddw %%mm4, %%mm4 \n\t" /* mm4 = 2*src7 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
49 "paddw %%mm3, %%mm3 \n\t" /* mm3 = 2*src1 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
50 "paddw %%mm6, %%mm6 \n\t" /* mm6 = 2*src5 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
51 "paddw %%mm1, %%mm1 \n\t" /* mm1 = 2*src3 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
52 "paddw %%mm4, %%mm0 \n\t" /* mm0 = 3*src7 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
53 "paddw %%mm3, %%mm5 \n\t" /* mm5 = 3*src1 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
54 "paddw %%mm6, %%mm2 \n\t" /* mm2 = 3*src5 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
55 "paddw %%mm1, %%mm7 \n\t" /* mm7 = 3*src3 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
56 "psubw %%mm4, %%mm5 \n\t" /* mm5 = 3*src1 - 2*src7 = a0 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
57 "paddw %%mm6, %%mm7 \n\t" /* mm7 = 3*src3 + 2*src5 = a1 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
58 "psubw %%mm2, %%mm1 \n\t" /* mm1 = 2*src3 - 3*src5 = a2 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
59 "paddw %%mm0, %%mm3 \n\t" /* mm3 = 2*src1 + 3*src7 = a3 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
60
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/* keep copies of a0..a3 (mm4 = a0, mm6 = a1, mm0 = a3, mm2 = a2) */
61 "movq %%mm5, %%mm4 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
62 "movq %%mm7, %%mm6 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
63 "movq %%mm3, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
64 "movq %%mm1, %%mm2 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
65 SUMSUB_BA( %%mm7, %%mm5 ) /* mm7 = a0 + a1 mm5 = a0 - a1 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
66 "paddw %%mm3, %%mm7 \n\t" /* mm7 = a0 + a1 + a3 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
67 "paddw %%mm1, %%mm5 \n\t" /* mm5 = a0 - a1 + a2 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
68 "paddw %%mm7, %%mm7 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
69 "paddw %%mm5, %%mm5 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
70 "paddw %%mm6, %%mm7 \n\t" /* mm7 = b4 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
71 "paddw %%mm4, %%mm5 \n\t" /* mm5 = b5 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
72
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
73 SUMSUB_BA( %%mm1, %%mm3 ) /* mm1 = a3 + a2 mm3 = a3 - a2 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
74 "psubw %%mm1, %%mm4 \n\t" /* mm4 = a0 - a2 - a3 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
75 "movq %%mm4, %%mm1 \n\t" /* mm1 = a0 - a2 - a3 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
76 "psubw %%mm6, %%mm3 \n\t" /* mm3 = a3 - a2 - a1 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
77 "paddw %%mm1, %%mm1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
78 "paddw %%mm3, %%mm3 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
79 "psubw %%mm2, %%mm1 \n\t" /* mm1 = b7 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
80 "paddw %%mm0, %%mm3 \n\t" /* mm3 = b6 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
81
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/* even part: src2 and src6, scaled by 4 and 10 via shifts and additions */
82 "movq 32(%0), %%mm2 \n\t" /* mm2 = src2 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
83 "movq 96(%0), %%mm6 \n\t" /* mm6 = src6 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
84 "movq %%mm2, %%mm4 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
85 "movq %%mm6, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
86 "psllw $2, %%mm4 \n\t" /* mm4 = 4*src2 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
87 "psllw $2, %%mm6 \n\t" /* mm6 = 4*src6 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
88 "paddw %%mm4, %%mm2 \n\t" /* mm2 = 5*src2 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
89 "paddw %%mm6, %%mm0 \n\t" /* mm0 = 5*src6 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
90 "paddw %%mm2, %%mm2 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
91 "paddw %%mm0, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
92 "psubw %%mm0, %%mm4 \n\t" /* mm4 = 4*src2 - 10*src6 = a7 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
93 "paddw %%mm2, %%mm6 \n\t" /* mm6 = 4*src6 + 10*src2 = a6 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
94
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/* even part: src0 +/- src4, scaled by 8, then the rounding bias (%1) */
95 "movq (%0), %%mm2 \n\t" /* mm2 = src0 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
96 "movq 64(%0), %%mm0 \n\t" /* mm0 = src4 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
97 SUMSUB_BA( %%mm0, %%mm2 ) /* mm0 = src0+src4 mm2 = src0-src4 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
98 "psllw $3, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
99 "psllw $3, %%mm2 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
100 "paddw %1, %%mm0 \n\t" /* add rounding bias */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
101 "paddw %1, %%mm2 \n\t" /* add rounding bias */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
102
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/* final butterflies: combine even and odd parts into dst0..dst7 */
103 SUMSUB_BA( %%mm6, %%mm0 ) /* mm6 = a4 + a6 mm0 = a4 - a6 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
104 SUMSUB_BA( %%mm4, %%mm2 ) /* mm4 = a5 + a7 mm2 = a5 - a7 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
105 SUMSUB_BA( %%mm7, %%mm6 ) /* mm7 = dst0 mm6 = dst7 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
106 SUMSUB_BA( %%mm5, %%mm4 ) /* mm5 = dst1 mm4 = dst6 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
107 SUMSUB_BA( %%mm3, %%mm2 ) /* mm3 = dst2 mm2 = dst5 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
108 SUMSUB_BA( %%mm1, %%mm0 ) /* mm1 = dst3 mm0 = dst4 */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
109 :: "r"(block), "m"(bias)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
110 );
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
111 }
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
112
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * 8x8 inverse transform of block, with the result added to dst.
 *
 * dst:    destination pixels, handed unchanged to add_pixels_clamped_mmx.
 * block:  64 16-bit coefficients; all 128 bytes are zeroed before returning.
 * stride: handed unchanged to add_pixels_clamped_mmx.
 *
 * Pass 1 runs cavs_idct8_1d twice (on block and block+4) with bias ff_pw_4,
 * shifts every result right by 3 and writes it through TRANSPOSE4 into the
 * local buffer b2 (32 coefficients per iteration).
 * Pass 2 runs cavs_idct8_1d twice (on b2 and b2+4) with bias ff_pw_64, shifts
 * right by 7 and stores the rows back over the same b2 locations.
 *
 * cavs_idct8_1d returns its results in mm0..mm7, so each call must be
 * followed directly by the asm statement that consumes those registers.
 */
113 static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
114 {
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
115 int i;
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/* intermediate 8x8 buffer shared by both passes */
116 DECLARE_ALIGNED_8(int16_t, b2[64]);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
117
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/* pass 1: transform four columns per iteration, store transposed into b2 */
118 for(i=0; i<2; i++){
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/* spill slot for mm7, which the first TRANSPOSE4 receives as fifth operand */
119 DECLARE_ALIGNED_8(uint64_t, tmp);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
120
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
121 cavs_idct8_1d(block+4*i, ff_pw_4);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
122
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
123 asm volatile(
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/* scale down all eight result registers after the ff_pw_4 bias */
124 "psraw $3, %%mm7 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
125 "psraw $3, %%mm6 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
126 "psraw $3, %%mm5 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
127 "psraw $3, %%mm4 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
128 "psraw $3, %%mm3 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
129 "psraw $3, %%mm2 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
130 "psraw $3, %%mm1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
131 "psraw $3, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/* save mm7 (dst0) to tmp before it is passed to TRANSPOSE4 below */
132 "movq %%mm7, %0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/* dst4..dst7 (mm0, mm2, mm4, mm6) go to byte offsets 8, 24, 40, 56 of b2+32*i */
133 TRANSPOSE4( %%mm0, %%mm2, %%mm4, %%mm6, %%mm7 )
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
134 "movq %%mm0, 8(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
135 "movq %%mm6, 24(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
136 "movq %%mm7, 40(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
137 "movq %%mm4, 56(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/* reload dst0; dst0..dst3 (mm7, mm5, mm3, mm1) go to byte offsets 0, 16, 32, 48 */
138 "movq %0, %%mm7 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
139 TRANSPOSE4( %%mm7, %%mm5, %%mm3, %%mm1, %%mm0 )
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
140 "movq %%mm7, (%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
141 "movq %%mm1, 16(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
142 "movq %%mm0, 32(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
143 "movq %%mm3, 48(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
144 : "=m"(tmp)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
145 : "r"(b2+32*i)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
146 : "memory"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
147 );
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
148 }
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
149
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/* pass 2: transform b2 in place, four columns per iteration */
150 for(i=0; i<2; i++){
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
151 cavs_idct8_1d(b2+4*i, ff_pw_64);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
152
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
153 asm volatile(
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/* scale down all eight result registers after the ff_pw_64 bias */
154 "psraw $7, %%mm7 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
155 "psraw $7, %%mm6 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
156 "psraw $7, %%mm5 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
157 "psraw $7, %%mm4 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
158 "psraw $7, %%mm3 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
159 "psraw $7, %%mm2 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
160 "psraw $7, %%mm1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
161 "psraw $7, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/* store dst0..dst7 (mm7, mm5, mm3, mm1, mm0, mm2, mm4, mm6) as rows 0..7 */
162 "movq %%mm7, (%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
163 "movq %%mm5, 16(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
164 "movq %%mm3, 32(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
165 "movq %%mm1, 48(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
166 "movq %%mm0, 64(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
167 "movq %%mm2, 80(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
168 "movq %%mm4, 96(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
169 "movq %%mm6, 112(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
170 :: "r"(b2+4*i)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
171 : "memory"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
172 );
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
173 }
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
174
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
175 add_pixels_clamped_mmx(b2, dst, stride);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
176
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * Sixteen 8-byte stores of zero cover all 64 coefficients of block.
 * NOTE(review): this asm writes through block but, unlike the two asm
 * statements above, lists no memory clobber -- confirm this is safe.
 */
177 /* clear block */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
178 asm volatile(
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
179 "pxor %%mm7, %%mm7 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
180 "movq %%mm7, (%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
181 "movq %%mm7, 8(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
182 "movq %%mm7, 16(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
183 "movq %%mm7, 24(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
184 "movq %%mm7, 32(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
185 "movq %%mm7, 40(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
186 "movq %%mm7, 48(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
187 "movq %%mm7, 56(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
188 "movq %%mm7, 64(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
189 "movq %%mm7, 72(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
190 "movq %%mm7, 80(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
191 "movq %%mm7, 88(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
192 "movq %%mm7, 96(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
193 "movq %%mm7, 104(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
194 "movq %%mm7, 112(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
195 "movq %%mm7, 120(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
196 :: "r" (block)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
197 );
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
198 }
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
199
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
200 /*****************************************************************************
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
201 *
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
202 * motion compensation
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
203 *
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
204 ****************************************************************************/
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
205
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
206 /* vertical filter [-1 -2 96 42 -7 0] */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * Asm fragment producing one output row of a six-tap vertical filter on
 * 16-bit lanes.
 *
 * A..F name MMX registers. A, B, C, D, E are read as already-loaded rows;
 * F is loaded here with movd from (%0) and zero-extended to words, after
 * which %0 is advanced by %2. F has no weight in this row.
 *
 * The value computed in mm6 is
 *     (-A - 2*B + %5*C + %6*D - 7*E + %4) >> 7
 * which is packed with unsigned saturation and handed to OP together with
 * (%1); %1 is then advanced by %3.
 *
 * B and E are temporarily scaled in place (paddw / psllw) and shifted back
 * afterwards. mm6 and mm7 are scratch; mm7 is zero when the fragment ends.
 * A is also passed to OP, presumably as a scratch register -- verify against
 * the OP macros.
 * The operands %0..%6 are bound by the asm statement that expands this macro,
 * which is outside this block; the tap list in the filter comment suggests
 * %5 = 96 and %6 = 42 -- TODO confirm.
 */
207 #define QPEL_CAVSV1(A,B,C,D,E,F,OP) \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
208 "movd (%0), "#F" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
209 "movq "#C", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
210 "pmullw %5, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
211 "movq "#D", %%mm7 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
212 "pmullw %6, %%mm7 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
213 "psllw $3, "#E" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
214 "psubw "#E", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
215 "psraw $3, "#E" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
216 "paddw %%mm7, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
217 "paddw "#E", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
218 "paddw "#B", "#B" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
219 "pxor %%mm7, %%mm7 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
220 "add %2, %0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
221 "punpcklbw %%mm7, "#F" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
222 "psubw "#B", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
223 "psraw $1, "#B" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
224 "psubw "#A", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
225 "paddw %4, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
226 "psraw $7, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
227 "packuswb %%mm6, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
228 OP(%%mm6, (%1), A, d) \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
229 "add %3, %1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
230
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
231 /* vertical filter [ 0 -1 5 5 -1 0] */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * Asm fragment producing one output row of the symmetric four-tap vertical
 * filter on 16-bit lanes.
 *
 * A..F name MMX registers. B, C, D, E are read as already-loaded rows; F is
 * loaded here with movd from (%0) and unpacked against mm7, after which %0 is
 * advanced by %2. A and F have no weight in this row.
 *
 * The value computed in mm6 is
 *     (%5*(C + D) - B - E + %4) >> 3
 * which is packed with unsigned saturation and handed to OP together with
 * (%1); %1 is then advanced by %3.
 *
 * Unlike the other two filter macros this one never clears mm7, so mm7 must
 * already hold zero on entry for the unpack to zero-extend -- presumably set
 * up by the enclosing asm statement; verify.
 * A is only passed to OP, presumably as a scratch register -- verify.
 * The tap list in the filter comment suggests %5 = 5 -- TODO confirm where
 * the operands are bound.
 */
232 #define QPEL_CAVSV2(A,B,C,D,E,F,OP) \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
233 "movd (%0), "#F" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
234 "movq "#C", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
235 "paddw "#D", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
236 "pmullw %5, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
237 "add %2, %0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
238 "punpcklbw %%mm7, "#F" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
239 "psubw "#B", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
240 "psubw "#E", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
241 "paddw %4, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
242 "psraw $3, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
243 "packuswb %%mm6, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
244 OP(%%mm6, (%1), A, d) \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
245 "add %3, %1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
246
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
247 /* vertical filter [ 0 -7 42 96 -2 -1] */
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * Asm fragment producing one output row of a six-tap vertical filter on
 * 16-bit lanes; the weights mirror those of QPEL_CAVSV1.
 *
 * A..F name MMX registers. B, C, D, E are read as already-loaded rows; F is
 * loaded here with movd from (%0), zero-extended to words and then used as
 * the last tap; %0 is advanced by %2. A has no weight in this row.
 *
 * The value computed in mm6 is
 *     (-7*B + %6*C + %5*D - 2*E - F + %4) >> 7
 * which is packed with unsigned saturation and handed to OP together with
 * (%1); %1 is then advanced by %3. Note that %5 and %6 are applied to D and C
 * here, the reverse of QPEL_CAVSV1.
 *
 * B and E are temporarily scaled in place (psllw / paddw) and shifted back
 * afterwards. mm6 and mm7 are scratch; mm7 is zero when the fragment ends.
 * A is only passed to OP, presumably as a scratch register -- verify.
 * The tap list in the filter comment suggests %5 = 96 and %6 = 42 -- TODO
 * confirm where the operands are bound.
 */
248 #define QPEL_CAVSV3(A,B,C,D,E,F,OP) \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
249 "movd (%0), "#F" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
250 "movq "#C", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
251 "pmullw %6, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
252 "movq "#D", %%mm7 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
253 "pmullw %5, %%mm7 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
254 "psllw $3, "#B" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
255 "psubw "#B", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
256 "psraw $3, "#B" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
257 "paddw %%mm7, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
258 "paddw "#B", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
259 "paddw "#E", "#E" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
260 "pxor %%mm7, %%mm7 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
261 "add %2, %0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
262 "punpcklbw %%mm7, "#F" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
263 "psubw "#E", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
264 "psraw $1, "#E" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
265 "psubw "#F", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
266 "paddw %4, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
267 "psraw $7, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
268 "packuswb %%mm6, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
269 OP(%%mm6, (%1), A, d) \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
270 "add %3, %1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
271
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
272
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * Body of a vertical quarter-pel function for blocks 8 pixels wide and
 * 8 or 16 rows high.
 *
 * Must be expanded inside a function that has these names in scope:
 *   uint8_t *dst, *src; int dstStride, srcStride, h;
 * VOP  is the per-row filter macro (QPEL_CAVSV1/2/3),
 * OP   is the store macro passed through to VOP,
 * ADD, MUL1, MUL2 are the memory constants bound to asm operands %4, %5, %6.
 *
 * The block is processed as two columns of 4 pixels (w = 2, movd loads).
 * Each column: back up two rows, preload five rows into mm0..mm4, then
 * run VOP eight times while rotating the register roles.  When h == 16 a
 * second asm statement emits eight more rows.
 */
#define QPEL_CAVSVNUM(VOP,OP,ADD,MUL1,MUL2)\
    int w= 2;\
    src -= 2*srcStride;\
    \
    while(w--){\
      asm volatile(\
        "pxor %%mm7, %%mm7 \n\t"\
        "movd (%0), %%mm0 \n\t"\
        "add %2, %0 \n\t"\
        "movd (%0), %%mm1 \n\t"\
        "add %2, %0 \n\t"\
        "movd (%0), %%mm2 \n\t"\
        "add %2, %0 \n\t"\
        "movd (%0), %%mm3 \n\t"\
        "add %2, %0 \n\t"\
        "movd (%0), %%mm4 \n\t"\
        "add %2, %0 \n\t"\
        "punpcklbw %%mm7, %%mm0 \n\t"\
        "punpcklbw %%mm7, %%mm1 \n\t"\
        "punpcklbw %%mm7, %%mm2 \n\t"\
        "punpcklbw %%mm7, %%mm3 \n\t"\
        "punpcklbw %%mm7, %%mm4 \n\t"\
        VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
        VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
        VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
        VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
        VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
        VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
        VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
        VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
        \
        : "+a"(src), "+c"(dst)\
        : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ADD), "m"(MUL1), "m"(MUL2)\
        : "memory"\
      );\
      if(h==16){\
        /* Continues the register rotation of the statement above without  */\
        /* reloading: it relies on mm0..mm5 still holding the rows left    */\
        /* there, i.e. on nothing touching MMX state between the two.      */\
        asm volatile(\
            VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
            VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
            VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
            VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
            VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
            VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
            VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
            VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
            \
           : "+a"(src), "+c"(dst)\
           : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ADD), "m"(MUL1), "m"(MUL2)\
           : "memory"\
        );\
      }\
      /* src advanced h+5 rows (5 preloads + h VOPs), dst advanced h rows: */\
      /* rewind both and step 4 pixels right to the next column.           */\
      src += 4-(h+5)*srcStride;\
      dst += 4-h*dstStride;\
    }
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
327
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * Generates the family of CAVS quarter-pel filter functions for one
 * (OPNAME, OP, MMX) combination:
 *   OPNAME  name prefix of the generated functions ("put_" or "avg_"),
 *   OP      store macro used inside the asm (PUT_OP, AVG_3DNOW_OP, ...),
 *   MMX     name suffix identifying the instruction-set flavour.
 *
 * Generated functions (all static):
 *   <OPNAME>cavs_qpel8_h_<MMX>            horizontal filter, 8x8
 *   <OPNAME>cavs_qpel8or16_v{1,2,3}_<MMX> vertical filters, 8 wide, h rows
 *   <OPNAME>cavs_qpel8_v{1,2,3}_<MMX>     vertical filters, 8x8
 *   <OPNAME>cavs_qpel16_v{1,2,3}_<MMX>    vertical filters, 16x16
 *   <OPNAME>cavs_qpel16_h_<MMX>           horizontal filter, 16x16
 */
#define QPEL_CAVS(OPNAME, OP, MMX)\
/* Horizontal filter over 8 rows of 8 pixels:                             */\
/*   out[x] = (5*(s[x] + s[x+1]) - (s[x-1] + s[x+2]) + 4) >> 3            */\
/* Operands: %0 src, %1 dst, %2 row counter h (in memory), %3 srcStride,  */\
/* %4 dstStride, %5 ff_pw_5, %6 ff_pw_4.                                  */\
static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    int h=8;\
    asm volatile(\
        "pxor %%mm7, %%mm7 \n\t"        /* mm7 = 0 for byte->word unpack   */\
        "movq %5, %%mm6 \n\t"           /* mm6 = multiplier constant       */\
        "1: \n\t"\
        "movq (%0), %%mm0 \n\t"\
        "movq 1(%0), %%mm2 \n\t"\
        "movq %%mm0, %%mm1 \n\t"\
        "movq %%mm2, %%mm3 \n\t"\
        "punpcklbw %%mm7, %%mm0 \n\t"\
        "punpckhbw %%mm7, %%mm1 \n\t"\
        "punpcklbw %%mm7, %%mm2 \n\t"\
        "punpckhbw %%mm7, %%mm3 \n\t"\
        "paddw %%mm2, %%mm0 \n\t"       /* s[x] + s[x+1], low 4 pixels     */\
        "paddw %%mm3, %%mm1 \n\t"       /* s[x] + s[x+1], high 4 pixels    */\
        "pmullw %%mm6, %%mm0 \n\t"\
        "pmullw %%mm6, %%mm1 \n\t"\
        "movq -1(%0), %%mm2 \n\t"\
        "movq 2(%0), %%mm4 \n\t"\
        "movq %%mm2, %%mm3 \n\t"\
        "movq %%mm4, %%mm5 \n\t"\
        "punpcklbw %%mm7, %%mm2 \n\t"\
        "punpckhbw %%mm7, %%mm3 \n\t"\
        "punpcklbw %%mm7, %%mm4 \n\t"\
        "punpckhbw %%mm7, %%mm5 \n\t"\
        "paddw %%mm4, %%mm2 \n\t"       /* s[x-1] + s[x+2], low 4 pixels   */\
        "paddw %%mm3, %%mm5 \n\t"       /* s[x-1] + s[x+2], high 4 pixels  */\
        "psubw %%mm2, %%mm0 \n\t"\
        "psubw %%mm5, %%mm1 \n\t"\
        "movq %6, %%mm5 \n\t"           /* rounding constant               */\
        "paddw %%mm5, %%mm0 \n\t"\
        "paddw %%mm5, %%mm1 \n\t"\
        "psraw $3, %%mm0 \n\t"\
        "psraw $3, %%mm1 \n\t"\
        "packuswb %%mm1, %%mm0 \n\t"    /* saturate to 8 unsigned bytes    */\
        OP(%%mm0, (%1),%%mm5, q) \
        "add %3, %0 \n\t"               /* src += srcStride                */\
        "add %4, %1 \n\t"               /* dst += dstStride                */\
        "decl %2 \n\t"\
        " jnz 1b \n\t"\
        : "+a"(src), "+c"(dst), "+m"(h)\
        : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_4)\
        : "memory"\
    );\
}\
\
/* Vertical filters, 8 pixels wide, h = 8 or 16 rows.  v1 and v3 share    */\
/* the constants (64, 96, 42) and differ only in the row macro; v2 uses   */\
/* (4, 5, 5).                                                             */\
static inline void OPNAME ## cavs_qpel8or16_v1_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
  QPEL_CAVSVNUM(QPEL_CAVSV1,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
}\
\
static inline void OPNAME ## cavs_qpel8or16_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
  QPEL_CAVSVNUM(QPEL_CAVSV2,OP,ff_pw_4,ff_pw_5,ff_pw_5) \
}\
\
static inline void OPNAME ## cavs_qpel8or16_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
  QPEL_CAVSVNUM(QPEL_CAVSV3,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
}\
\
/* Fixed-size wrappers: 8x8 is one call with h = 8, 16x16 is two calls    */\
/* with h = 16, the second shifted 8 pixels to the right.                 */\
static void OPNAME ## cavs_qpel8_v1_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 8);\
}\
static void OPNAME ## cavs_qpel16_v1_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 16);\
    OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
}\
\
static void OPNAME ## cavs_qpel8_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 8);\
}\
static void OPNAME ## cavs_qpel16_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 16);\
    OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
}\
\
static void OPNAME ## cavs_qpel8_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 8);\
}\
static void OPNAME ## cavs_qpel16_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 16);\
    OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
}\
\
/* 16x16 horizontal filter built from four 8x8 calls: top-left,           */\
/* top-right, then the same pair 8 rows further down.                     */\
static void OPNAME ## cavs_qpel16_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\
    OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\
    OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
}\

419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
420
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/*
 * Generates the single-stride entry points for one block size and flavour.
 * Each wrapper forwards to the matching QPEL_CAVS-generated filter, passing
 * the one `stride` argument as both destination and source stride:
 *   mc20 -> horizontal filter (_h_)
 *   mc01 -> vertical filter 1 (_v1_)
 *   mc02 -> vertical filter 2 (_v2_)
 *   mc03 -> vertical filter 3 (_v3_)
 * OPNAME is the "put_"/"avg_" prefix, SIZE is 8 or 16, MMX the name suffix.
 */
#define CAVS_MC(OPNAME, SIZE, MMX) \
static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## cavs_qpel ## SIZE ## _h_ ## MMX(dst, src, stride, stride);\
}\
\
static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## cavs_qpel ## SIZE ## _v1_ ## MMX(dst, src, stride, stride);\
}\
\
static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## cavs_qpel ## SIZE ## _v2_ ## MMX(dst, src, stride, stride);\
}\
\
static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## cavs_qpel ## SIZE ## _v3_ ## MMX(dst, src, stride, stride);\
}\

419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
437
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/* Store op "put": emits one mov<size> from register a to memory operand b.
 * temp is accepted for signature parity with the AVG_* ops and is unused. */
#define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/* Store op "avg" (3DNow! flavour): loads the current destination b into
 * temp, averages it into a with pavgusb, then stores a back to b. */
#define AVG_3DNOW_OP(a,b,temp, size) \
"mov" #size " " #b ", " #temp " \n\t"\
"pavgusb " #temp ", " #a " \n\t"\
"mov" #size " " #a ", " #b " \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/* Store op "avg" (MMX2 flavour): loads the current destination b into
 * temp, averages it into a with pavgb, then stores a back to b. */
#define AVG_MMX2_OP(a,b,temp, size) \
"mov" #size " " #b ", " #temp " \n\t"\
"pavgb " #temp ", " #a " \n\t"\
"mov" #size " " #a ", " #b " \n\t"
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
447
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
448 QPEL_CAVS(put_, PUT_OP, 3dnow)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
449 QPEL_CAVS(avg_, AVG_3DNOW_OP, 3dnow)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
450 QPEL_CAVS(put_, PUT_OP, mmx2)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
451 QPEL_CAVS(avg_, AVG_MMX2_OP, mmx2)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
452
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
453 CAVS_MC(put_, 8, 3dnow)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
454 CAVS_MC(put_, 16,3dnow)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
455 CAVS_MC(avg_, 8, 3dnow)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
456 CAVS_MC(avg_, 16,3dnow)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
457 CAVS_MC(put_, 8, mmx2)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
458 CAVS_MC(put_, 16,mmx2)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
459 CAVS_MC(avg_, 8, mmx2)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
460 CAVS_MC(avg_, 16,mmx2)
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
461
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
/* mc00 entry points; only declared in this file, the definitions live
 * elsewhere. */
void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
466
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
467 void ff_cavsdsp_init_mmx2(DSPContext* c, AVCodecContext *avctx) {
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
468 #define dspfunc(PFX, IDX, NUM) \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
469 c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx2; \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
470 c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_mmx2; \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
471 c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_mmx2; \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
472 c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_mmx2; \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
473 c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_mmx2; \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
474
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
475 dspfunc(put_cavs_qpel, 0, 16);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
476 dspfunc(put_cavs_qpel, 1, 8);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
477 dspfunc(avg_cavs_qpel, 0, 16);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
478 dspfunc(avg_cavs_qpel, 1, 8);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
479 #undef dspfunc
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
480 c->cavs_idct8_add = cavs_idct8_add_mmx;
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
481 }
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
482
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
483 void ff_cavsdsp_init_3dnow(DSPContext* c, AVCodecContext *avctx) {
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
484 #define dspfunc(PFX, IDX, NUM) \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
485 c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx2; \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
486 c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_3dnow; \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
487 c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_3dnow; \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
488 c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_3dnow; \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
489 c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_3dnow; \
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
490
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
491 dspfunc(put_cavs_qpel, 0, 16);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
492 dspfunc(put_cavs_qpel, 1, 8);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
493 dspfunc(avg_cavs_qpel, 0, 16);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
494 dspfunc(avg_cavs_qpel, 1, 8);
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
495 #undef dspfunc
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
496 c->cavs_idct8_add = cavs_idct8_add_mmx;
419409926166 some MMX optimizations for the CAVS decoder
stefang
parents:
diff changeset
497 }