annotate i386/vp3dsp_mmx.c @ 7922:ed0ebbb168b6 libavcodec

Do not use the generic "alloc missing references" code for h.264 as it does not work correctly in that case. Fixes issue652.
author michael
date Thu, 25 Sep 2008 14:34:14 +0000
parents e05e021fce72
children eebc7209c47f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1866
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
1 /*
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
2 * Copyright (C) 2004 the ffmpeg project
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
3 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
4 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
5 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
6 * FFmpeg is free software; you can redistribute it and/or
1866
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
7 * modify it under the terms of the GNU Lesser General Public
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
8 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
9 * version 2.1 of the License, or (at your option) any later version.
1866
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
10 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
11 * FFmpeg is distributed in the hope that it will be useful,
1866
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
14 * Lesser General Public License for more details.
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
15 *
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
16 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
17 * License along with FFmpeg; if not, write to the Free Software
3036
0b546eab515d Update licensing information: The FSF changed postal address.
diego
parents: 2967
diff changeset
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1866
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
19 */
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
20
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
21 /**
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
22 * @file vp3dsp_mmx.c
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
23 * MMX-optimized functions cribbed from the original VP3 source code.
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
24 */
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
25
6763
f7cbb7733146 Use full path for #includes from another directory.
diego
parents: 5014
diff changeset
26 #include "libavcodec/dsputil.h"
7742
bff9b5fea03f Use ff_pw_8 in MMX/SSE VP3 IDCT
conrad
parents: 6763
diff changeset
27 #include "dsputil_mmx.h"
1866
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
28
7759
892ca48b7d76 Use ff_vp3_idct_data in vp3dsp_mmx.c rather than duplicating it
conrad
parents: 7742
diff changeset
29 extern const uint16_t ff_vp3_idct_data[];
1866
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
30
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
31 /* from original comments: The Macro does IDct on 4 1-D Dcts */
7877
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
32 #define BeginIDCT() \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
33 "movq "I(3)", %%mm2 \n\t" \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
34 "movq "C(3)", %%mm6 \n\t" \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
35 "movq %%mm2, %%mm4 \n\t" \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
36 "movq "J(5)", %%mm7 \n\t" \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
37 "pmulhw %%mm6, %%mm4 \n\t" /* r4 = c3*i3 - i3 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
38 "movq "C(5)", %%mm1 \n\t" \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
39 "pmulhw %%mm7, %%mm6 \n\t" /* r6 = c3*i5 - i5 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
40 "movq %%mm1, %%mm5 \n\t" \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
41 "pmulhw %%mm2, %%mm1 \n\t" /* r1 = c5*i3 - i3 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
42 "movq "I(1)", %%mm3 \n\t" \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
43 "pmulhw %%mm7, %%mm5 \n\t" /* r5 = c5*i5 - i5 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
44 "movq "C(1)", %%mm0 \n\t" \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
45 "paddw %%mm2, %%mm4 \n\t" /* r4 = c3*i3 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
46 "paddw %%mm7, %%mm6 \n\t" /* r6 = c3*i5 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
47 "paddw %%mm1, %%mm2 \n\t" /* r2 = c5*i3 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
48 "movq "J(7)", %%mm1 \n\t" \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
49 "paddw %%mm5, %%mm7 \n\t" /* r7 = c5*i5 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
50 "movq %%mm0, %%mm5 \n\t" /* r5 = c1 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
51 "pmulhw %%mm3, %%mm0 \n\t" /* r0 = c1*i1 - i1 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
52 "paddsw %%mm7, %%mm4 \n\t" /* r4 = C = c3*i3 + c5*i5 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
53 "pmulhw %%mm1, %%mm5 \n\t" /* r5 = c1*i7 - i7 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
54 "movq "C(7)", %%mm7 \n\t" \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
55 "psubsw %%mm2, %%mm6 \n\t" /* r6 = D = c3*i5 - c5*i3 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
56 "paddw %%mm3, %%mm0 \n\t" /* r0 = c1*i1 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
57 "pmulhw %%mm7, %%mm3 \n\t" /* r3 = c7*i1 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
58 "movq "I(2)", %%mm2 \n\t" \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
59 "pmulhw %%mm1, %%mm7 \n\t" /* r7 = c7*i7 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
60 "paddw %%mm1, %%mm5 \n\t" /* r5 = c1*i7 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
61 "movq %%mm2, %%mm1 \n\t" /* r1 = i2 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
62 "pmulhw "C(2)", %%mm2 \n\t" /* r2 = c2*i2 - i2 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
63 "psubsw %%mm5, %%mm3 \n\t" /* r3 = B = c7*i1 - c1*i7 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
64 "movq "J(6)", %%mm5 \n\t" \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
65 "paddsw %%mm7, %%mm0 \n\t" /* r0 = A = c1*i1 + c7*i7 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
66 "movq %%mm5, %%mm7 \n\t" /* r7 = i6 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
67 "psubsw %%mm4, %%mm0 \n\t" /* r0 = A - C */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
68 "pmulhw "C(2)", %%mm5 \n\t" /* r5 = c2*i6 - i6 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
69 "paddw %%mm1, %%mm2 \n\t" /* r2 = c2*i2 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
70 "pmulhw "C(6)", %%mm1 \n\t" /* r1 = c6*i2 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
71 "paddsw %%mm4, %%mm4 \n\t" /* r4 = C + C */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
72 "paddsw %%mm0, %%mm4 \n\t" /* r4 = C. = A + C */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
73 "psubsw %%mm6, %%mm3 \n\t" /* r3 = B - D */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
74 "paddw %%mm7, %%mm5 \n\t" /* r5 = c2*i6 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
75 "paddsw %%mm6, %%mm6 \n\t" /* r6 = D + D */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
76 "pmulhw "C(6)", %%mm7 \n\t" /* r7 = c6*i6 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
77 "paddsw %%mm3, %%mm6 \n\t" /* r6 = D. = B + D */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
78 "movq %%mm4, "I(1)"\n\t" /* save C. at I(1) */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
79 "psubsw %%mm5, %%mm1 \n\t" /* r1 = H = c6*i2 - c2*i6 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
80 "movq "C(4)", %%mm4 \n\t" \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
81 "movq %%mm3, %%mm5 \n\t" /* r5 = B - D */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
82 "pmulhw %%mm4, %%mm3 \n\t" /* r3 = (c4 - 1) * (B - D) */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
83 "paddsw %%mm2, %%mm7 \n\t" /* r7 = G = c6*i6 + c2*i2 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
84 "movq %%mm6, "I(2)"\n\t" /* save D. at I(2) */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
85 "movq %%mm0, %%mm2 \n\t" /* r2 = A - C */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
86 "movq "I(0)", %%mm6 \n\t" \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
87 "pmulhw %%mm4, %%mm0 \n\t" /* r0 = (c4 - 1) * (A - C) */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
88 "paddw %%mm3, %%mm5 \n\t" /* r5 = B. = c4 * (B - D) */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
89 "movq "J(4)", %%mm3 \n\t" \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
90 "psubsw %%mm1, %%mm5 \n\t" /* r5 = B.. = B. - H */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
91 "paddw %%mm0, %%mm2 \n\t" /* r2 = A. = c4 * (A - C) */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
92 "psubsw %%mm3, %%mm6 \n\t" /* r6 = i0 - i4 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
93 "movq %%mm6, %%mm0 \n\t" \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
94 "pmulhw %%mm4, %%mm6 \n\t" /* r6 = (c4 - 1) * (i0 - i4) */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
95 "paddsw %%mm3, %%mm3 \n\t" /* r3 = i4 + i4 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
96 "paddsw %%mm1, %%mm1 \n\t" /* r1 = H + H */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
97 "paddsw %%mm0, %%mm3 \n\t" /* r3 = i0 + i4 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
98 "paddsw %%mm5, %%mm1 \n\t" /* r1 = H. = B. + H */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
99 "pmulhw %%mm3, %%mm4 \n\t" /* r4 = (c4 - 1) * (i0 + i4) */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
100 "paddsw %%mm0, %%mm6 \n\t" /* r6 = F = c4 * (i0 - i4) */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
101 "psubsw %%mm2, %%mm6 \n\t" /* r6 = F. = F - A. */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
102 "paddsw %%mm2, %%mm2 \n\t" /* r2 = A. + A. */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
103 "movq "I(1)", %%mm0 \n\t" /* r0 = C. */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
104 "paddsw %%mm6, %%mm2 \n\t" /* r2 = A.. = F + A. */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
105 "paddw %%mm3, %%mm4 \n\t" /* r4 = E = c4 * (i0 + i4) */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
106 "psubsw %%mm1, %%mm2 \n\t" /* r2 = R2 = A.. - H. */
1866
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
107
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
108 /* RowIDCT gets ready to transpose */
7877
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
109 #define RowIDCT() \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
110 BeginIDCT() \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
111 "movq "I(2)", %%mm3 \n\t" /* r3 = D. */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
112 "psubsw %%mm7, %%mm4 \n\t" /* r4 = E. = E - G */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
113 "paddsw %%mm1, %%mm1 \n\t" /* r1 = H. + H. */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
114 "paddsw %%mm7, %%mm7 \n\t" /* r7 = G + G */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
115 "paddsw %%mm2, %%mm1 \n\t" /* r1 = R1 = A.. + H. */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
116 "paddsw %%mm4, %%mm7 \n\t" /* r7 = G. = E + G */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
117 "psubsw %%mm3, %%mm4 \n\t" /* r4 = R4 = E. - D. */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
118 "paddsw %%mm3, %%mm3 \n\t" \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
119 "psubsw %%mm5, %%mm6 \n\t" /* r6 = R6 = F. - B.. */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
120 "paddsw %%mm5, %%mm5 \n\t" \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
121 "paddsw %%mm4, %%mm3 \n\t" /* r3 = R3 = E. + D. */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
122 "paddsw %%mm6, %%mm5 \n\t" /* r5 = R5 = F. + B.. */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
123 "psubsw %%mm0, %%mm7 \n\t" /* r7 = R7 = G. - C. */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
124 "paddsw %%mm0, %%mm0 \n\t" \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
125 "movq %%mm1, "I(1)"\n\t" /* save R1 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
126 "paddsw %%mm7, %%mm0 \n\t" /* r0 = R0 = G. + C. */
1866
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
127
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
128 /* Column IDCT normalizes and stores final results */
7877
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
129 #define ColumnIDCT() \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
130 BeginIDCT() \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
131 "paddsw "OC_8", %%mm2 \n\t" /* adjust R2 (and R1) for shift */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
132 "paddsw %%mm1, %%mm1 \n\t" /* r1 = H. + H. */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
133 "paddsw %%mm2, %%mm1 \n\t" /* r1 = R1 = A.. + H. */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
134 "psraw $4, %%mm2 \n\t" /* r2 = NR2 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
135 "psubsw %%mm7, %%mm4 \n\t" /* r4 = E. = E - G */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
136 "psraw $4, %%mm1 \n\t" /* r1 = NR1 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
137 "movq "I(2)", %%mm3 \n\t" /* r3 = D. */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
138 "paddsw %%mm7, %%mm7 \n\t" /* r7 = G + G */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
139 "movq %%mm2, "I(2)"\n\t" /* store NR2 at I2 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
140 "paddsw %%mm4, %%mm7 \n\t" /* r7 = G. = E + G */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
141 "movq %%mm1, "I(1)"\n\t" /* store NR1 at I1 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
142 "psubsw %%mm3, %%mm4 \n\t" /* r4 = R4 = E. - D. */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
143 "paddsw "OC_8", %%mm4 \n\t" /* adjust R4 (and R3) for shift */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
144 "paddsw %%mm3, %%mm3 \n\t" /* r3 = D. + D. */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
145 "paddsw %%mm4, %%mm3 \n\t" /* r3 = R3 = E. + D. */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
146 "psraw $4, %%mm4 \n\t" /* r4 = NR4 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
147 "psubsw %%mm5, %%mm6 \n\t" /* r6 = R6 = F. - B.. */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
148 "psraw $4, %%mm3 \n\t" /* r3 = NR3 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
149 "paddsw "OC_8", %%mm6 \n\t" /* adjust R6 (and R5) for shift */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
150 "paddsw %%mm5, %%mm5 \n\t" /* r5 = B.. + B.. */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
151 "paddsw %%mm6, %%mm5 \n\t" /* r5 = R5 = F. + B.. */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
152 "psraw $4, %%mm6 \n\t" /* r6 = NR6 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
153 "movq %%mm4, "J(4)"\n\t" /* store NR4 at J4 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
154 "psraw $4, %%mm5 \n\t" /* r5 = NR5 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
155 "movq %%mm3, "I(3)"\n\t" /* store NR3 at I3 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
156 "psubsw %%mm0, %%mm7 \n\t" /* r7 = R7 = G. - C. */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
157 "paddsw "OC_8", %%mm7 \n\t" /* adjust R7 (and R0) for shift */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
158 "paddsw %%mm0, %%mm0 \n\t" /* r0 = C. + C. */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
159 "paddsw %%mm7, %%mm0 \n\t" /* r0 = R0 = G. + C. */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
160 "psraw $4, %%mm7 \n\t" /* r7 = NR7 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
161 "movq %%mm6, "J(6)"\n\t" /* store NR6 at J6 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
162 "psraw $4, %%mm0 \n\t" /* r0 = NR0 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
163 "movq %%mm5, "J(5)"\n\t" /* store NR5 at J5 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
164 "movq %%mm7, "J(7)"\n\t" /* store NR7 at J7 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
165 "movq %%mm0, "I(0)"\n\t" /* store NR0 at I0 */
1866
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
166
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
167 /* Following macro does two 4x4 transposes in place.
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
168
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
169 At entry (we assume):
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
170
1969
56cb752222cc correct MMX-optimized variant of VP3 IDCT, with comments (thank you
melanson
parents: 1866
diff changeset
171 r0 = a3 a2 a1 a0
56cb752222cc correct MMX-optimized variant of VP3 IDCT, with comments (thank you
melanson
parents: 1866
diff changeset
172 I(1) = b3 b2 b1 b0
56cb752222cc correct MMX-optimized variant of VP3 IDCT, with comments (thank you
melanson
parents: 1866
diff changeset
173 r2 = c3 c2 c1 c0
56cb752222cc correct MMX-optimized variant of VP3 IDCT, with comments (thank you
melanson
parents: 1866
diff changeset
174 r3 = d3 d2 d1 d0
1866
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
175
1969
56cb752222cc correct MMX-optimized variant of VP3 IDCT, with comments (thank you
melanson
parents: 1866
diff changeset
176 r4 = e3 e2 e1 e0
56cb752222cc correct MMX-optimized variant of VP3 IDCT, with comments (thank you
melanson
parents: 1866
diff changeset
177 r5 = f3 f2 f1 f0
56cb752222cc correct MMX-optimized variant of VP3 IDCT, with comments (thank you
melanson
parents: 1866
diff changeset
178 r6 = g3 g2 g1 g0
56cb752222cc correct MMX-optimized variant of VP3 IDCT, with comments (thank you
melanson
parents: 1866
diff changeset
179 r7 = h3 h2 h1 h0
1866
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
180
1969
56cb752222cc correct MMX-optimized variant of VP3 IDCT, with comments (thank you
melanson
parents: 1866
diff changeset
181 At exit, we have:
1866
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
182
1969
56cb752222cc correct MMX-optimized variant of VP3 IDCT, with comments (thank you
melanson
parents: 1866
diff changeset
183 I(0) = d0 c0 b0 a0
56cb752222cc correct MMX-optimized variant of VP3 IDCT, with comments (thank you
melanson
parents: 1866
diff changeset
184 I(1) = d1 c1 b1 a1
56cb752222cc correct MMX-optimized variant of VP3 IDCT, with comments (thank you
melanson
parents: 1866
diff changeset
185 I(2) = d2 c2 b2 a2
56cb752222cc correct MMX-optimized variant of VP3 IDCT, with comments (thank you
melanson
parents: 1866
diff changeset
186 I(3) = d3 c3 b3 a3
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2753
diff changeset
187
1969
56cb752222cc correct MMX-optimized variant of VP3 IDCT, with comments (thank you
melanson
parents: 1866
diff changeset
188 J(4) = h0 g0 f0 e0
56cb752222cc correct MMX-optimized variant of VP3 IDCT, with comments (thank you
melanson
parents: 1866
diff changeset
189 J(5) = h1 g1 f1 e1
56cb752222cc correct MMX-optimized variant of VP3 IDCT, with comments (thank you
melanson
parents: 1866
diff changeset
190 J(6) = h2 g2 f2 e2
56cb752222cc correct MMX-optimized variant of VP3 IDCT, with comments (thank you
melanson
parents: 1866
diff changeset
191 J(7) = h3 g3 f3 e3
1866
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
192
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
193 I(0) I(1) I(2) I(3) is the transpose of r0 I(1) r2 r3.
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
194 J(4) J(5) J(6) J(7) is the transpose of r4 r5 r6 r7.
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
195
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
196 Since r1 is free at entry, we calculate the Js first. */
7877
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
197 #define Transpose() \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
198 "movq %%mm4, %%mm1 \n\t" /* r1 = e3 e2 e1 e0 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
199 "punpcklwd %%mm5, %%mm4 \n\t" /* r4 = f1 e1 f0 e0 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
200 "movq %%mm0, "I(0)"\n\t" /* save a3 a2 a1 a0 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
201 "punpckhwd %%mm5, %%mm1 \n\t" /* r1 = f3 e3 f2 e2 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
202 "movq %%mm6, %%mm0 \n\t" /* r0 = g3 g2 g1 g0 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
203 "punpcklwd %%mm7, %%mm6 \n\t" /* r6 = h1 g1 h0 g0 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
204 "movq %%mm4, %%mm5 \n\t" /* r5 = f1 e1 f0 e0 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
205 "punpckldq %%mm6, %%mm4 \n\t" /* r4 = h0 g0 f0 e0 = R4 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
206 "punpckhdq %%mm6, %%mm5 \n\t" /* r5 = h1 g1 f1 e1 = R5 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
207 "movq %%mm1, %%mm6 \n\t" /* r6 = f3 e3 f2 e2 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
208 "movq %%mm4, "J(4)"\n\t" \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
209 "punpckhwd %%mm7, %%mm0 \n\t" /* r0 = h3 g3 h2 g2 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
210 "movq %%mm5, "J(5)"\n\t" \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
211 "punpckhdq %%mm0, %%mm6 \n\t" /* r6 = h3 g3 f3 e3 = R7 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
212 "movq "I(0)", %%mm4 \n\t" /* r4 = a3 a2 a1 a0 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
213 "punpckldq %%mm0, %%mm1 \n\t" /* r1 = h2 g2 f2 e2 = R6 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
214 "movq "I(1)", %%mm5 \n\t" /* r5 = b3 b2 b1 b0 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
215 "movq %%mm4, %%mm0 \n\t" /* r0 = a3 a2 a1 a0 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
216 "movq %%mm6, "J(7)"\n\t" \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
217 "punpcklwd %%mm5, %%mm0 \n\t" /* r0 = b1 a1 b0 a0 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
218 "movq %%mm1, "J(6)"\n\t" \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
219 "punpckhwd %%mm5, %%mm4 \n\t" /* r4 = b3 a3 b2 a2 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
220 "movq %%mm2, %%mm5 \n\t" /* r5 = c3 c2 c1 c0 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
221 "punpcklwd %%mm3, %%mm2 \n\t" /* r2 = d1 c1 d0 c0 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
222 "movq %%mm0, %%mm1 \n\t" /* r1 = b1 a1 b0 a0 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
223 "punpckldq %%mm2, %%mm0 \n\t" /* r0 = d0 c0 b0 a0 = R0 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
224 "punpckhdq %%mm2, %%mm1 \n\t" /* r1 = d1 c1 b1 a1 = R1 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
225 "movq %%mm4, %%mm2 \n\t" /* r2 = b3 a3 b2 a2 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
226 "movq %%mm0, "I(0)"\n\t" \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
227 "punpckhwd %%mm3, %%mm5 \n\t" /* r5 = d3 c3 d2 c2 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
228 "movq %%mm1, "I(1)"\n\t" \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
229 "punpckhdq %%mm5, %%mm4 \n\t" /* r4 = d3 c3 b3 a3 = R3 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
230 "punpckldq %%mm5, %%mm2 \n\t" /* r2 = d2 c2 b2 a2 = R2 */ \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
231 "movq %%mm4, "I(3)"\n\t" \
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
232 "movq %%mm2, "I(2)"\n\t"
1866
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
233
2696
9699d325049d porting the mmx&sse2 (sse2 untested) vp3 idcts to the lavc idct API
michael
parents: 1977
diff changeset
234 void ff_vp3_idct_mmx(int16_t *output_data)
1866
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
235 {
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
236 /* eax = quantized input
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
237 * ebx = dequantizer matrix
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
238 * ecx = IDCT constants
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
239 * M(I) = ecx + MaskOffset(0) + I * 8
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
240 * C(I) = ecx + CosineOffset(32) + (I-1) * 8
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
241 * edx = output
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
242 * r0..r7 = mm0..mm7
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
243 */
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
244
7877
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
245 #define C(x) AV_STRINGIFY(16*(x-1))"(%1)"
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
246 #define OC_8 "%2"
1866
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
247
1969
56cb752222cc correct MMX-optimized variant of VP3 IDCT, with comments (thank you
melanson
parents: 1866
diff changeset
248 /* at this point, function has completed dequantization + dezigzag +
1866
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
249 * partial transposition; now do the idct itself */
7877
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
250 #define I(x) AV_STRINGIFY(16* x )"(%0)"
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
251 #define J(x) AV_STRINGIFY(16*(x-4) + 8)"(%0)"
1866
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
252
7877
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
253 asm volatile (
7879
e05e021fce72 Cosmetics: reindent
conrad
parents: 7877
diff changeset
254 RowIDCT()
e05e021fce72 Cosmetics: reindent
conrad
parents: 7877
diff changeset
255 Transpose()
1866
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
256
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
257 #undef I
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
258 #undef J
7877
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
259 #define I(x) AV_STRINGIFY(16* x + 64)"(%0)"
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
260 #define J(x) AV_STRINGIFY(16*(x-4) + 72)"(%0)"
1866
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
261
7879
e05e021fce72 Cosmetics: reindent
conrad
parents: 7877
diff changeset
262 RowIDCT()
e05e021fce72 Cosmetics: reindent
conrad
parents: 7877
diff changeset
263 Transpose()
1866
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
264
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
265 #undef I
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
266 #undef J
7877
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
267 #define I(x) AV_STRINGIFY(16*x)"(%0)"
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
268 #define J(x) AV_STRINGIFY(16*x)"(%0)"
1866
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
269
7879
e05e021fce72 Cosmetics: reindent
conrad
parents: 7877
diff changeset
270 ColumnIDCT()
1866
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
271
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
272 #undef I
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
273 #undef J
7877
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
274 #define I(x) AV_STRINGIFY(16*x + 8)"(%0)"
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
275 #define J(x) AV_STRINGIFY(16*x + 8)"(%0)"
1866
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
276
7879
e05e021fce72 Cosmetics: reindent
conrad
parents: 7877
diff changeset
277 ColumnIDCT()
7877
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
278 :: "r"(output_data), "r"(ff_vp3_idct_data), "m"(ff_pw_8)
8759422d660a Rewrite MMX VP3 IDCT in inline asm
conrad
parents: 7759
diff changeset
279 );
1866
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
280 #undef I
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
281 #undef J
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
282
1755f959ab7f seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
diff changeset
283 }
5014
42b99a3aadde better separation of vp3dsp functions from dsputil_mmx.c
aurel
parents: 5010
diff changeset
284
42b99a3aadde better separation of vp3dsp functions from dsputil_mmx.c
aurel
parents: 5010
diff changeset
285 void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block)
42b99a3aadde better separation of vp3dsp functions from dsputil_mmx.c
aurel
parents: 5010
diff changeset
286 {
42b99a3aadde better separation of vp3dsp functions from dsputil_mmx.c
aurel
parents: 5010
diff changeset
287 ff_vp3_idct_mmx(block);
42b99a3aadde better separation of vp3dsp functions from dsputil_mmx.c
aurel
parents: 5010
diff changeset
288 put_signed_pixels_clamped_mmx(block, dest, line_size);
42b99a3aadde better separation of vp3dsp functions from dsputil_mmx.c
aurel
parents: 5010
diff changeset
289 }
42b99a3aadde better separation of vp3dsp functions from dsputil_mmx.c
aurel
parents: 5010
diff changeset
290
42b99a3aadde better separation of vp3dsp functions from dsputil_mmx.c
aurel
parents: 5010
diff changeset
291 void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block)
42b99a3aadde better separation of vp3dsp functions from dsputil_mmx.c
aurel
parents: 5010
diff changeset
292 {
42b99a3aadde better separation of vp3dsp functions from dsputil_mmx.c
aurel
parents: 5010
diff changeset
293 ff_vp3_idct_mmx(block);
42b99a3aadde better separation of vp3dsp functions from dsputil_mmx.c
aurel
parents: 5010
diff changeset
294 add_pixels_clamped_mmx(block, dest, line_size);
42b99a3aadde better separation of vp3dsp functions from dsputil_mmx.c
aurel
parents: 5010
diff changeset
295 }