annotate x86/dct32_sse.c @ 12530:63edd10ad4bc libavcodec tip

Try to fix crashes introduced by r25218. r25218 made assumptions about the existence of past reference frames that weren't necessarily true.
author darkshikari
date Tue, 28 Sep 2010 09:06:22 +0000
parents db9ef48dc0e4
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
12100
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
1 /*
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
2 * 32 point SSE-optimized DCT transform
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
3 * Copyright (c) 2010 Vitor Sessak
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
4 *
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
5 * This file is part of FFmpeg.
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
6 *
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
7 * FFmpeg is free software; you can redistribute it and/or
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
9 * License as published by the Free Software Foundation; either
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
10 * version 2.1 of the License, or (at your option) any later version.
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
11 *
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
12 * FFmpeg is distributed in the hope that it will be useful,
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
15 * Lesser General Public License for more details.
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
16 *
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
18 * License along with FFmpeg; if not, write to the Free Software
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
20 */
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
21
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
22 #include <stdint.h>
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
23
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
24 #include "libavutil/x86_cpu.h"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
25 #include "libavutil/mem.h"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
26 #include "libavcodec/dsputil.h"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
27 #include "fft.h"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
28
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
29 DECLARE_ALIGNED(16, static const float, b1)[] = {
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
30 0.500603, 0.505471, 0.515447, 0.531043,
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
31 0.553104, 0.582935, 0.622504, 0.674808,
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
32 -1.169440, -0.972568, -0.839350, -0.744536,
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
33 -10.190008, -3.407609, -2.057781, -1.484165,
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
34 0.502419, 0.522499, 0.566944, 0.646822,
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
35 0.788155, 1.060678, 1.722447, 5.101149,
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
36 0.509796, 0.601345, 0.899976, 2.562916,
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
37 1.000000, 1.000000, 1.306563, 0.541196,
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
38 1.000000, 0.707107, 1.000000, -0.707107
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
39 };
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
40
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
41 DECLARE_ALIGNED(16, static const int32_t, smask)[4] = {
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
42 0, 0, 0x80000000, 0x80000000
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
43 };
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
44
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
45 /* butterfly operator: b = a + b, a = (a - b) * c; tmp is clobbered */
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
46 #define BUTTERFLY(a,b,c,tmp) \
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
47 "movaps %%" #a ", %%" #tmp " \n\t" \
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
48 "subps %%" #b ", %%" #a " \n\t" \
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
49 "addps %%" #tmp ", %%" #b " \n\t" \
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
50 "mulps " #c ", %%" #a " \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
51
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
52 /* Same as BUTTERFLY when vectors a and b overlap (both halves live in the single register val) */
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
53 #define BUTTERFLY0(val, mask, cos, tmp, shuf) \
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
54 "movaps %%" #val ", %%" #tmp " \n\t" \
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
55 "shufps " #shuf ", %%" #val ",%%" #val " \n\t" \
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
56 "xorps %%" #mask ", %%" #tmp " \n\t" /* flip signs */ \
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
57 "addps %%" #tmp ", %%" #val " \n\t" \
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
58 "mulps %%" #cos ", %%" #val " \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
59
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
60 #define BUTTERFLY2(val, mask, cos, tmp) BUTTERFLY0(val, mask, cos, tmp, $0x1b)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
61 #define BUTTERFLY3(val, mask, cos, tmp) BUTTERFLY0(val, mask, cos, tmp, $0xb1)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
62
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
63 void ff_dct32_float_sse(FFTSample *out, const FFTSample *in)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
64 {
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
65 int32_t tmp1 = 0;
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
66 __asm__ volatile(
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
67 /* pass 1 */
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
68
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
69 "movaps (%4), %%xmm0 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
70 "movaps 112(%4), %%xmm1 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
71 "shufps $0x1b, %%xmm1, %%xmm1 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
72 BUTTERFLY(xmm0, xmm1, (%2), xmm3)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
73
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
74 "movaps 64(%4), %%xmm7 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
75 "movaps 48(%4), %%xmm4 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
76 "shufps $0x1b, %%xmm4, %%xmm4 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
77 BUTTERFLY(xmm7, xmm4, 48(%2), xmm3)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
78
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
79
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
80 /* pass 2 */
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
81 "movaps 64(%2), %%xmm2 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
82 BUTTERFLY(xmm1, xmm4, %%xmm2, xmm3)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
83 "movaps %%xmm1, 48(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
84 "movaps %%xmm4, (%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
85
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
86 /* pass 1 */
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
87 "movaps 16(%4), %%xmm1 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
88 "movaps 96(%4), %%xmm6 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
89 "shufps $0x1b, %%xmm6, %%xmm6 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
90 BUTTERFLY(xmm1, xmm6, 16(%2), xmm3)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
91
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
92 "movaps 80(%4), %%xmm4 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
93 "movaps 32(%4), %%xmm5 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
94 "shufps $0x1b, %%xmm5, %%xmm5 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
95 BUTTERFLY(xmm4, xmm5, 32(%2), xmm3)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
96
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
97 /* pass 2 */
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
98 BUTTERFLY(xmm0, xmm7, %%xmm2, xmm3)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
99
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
100 "movaps 80(%2), %%xmm2 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
101 BUTTERFLY(xmm6, xmm5, %%xmm2, xmm3)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
102
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
103 BUTTERFLY(xmm1, xmm4, %%xmm2, xmm3)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
104
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
105 /* pass 3 */
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
106 "movaps 96(%2), %%xmm2 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
107 "shufps $0x1b, %%xmm1, %%xmm1 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
108 BUTTERFLY(xmm0, xmm1, %%xmm2, xmm3)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
109 "movaps %%xmm0, 112(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
110 "movaps %%xmm1, 96(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
111
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
112 "movaps 0(%1), %%xmm0 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
113 "shufps $0x1b, %%xmm5, %%xmm5 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
114 BUTTERFLY(xmm0, xmm5, %%xmm2, xmm3)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
115
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
116 "movaps 48(%1), %%xmm1 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
117 "shufps $0x1b, %%xmm6, %%xmm6 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
118 BUTTERFLY(xmm1, xmm6, %%xmm2, xmm3)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
119 "movaps %%xmm1, 48(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
120
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
121 "shufps $0x1b, %%xmm4, %%xmm4 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
122 BUTTERFLY(xmm7, xmm4, %%xmm2, xmm3)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
123
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
124 /* pass 4 */
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
125 "movaps (%3), %%xmm3 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
126 "movaps 112(%2), %%xmm2 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
127
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
128 BUTTERFLY2(xmm5, xmm3, xmm2, xmm1)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
129
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
130 BUTTERFLY2(xmm0, xmm3, xmm2, xmm1)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
131 "movaps %%xmm0, 16(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
132
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
133 BUTTERFLY2(xmm6, xmm3, xmm2, xmm1)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
134 "movaps %%xmm6, 32(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
135
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
136 "movaps 48(%1), %%xmm0 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
137 BUTTERFLY2(xmm0, xmm3, xmm2, xmm1)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
138 "movaps %%xmm0, 48(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
139
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
140 BUTTERFLY2(xmm4, xmm3, xmm2, xmm1)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
141
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
142 BUTTERFLY2(xmm7, xmm3, xmm2, xmm1)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
143
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
144 "movaps 96(%1), %%xmm6 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
145 BUTTERFLY2(xmm6, xmm3, xmm2, xmm1)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
146
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
147 "movaps 112(%1), %%xmm0 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
148 BUTTERFLY2(xmm0, xmm3, xmm2, xmm1)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
149
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
150 /* pass 5 */
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
151 "movaps 128(%2), %%xmm2 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
152 "shufps $0xCC, %%xmm3,%%xmm3 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
153
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
154 BUTTERFLY3(xmm5, xmm3, xmm2, xmm1)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
155 "movaps %%xmm5, (%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
156
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
157 "movaps 16(%1), %%xmm1 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
158 BUTTERFLY3(xmm1, xmm3, xmm2, xmm5)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
159 "movaps %%xmm1, 16(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
160
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
161 BUTTERFLY3(xmm4, xmm3, xmm2, xmm5)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
162 "movaps %%xmm4, 64(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
163
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
164 BUTTERFLY3(xmm7, xmm3, xmm2, xmm5)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
165 "movaps %%xmm7, 80(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
166
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
167 "movaps 32(%1), %%xmm5 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
168 BUTTERFLY3(xmm5, xmm3, xmm2, xmm7)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
169 "movaps %%xmm5, 32(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
170
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
171 "movaps 48(%1), %%xmm4 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
172 BUTTERFLY3(xmm4, xmm3, xmm2, xmm7)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
173 "movaps %%xmm4, 48(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
174
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
175 BUTTERFLY3(xmm6, xmm3, xmm2, xmm7)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
176 "movaps %%xmm6, 96(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
177
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
178 BUTTERFLY3(xmm0, xmm3, xmm2, xmm7)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
179 "movaps %%xmm0, 112(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
180
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
181
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
182 /* pass 6, no SIMD... */
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
183 "movss 56(%1), %%xmm3 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
184 "movl 4(%1), %0 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
185 "addss 60(%1), %%xmm3 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
186 "movss 72(%1), %%xmm7 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
187 "addss %%xmm3, %%xmm4 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
188 "movss 52(%1), %%xmm2 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
189 "addss %%xmm3, %%xmm2 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
190 "movss 24(%1), %%xmm3 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
191 "addss 28(%1), %%xmm3 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
192 "addss 76(%1), %%xmm7 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
193 "addss %%xmm3, %%xmm1 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
194 "addss %%xmm4, %%xmm5 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
195 "movss %%xmm1, 16(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
196 "movss 20(%1), %%xmm1 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
197 "addss %%xmm3, %%xmm1 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
198 "movss 40(%1), %%xmm3 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
199 "movss %%xmm1, 48(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
200 "addss 44(%1), %%xmm3 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
201 "movss 20(%1), %%xmm1 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
202 "addss %%xmm3, %%xmm4 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
203 "addss %%xmm2, %%xmm3 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
204 "addss 28(%1), %%xmm1 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
205 "movss %%xmm3, 40(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
206 "addss 36(%1), %%xmm2 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
207 "movss 8(%1), %%xmm3 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
208 "movss %%xmm2, 56(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
209 "addss 12(%1), %%xmm3 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
210 "movss %%xmm5, 8(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
211 "movss %%xmm3, 32(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
212 "movss 52(%1), %%xmm2 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
213 "movss 80(%1), %%xmm3 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
214 "movss 120(%1), %%xmm5 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
215 "movss %%xmm1, 80(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
216 "movss %%xmm4, 24(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
217 "addss 124(%1), %%xmm5 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
218 "movss 64(%1), %%xmm1 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
219 "addss 60(%1), %%xmm2 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
220 "addss %%xmm5, %%xmm0 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
221 "addss 116(%1), %%xmm5 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
222 "movl %0, 64(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
223 "addss %%xmm0, %%xmm6 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
224 "addss %%xmm6, %%xmm1 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
225 "movl 12(%1), %0 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
226 "movss %%xmm1, 4(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
227 "movss 88(%1), %%xmm1 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
228 "movl %0, 96(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
229 "addss 92(%1), %%xmm1 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
230 "movss 104(%1), %%xmm4 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
231 "movl 28(%1), %0 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
232 "addss 108(%1), %%xmm4 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
233 "addss %%xmm4, %%xmm0 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
234 "addss %%xmm1, %%xmm3 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
235 "addss 84(%1), %%xmm1 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
236 "addss %%xmm5, %%xmm4 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
237 "addss %%xmm3, %%xmm6 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
238 "addss %%xmm0, %%xmm3 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
239 "addss %%xmm7, %%xmm0 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
240 "addss 100(%1), %%xmm5 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
241 "addss %%xmm4, %%xmm7 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
242 "movl %0, 112(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
243 "movss %%xmm0, 28(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
244 "movss 36(%1), %%xmm0 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
245 "movss %%xmm7, 36(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
246 "addss %%xmm1, %%xmm4 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
247 "movss 116(%1), %%xmm7 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
248 "addss %%xmm2, %%xmm0 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
249 "addss 124(%1), %%xmm7 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
250 "movss %%xmm0, 72(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
251 "movss 44(%1), %%xmm0 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
252 "movss %%xmm6, 12(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
253 "movss %%xmm3, 20(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
254 "addss %%xmm0, %%xmm2 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
255 "movss %%xmm4, 44(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
256 "movss %%xmm2, 88(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
257 "addss 60(%1), %%xmm0 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
258 "movl 60(%1), %0 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
259 "movl %0, 120(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
260 "movss %%xmm0, 104(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
261 "addss %%xmm5, %%xmm1 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
262 "addss 68(%1), %%xmm5 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
263 "movss %%xmm1, 52(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
264 "movss %%xmm5, 60(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
265 "movss 68(%1), %%xmm1 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
266 "movss 100(%1), %%xmm5 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
267 "addss %%xmm7, %%xmm5 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
268 "addss 108(%1), %%xmm7 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
269 "addss %%xmm5, %%xmm1 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
270 "movss 84(%1), %%xmm2 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
271 "addss 92(%1), %%xmm2 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
272 "addss %%xmm2, %%xmm5 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
273 "movss %%xmm1, 68(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
274 "addss %%xmm7, %%xmm2 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
275 "movss 76(%1), %%xmm1 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
276 "movss %%xmm2, 84(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
277 "movss %%xmm5, 76(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
278 "movss 108(%1), %%xmm2 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
279 "addss %%xmm1, %%xmm7 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
280 "addss 124(%1), %%xmm2 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
281 "addss %%xmm2, %%xmm1 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
282 "addss 92(%1), %%xmm2 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
283 "movss %%xmm1, 100(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
284 "movss %%xmm2, 108(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
285 "movss 92(%1), %%xmm2 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
286 "movss %%xmm7, 92(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
287 "addss 124(%1), %%xmm2 \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
288 "movss %%xmm2, 116(%1) \n\t"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
289 :"+&r"(tmp1)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
290 :"r"(out), "r"(b1), "r"(smask), "r"(in)
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
291 :"memory"
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
292 );
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
293 }
db9ef48dc0e4 Move SSE optimized 32-point DCT to its own file. Should fix breakage with YASM
vitor
parents:
diff changeset
294