Mercurial > libavcodec.hg
annotate dct.c @ 12399:020540442072 libavcodec
Convert ff_imdct_half_sse() to yasm.
This is to avoid split asm sections that attempt to preserve some
registers between sections.
author | alexc |
---|---|
date | Sun, 22 Aug 2010 14:39:58 +0000 |
parents | 1bf322283429 |
children |
rev | line source |
---|---|
10944 | 1 /* |
2 * (I)DCT Transforms | |
3 * Copyright (c) 2009 Peter Ross <pross@xvid.org> | |
4 * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com> | |
5 * Copyright (c) 2010 Vitor Sessak | |
6 * | |
7 * This file is part of FFmpeg. | |
8 * | |
9 * FFmpeg is free software; you can redistribute it and/or | |
10 * modify it under the terms of the GNU Lesser General Public | |
11 * License as published by the Free Software Foundation; either | |
12 * version 2.1 of the License, or (at your option) any later version. | |
13 * | |
14 * FFmpeg is distributed in the hope that it will be useful, | |
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 * Lesser General Public License for more details. | |
18 * | |
19 * You should have received a copy of the GNU Lesser General Public | |
20 * License along with FFmpeg; if not, write to the Free Software | |
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
22 */ | |
23 | |
24 /** | |
11644
7dd2a45249a9
Remove explicit filename from Doxygen @file commands.
diego
parents:
11535
diff
changeset
|
25 * @file |
10944 | 26 * (Inverse) Discrete Cosine Transforms. These are also known as the |
27 * type II and type III DCTs respectively. | |
28 */ | |
29 | |
30 #include <math.h> | |
11370 | 31 #include "libavutil/mathematics.h" |
32 #include "fft.h" | |
12099 | 33 #include "x86/fft.h" |
10944 | 34 |
12026
3f3d08bb5cf8
More mp{1,2,3} 32-point DCT transform to our common DCT framework.
vitor
parents:
11644
diff
changeset
|
35 #define DCT32_FLOAT |
3f3d08bb5cf8
More mp{1,2,3} 32-point DCT transform to our common DCT framework.
vitor
parents:
11644
diff
changeset
|
36 #include "dct32.c" |
3f3d08bb5cf8
More mp{1,2,3} 32-point DCT transform to our common DCT framework.
vitor
parents:
11644
diff
changeset
|
37 |
/*
 * Table lookups into the context's cosine table.
 *
 * s: pointer to a context exposing a `costab` array
 * n: transform size
 * x: table index
 *
 * Every macro argument is parenthesized so that expression arguments
 * (for example `&ctx` or `a + b`) expand with the intended precedence.
 */

/* sin(M_PI * x / (2*n)): read the cosine table mirrored around n */
#define SIN(s,n,x) ((s)->costab[(n) - (x)])

/* cos(M_PI * x / (2*n)): n is unused, kept for symmetry with SIN() */
#define COS(s,n,x) ((s)->costab[(x)])
43 | |
11535
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
44 static void ff_dst_calc_I_c(DCTContext *ctx, FFTSample *data) |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
45 { |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
46 int n = 1 << ctx->nbits; |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
47 int i; |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
48 |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
49 data[0] = 0; |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
50 for(i = 1; i < n/2; i++) { |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
51 float tmp1 = data[i ]; |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
52 float tmp2 = data[n - i]; |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
53 float s = SIN(ctx, n, 2*i); |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
54 |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
55 s *= tmp1 + tmp2; |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
56 tmp1 = (tmp1 - tmp2) * 0.5f; |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
57 data[i ] = s + tmp1; |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
58 data[n - i] = s - tmp1; |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
59 } |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
60 |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
61 data[n/2] *= 2; |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
62 ff_rdft_calc(&ctx->rdft, data); |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
63 |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
64 data[0] *= 0.5f; |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
65 |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
66 for(i = 1; i < n-2; i += 2) { |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
67 data[i + 1] += data[i - 1]; |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
68 data[i ] = -data[i + 2]; |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
69 } |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
70 |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
71 data[n-1] = 0; |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
72 } |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
73 |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
74 static void ff_dct_calc_I_c(DCTContext *ctx, FFTSample *data) |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
75 { |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
76 int n = 1 << ctx->nbits; |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
77 int i; |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
78 float next = -0.5f * (data[0] - data[n]); |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
79 |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
80 for(i = 0; i < n/2; i++) { |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
81 float tmp1 = data[i ]; |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
82 float tmp2 = data[n - i]; |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
83 float s = SIN(ctx, n, 2*i); |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
84 float c = COS(ctx, n, 2*i); |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
85 |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
86 c *= tmp1 - tmp2; |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
87 s *= tmp1 - tmp2; |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
88 |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
89 next += c; |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
90 |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
91 tmp1 = (tmp1 + tmp2) * 0.5f; |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
92 data[i ] = tmp1 - s; |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
93 data[n - i] = tmp1 + s; |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
94 } |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
95 |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
96 ff_rdft_calc(&ctx->rdft, data); |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
97 data[n] = data[1]; |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
98 data[1] = next; |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
99 |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
100 for(i = 3; i <= n; i += 2) |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
101 data[i] = data[i - 2] - data[i]; |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
102 } |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
103 |
11519
c091ab3b4135
Split DCT-II and DCT-III in different functions, they do not share any code.
vitor
parents:
11518
diff
changeset
|
104 static void ff_dct_calc_III_c(DCTContext *ctx, FFTSample *data) |
10944 | 105 { |
106 int n = 1 << ctx->nbits; | |
107 int i; | |
108 | |
11520 | 109 float next = data[n - 1]; |
110 float inv_n = 1.0f / n; | |
10944 | 111 |
11520 | 112 for (i = n - 2; i >= 2; i -= 2) { |
113 float val1 = data[i ]; | |
114 float val2 = data[i - 1] - data[i + 1]; | |
115 float c = COS(ctx, n, i); | |
116 float s = SIN(ctx, n, i); | |
10944 | 117 |
11520 | 118 data[i ] = c * val1 + s * val2; |
119 data[i + 1] = s * val1 - c * val2; | |
120 } | |
10944 | 121 |
11520 | 122 data[1] = 2 * next; |
10944 | 123 |
11520 | 124 ff_rdft_calc(&ctx->rdft, data); |
10944 | 125 |
11520 | 126 for (i = 0; i < n / 2; i++) { |
127 float tmp1 = data[i ] * inv_n; | |
128 float tmp2 = data[n - i - 1] * inv_n; | |
129 float csc = ctx->csc2[i] * (tmp1 - tmp2); | |
10944 | 130 |
11520 | 131 tmp1 += tmp2; |
132 data[i ] = tmp1 + csc; | |
133 data[n - i - 1] = tmp1 - csc; | |
134 } | |
11519
c091ab3b4135
Split DCT-II and DCT-III in different functions, they do not share any code.
vitor
parents:
11518
diff
changeset
|
135 } |
c091ab3b4135
Split DCT-II and DCT-III in different functions, they do not share any code.
vitor
parents:
11518
diff
changeset
|
136 |
c091ab3b4135
Split DCT-II and DCT-III in different functions, they do not share any code.
vitor
parents:
11518
diff
changeset
|
137 static void ff_dct_calc_II_c(DCTContext *ctx, FFTSample *data) |
c091ab3b4135
Split DCT-II and DCT-III in different functions, they do not share any code.
vitor
parents:
11518
diff
changeset
|
138 { |
c091ab3b4135
Split DCT-II and DCT-III in different functions, they do not share any code.
vitor
parents:
11518
diff
changeset
|
139 int n = 1 << ctx->nbits; |
c091ab3b4135
Split DCT-II and DCT-III in different functions, they do not share any code.
vitor
parents:
11518
diff
changeset
|
140 int i; |
11520 | 141 float next; |
142 | |
143 for (i=0; i < n/2; i++) { | |
144 float tmp1 = data[i ]; | |
145 float tmp2 = data[n - i - 1]; | |
146 float s = SIN(ctx, n, 2*i + 1); | |
10944 | 147 |
11520 | 148 s *= tmp1 - tmp2; |
149 tmp1 = (tmp1 + tmp2) * 0.5f; | |
10944 | 150 |
11520 | 151 data[i ] = tmp1 + s; |
152 data[n-i-1] = tmp1 - s; | |
153 } | |
10944 | 154 |
11520 | 155 ff_rdft_calc(&ctx->rdft, data); |
10944 | 156 |
11520 | 157 next = data[1] * 0.5; |
158 data[1] *= -1; | |
10944 | 159 |
11520 | 160 for (i = n - 2; i >= 0; i -= 2) { |
161 float inr = data[i ]; | |
162 float ini = data[i + 1]; | |
163 float c = COS(ctx, n, i); | |
164 float s = SIN(ctx, n, i); | |
10944 | 165 |
11520 | 166 data[i ] = c * inr + s * ini; |
10944 | 167 |
11520 | 168 data[i+1] = next; |
10944 | 169 |
11520 | 170 next += s * inr - c * ini; |
171 } | |
10944 | 172 } |
173 | |
12026
3f3d08bb5cf8
More mp{1,2,3} 32-point DCT transform to our common DCT framework.
vitor
parents:
11644
diff
changeset
|
174 static void dct32_func(DCTContext *ctx, FFTSample *data) |
3f3d08bb5cf8
More mp{1,2,3} 32-point DCT transform to our common DCT framework.
vitor
parents:
11644
diff
changeset
|
175 { |
3f3d08bb5cf8
More mp{1,2,3} 32-point DCT transform to our common DCT framework.
vitor
parents:
11644
diff
changeset
|
176 ctx->dct32(data, data); |
3f3d08bb5cf8
More mp{1,2,3} 32-point DCT transform to our common DCT framework.
vitor
parents:
11644
diff
changeset
|
177 } |
3f3d08bb5cf8
More mp{1,2,3} 32-point DCT transform to our common DCT framework.
vitor
parents:
11644
diff
changeset
|
178 |
10944 | 179 void ff_dct_calc(DCTContext *s, FFTSample *data) |
180 { | |
11518
c4d18d452f82
Call DCT by function pointer. Needed for any future ASM implementation and
vitor
parents:
11517
diff
changeset
|
181 s->dct_calc(s, data); |
10944 | 182 } |
183 | |
11535
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
184 av_cold int ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType inverse) |
11517
e3b680f6c106
Cosmetics: move ff_dct_init() to the bottom of the file
vitor
parents:
11370
diff
changeset
|
185 { |
e3b680f6c106
Cosmetics: move ff_dct_init() to the bottom of the file
vitor
parents:
11370
diff
changeset
|
186 int n = 1 << nbits; |
e3b680f6c106
Cosmetics: move ff_dct_init() to the bottom of the file
vitor
parents:
11370
diff
changeset
|
187 int i; |
e3b680f6c106
Cosmetics: move ff_dct_init() to the bottom of the file
vitor
parents:
11370
diff
changeset
|
188 |
e3b680f6c106
Cosmetics: move ff_dct_init() to the bottom of the file
vitor
parents:
11370
diff
changeset
|
189 s->nbits = nbits; |
e3b680f6c106
Cosmetics: move ff_dct_init() to the bottom of the file
vitor
parents:
11370
diff
changeset
|
190 s->inverse = inverse; |
e3b680f6c106
Cosmetics: move ff_dct_init() to the bottom of the file
vitor
parents:
11370
diff
changeset
|
191 |
e3b680f6c106
Cosmetics: move ff_dct_init() to the bottom of the file
vitor
parents:
11370
diff
changeset
|
192 ff_init_ff_cos_tabs(nbits+2); |
e3b680f6c106
Cosmetics: move ff_dct_init() to the bottom of the file
vitor
parents:
11370
diff
changeset
|
193 |
e3b680f6c106
Cosmetics: move ff_dct_init() to the bottom of the file
vitor
parents:
11370
diff
changeset
|
194 s->costab = ff_cos_tabs[nbits+2]; |
e3b680f6c106
Cosmetics: move ff_dct_init() to the bottom of the file
vitor
parents:
11370
diff
changeset
|
195 |
e3b680f6c106
Cosmetics: move ff_dct_init() to the bottom of the file
vitor
parents:
11370
diff
changeset
|
196 s->csc2 = av_malloc(n/2 * sizeof(FFTSample)); |
e3b680f6c106
Cosmetics: move ff_dct_init() to the bottom of the file
vitor
parents:
11370
diff
changeset
|
197 |
11535
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
198 if (ff_rdft_init(&s->rdft, nbits, inverse == DCT_III) < 0) { |
11517
e3b680f6c106
Cosmetics: move ff_dct_init() to the bottom of the file
vitor
parents:
11370
diff
changeset
|
199 av_free(s->csc2); |
e3b680f6c106
Cosmetics: move ff_dct_init() to the bottom of the file
vitor
parents:
11370
diff
changeset
|
200 return -1; |
e3b680f6c106
Cosmetics: move ff_dct_init() to the bottom of the file
vitor
parents:
11370
diff
changeset
|
201 } |
e3b680f6c106
Cosmetics: move ff_dct_init() to the bottom of the file
vitor
parents:
11370
diff
changeset
|
202 |
e3b680f6c106
Cosmetics: move ff_dct_init() to the bottom of the file
vitor
parents:
11370
diff
changeset
|
203 for (i = 0; i < n/2; i++) |
e3b680f6c106
Cosmetics: move ff_dct_init() to the bottom of the file
vitor
parents:
11370
diff
changeset
|
204 s->csc2[i] = 0.5 / sin((M_PI / (2*n) * (2*i + 1))); |
e3b680f6c106
Cosmetics: move ff_dct_init() to the bottom of the file
vitor
parents:
11370
diff
changeset
|
205 |
11535
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
206 switch(inverse) { |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
207 case DCT_I : s->dct_calc = ff_dct_calc_I_c; break; |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
208 case DCT_II : s->dct_calc = ff_dct_calc_II_c ; break; |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
209 case DCT_III: s->dct_calc = ff_dct_calc_III_c; break; |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
210 case DST_I : s->dct_calc = ff_dst_calc_I_c; break; |
f468aac92300
Implement the discrete sine/cosine transforms DCT-I and DST-I
vitor
parents:
11520
diff
changeset
|
211 } |
12026
3f3d08bb5cf8
More mp{1,2,3} 32-point DCT transform to our common DCT framework.
vitor
parents:
11644
diff
changeset
|
212 |
3f3d08bb5cf8
More mp{1,2,3} 32-point DCT transform to our common DCT framework.
vitor
parents:
11644
diff
changeset
|
213 if (inverse == DCT_II && nbits == 5) |
3f3d08bb5cf8
More mp{1,2,3} 32-point DCT transform to our common DCT framework.
vitor
parents:
11644
diff
changeset
|
214 s->dct_calc = dct32_func; |
3f3d08bb5cf8
More mp{1,2,3} 32-point DCT transform to our common DCT framework.
vitor
parents:
11644
diff
changeset
|
215 |
3f3d08bb5cf8
More mp{1,2,3} 32-point DCT transform to our common DCT framework.
vitor
parents:
11644
diff
changeset
|
216 s->dct32 = dct32; |
12099 | 217 if (HAVE_MMX) ff_dct_init_mmx(s); |
12026
3f3d08bb5cf8
More mp{1,2,3} 32-point DCT transform to our common DCT framework.
vitor
parents:
11644
diff
changeset
|
218 |
11517
e3b680f6c106
Cosmetics: move ff_dct_init() to the bottom of the file
vitor
parents:
11370
diff
changeset
|
219 return 0; |
e3b680f6c106
Cosmetics: move ff_dct_init() to the bottom of the file
vitor
parents:
11370
diff
changeset
|
220 } |
e3b680f6c106
Cosmetics: move ff_dct_init() to the bottom of the file
vitor
parents:
11370
diff
changeset
|
221 |
10944 | 222 av_cold void ff_dct_end(DCTContext *s) |
223 { | |
224 ff_rdft_end(&s->rdft); | |
225 av_free(s->csc2); | |
226 } |