annotate i386/fft_sse.c @ 1795:920e6381e1fe libavcodec

2 byte shorter userdata for mpeg4. In the past it was startcode,string,00,7F,startcode; now it is startcode,string,startcode. Both are mpeg4 compliant, as according to the standard the userdata lasts until the next 00 00 01 (startcode prefix), but some very primitive decoders which simply skip until the first 00 byte and then expect the next valid startcode might fail with the old variant. Just a theory though (didn't test if quicktime can decode it now).
author michael
date Sun, 08 Feb 2004 22:52:35 +0000
parents 64f1a11b5f86
children dd63cb7e5080
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
781
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
1 /*
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
2 * FFT/MDCT transform with SSE optimizations
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
3 * Copyright (c) 2002 Fabrice Bellard.
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
4 *
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
5 * This library is free software; you can redistribute it and/or
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
6 * modify it under the terms of the GNU Lesser General Public
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
7 * License as published by the Free Software Foundation; either
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
8 * version 2 of the License, or (at your option) any later version.
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
9 *
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
10 * This library is distributed in the hope that it will be useful,
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
13 * Lesser General Public License for more details.
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
14 *
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
15 * You should have received a copy of the GNU Lesser General Public
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
16 * License along with this library; if not, write to the Free Software
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
18 */
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
19 #include "../dsputil.h"
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
20 #include <math.h>
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
21
968
64f1a11b5f86 added define for builtins use - inverse fix by Romain Dolbeau
bellard
parents: 781
diff changeset
22 #ifdef HAVE_BUILTIN_VECTOR
64f1a11b5f86 added define for builtins use - inverse fix by Romain Dolbeau
bellard
parents: 781
diff changeset
23
781
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
24 #include <xmmintrin.h>
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
25
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
/*
 * Per-lane sign masks for the SSE butterflies. The name spells the sign of
 * each of the four lanes in order (p1 = +1.0, m1 = -1.0). The tables are
 * 16-byte aligned so they can be fetched with aligned 128-bit loads.
 */
static const float p1p1p1m1[4] __attribute__((aligned(16))) = { 1.0f, 1.0f,  1.0f, -1.0f };
static const float p1p1m1p1[4] __attribute__((aligned(16))) = { 1.0f, 1.0f, -1.0f,  1.0f };
static const float p1p1m1m1[4] __attribute__((aligned(16))) = { 1.0f, 1.0f, -1.0f, -1.0f };
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
34
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
#if 0
/*
 * Debug helper (compiled out): print the label 'str' followed by the four
 * float lanes of the SSE vector 'a', lowest lane first.
 */
static void print_v4sf(const char *str, __m128 a)
{
    const float *lane = (const float *)&a;

    printf("%s: %f %f %f %f\n", str, lane[0], lane[1], lane[2], lane[3]);
}
#endif
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
43
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
44 /* XXX: handle reverse case */
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
45 void fft_calc_sse(FFTContext *s, FFTComplex *z)
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
46 {
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
47 int ln = s->nbits;
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
48 int j, np, np2;
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
49 int nblocks, nloops;
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
50 register FFTComplex *p, *q;
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
51 FFTComplex *cptr, *cptr1;
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
52 int k;
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
53
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
54 np = 1 << ln;
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
55
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
56 {
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
57 __m128 *r, a, b, a1, c1, c2;
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
58
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
59 r = (__m128 *)&z[0];
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
60 c1 = *(__m128 *)p1p1m1m1;
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
61 c2 = *(__m128 *)p1p1p1m1;
968
64f1a11b5f86 added define for builtins use - inverse fix by Romain Dolbeau
bellard
parents: 781
diff changeset
62 if (s->inverse)
64f1a11b5f86 added define for builtins use - inverse fix by Romain Dolbeau
bellard
parents: 781
diff changeset
63 c2 = *(__m128 *)p1p1m1p1;
64f1a11b5f86 added define for builtins use - inverse fix by Romain Dolbeau
bellard
parents: 781
diff changeset
64 else
64f1a11b5f86 added define for builtins use - inverse fix by Romain Dolbeau
bellard
parents: 781
diff changeset
65 c2 = *(__m128 *)p1p1p1m1;
64f1a11b5f86 added define for builtins use - inverse fix by Romain Dolbeau
bellard
parents: 781
diff changeset
66
781
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
67 j = (np >> 2);
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
68 do {
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
69 a = r[0];
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
70 b = _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 0, 3, 2));
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
71 a = _mm_mul_ps(a, c1);
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
72 /* do the pass 0 butterfly */
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
73 a = _mm_add_ps(a, b);
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
74
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
75 a1 = r[1];
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
76 b = _mm_shuffle_ps(a1, a1, _MM_SHUFFLE(1, 0, 3, 2));
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
77 a1 = _mm_mul_ps(a1, c1);
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
78 /* do the pass 0 butterfly */
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
79 b = _mm_add_ps(a1, b);
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
80
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
81 /* multiply third by -i */
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
82 b = _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 3, 1, 0));
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
83 b = _mm_mul_ps(b, c2);
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
84
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
85 /* do the pass 1 butterfly */
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
86 r[0] = _mm_add_ps(a, b);
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
87 r[1] = _mm_sub_ps(a, b);
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
88 r += 2;
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
89 } while (--j != 0);
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
90 }
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
91 /* pass 2 .. ln-1 */
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
92
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
93 nblocks = np >> 3;
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
94 nloops = 1 << 2;
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
95 np2 = np >> 1;
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
96
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
97 cptr1 = s->exptab1;
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
98 do {
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
99 p = z;
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
100 q = z + nloops;
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
101 j = nblocks;
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
102 do {
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
103 cptr = cptr1;
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
104 k = nloops >> 1;
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
105 do {
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
106 __m128 a, b, c, t1, t2;
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
107
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
108 a = *(__m128 *)p;
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
109 b = *(__m128 *)q;
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
110
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
111 /* complex mul */
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
112 c = *(__m128 *)cptr;
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
113 /* cre*re cim*re */
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
114 t1 = _mm_mul_ps(c,
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
115 _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 2, 0, 0)));
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
116 c = *(__m128 *)(cptr + 2);
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
117 /* -cim*im cre*im */
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
118 t2 = _mm_mul_ps(c,
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
119 _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 3, 1, 1)));
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
120 b = _mm_add_ps(t1, t2);
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
121
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
122 /* butterfly */
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
123 *(__m128 *)p = _mm_add_ps(a, b);
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
124 *(__m128 *)q = _mm_sub_ps(a, b);
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
125
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
126 p += 2;
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
127 q += 2;
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
128 cptr += 4;
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
129 } while (--k);
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
130
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
131 p += nloops;
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
132 q += nloops;
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
133 } while (--j);
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
134 cptr1 += nloops * 2;
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
135 nblocks = nblocks >> 1;
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
136 nloops = nloops << 1;
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
137 } while (nblocks != 0);
6f5e87957bcb new generic FFT/MDCT code for audio codecs
bellard
parents:
diff changeset
138 }
968
64f1a11b5f86 added define for builtins use - inverse fix by Romain Dolbeau
bellard
parents: 781
diff changeset
139
64f1a11b5f86 added define for builtins use - inverse fix by Romain Dolbeau
bellard
parents: 781
diff changeset
140 #endif