annotate ppc/fft_altivec.c @ 12530:63edd10ad4bc libavcodec tip

Try to fix crashes introduced by r25218. r25218 made assumptions about the existence of past reference frames that weren't necessarily true.
author darkshikari
date Tue, 28 Sep 2010 09:06:22 +0000
parents 5638941ec8ef
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
975
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
1 /*
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
2 * FFT/IFFT transforms
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
3 * AltiVec-enabled
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents: 11969
diff changeset
4 * Copyright (c) 2009 Loren Merritt
975
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
5 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
6 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
7 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
8 * FFmpeg is free software; you can redistribute it and/or
975
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
9 * modify it under the terms of the GNU Lesser General Public
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
10 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
11 * version 2.1 of the License, or (at your option) any later version.
975
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
12 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
13 * FFmpeg is distributed in the hope that it will be useful,
975
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
16 * Lesser General Public License for more details.
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
17 *
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
18 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
19 * License along with FFmpeg; if not, write to the Free Software
3036
0b546eab515d Update licensing information: The FSF changed postal address.
diego
parents: 2979
diff changeset
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
975
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
21 */
11370
4b3da727d832 Move FFT parts from dsputil.h to fft.h
mru
parents: 10175
diff changeset
22 #include "libavcodec/fft.h"
5750
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents: 5010
diff changeset
23 #include "util_altivec.h"
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents: 11969
diff changeset
24 #include "types_altivec.h"
11382
50415a8f1451 PPC: move prototypes to headers and make some functions static
mru
parents: 11370
diff changeset
25
975
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
26 /**
1879
dd63cb7e5080 fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents: 1352
diff changeset
27 * Do a complex FFT with the parameters defined in ff_fft_init(). The
975
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
28 * input data must be permuted before with s->revtab table. No
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
29 * 1.0/sqrt(n) normalization is done.
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
30 * AltiVec-enabled
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
31 * This code assumes that the 'z' pointer is 16-byte aligned.
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
32 * It also assumes that every FFTComplex is an 8-byte-aligned pair of floats.
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
33 */
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents: 11969
diff changeset
34
12089
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12085
diff changeset
/* Prototypes for the FFT entry points used below; their definitions are not
 * in this file (presumably provided by the assembler implementation - confirm).
 * ff_fft_calc_altivec() is called by ff_imdct_half_altivec() on its
 * pre-rotated data, and ff_fft_calc_interleave_altivec() is installed as
 * FFTContext.fft_calc by ff_fft_init_altivec(). */
35 void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z);
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12085
diff changeset
36 void ff_fft_calc_interleave_altivec(FFTContext *s, FFTComplex *z);
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents: 11969
diff changeset
37
12049
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
38 #if HAVE_GNU_AS
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
39 static void ff_imdct_half_altivec(FFTContext *s, FFTSample *output, const FFTSample *input)
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
40 {
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
41 int j, k;
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
42 int n = 1 << s->mdct_bits;
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
43 int n4 = n >> 2;
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
44 int n8 = n >> 3;
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
45 int n32 = n >> 5;
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
46 const uint16_t *revtabj = s->revtab;
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
47 const uint16_t *revtabk = s->revtab+n4;
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
48 const vec_f *tcos = (const vec_f*)(s->tcos+n8);
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
49 const vec_f *tsin = (const vec_f*)(s->tsin+n8);
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
50 const vec_f *pin = (const vec_f*)(input+n4);
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
51 vec_f *pout = (vec_f*)(output+n4);
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
52
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
53 /* pre rotation */
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
54 k = n32-1;
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
55 do {
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
56 vec_f cos,sin,cos0,sin0,cos1,sin1,re,im,r0,i0,r1,i1,a,b,c,d;
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
57 #define CMULA(p,o0,o1,o2,o3)\
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
58 a = pin[ k*2+p]; /* { z[k].re, z[k].im, z[k+1].re, z[k+1].im } */\
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
59 b = pin[-k*2-p-1]; /* { z[-k-2].re, z[-k-2].im, z[-k-1].re, z[-k-1].im } */\
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
60 re = vec_perm(a, b, vcprm(0,2,s0,s2)); /* { z[k].re, z[k+1].re, z[-k-2].re, z[-k-1].re } */\
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
61 im = vec_perm(a, b, vcprm(s3,s1,3,1)); /* { z[-k-1].im, z[-k-2].im, z[k+1].im, z[k].im } */\
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
62 cos = vec_perm(cos0, cos1, vcprm(o0,o1,s##o2,s##o3)); /* { cos[k], cos[k+1], cos[-k-2], cos[-k-1] } */\
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
63 sin = vec_perm(sin0, sin1, vcprm(o0,o1,s##o2,s##o3));\
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
64 r##p = im*cos - re*sin;\
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
65 i##p = re*cos + im*sin;
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
66 #define STORE2(v,dst)\
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
67 j = dst;\
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
68 vec_ste(v, 0, output+j*2);\
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
69 vec_ste(v, 4, output+j*2);
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
70 #define STORE8(p)\
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
71 a = vec_perm(r##p, i##p, vcprm(0,s0,0,s0));\
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
72 b = vec_perm(r##p, i##p, vcprm(1,s1,1,s1));\
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
73 c = vec_perm(r##p, i##p, vcprm(2,s2,2,s2));\
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
74 d = vec_perm(r##p, i##p, vcprm(3,s3,3,s3));\
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
75 STORE2(a, revtabk[ p*2-4]);\
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
76 STORE2(b, revtabk[ p*2-3]);\
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
77 STORE2(c, revtabj[-p*2+2]);\
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
78 STORE2(d, revtabj[-p*2+3]);
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
79
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
80 cos0 = tcos[k];
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
81 sin0 = tsin[k];
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
82 cos1 = tcos[-k-1];
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
83 sin1 = tsin[-k-1];
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
84 CMULA(0, 0,1,2,3);
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
85 CMULA(1, 2,3,0,1);
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
86 STORE8(0);
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
87 STORE8(1);
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
88 revtabj += 4;
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
89 revtabk -= 4;
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
90 k--;
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
91 } while(k >= 0);
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
92
12089
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12085
diff changeset
93 ff_fft_calc_altivec(s, (FFTComplex*)output);
12049
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
94
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
95 /* post rotation + reordering */
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
96 j = -n32;
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
97 k = n32-1;
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
98 do {
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
99 vec_f cos,sin,re,im,a,b,c,d;
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
100 #define CMULB(d0,d1,o)\
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
101 re = pout[o*2];\
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
102 im = pout[o*2+1];\
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
103 cos = tcos[o];\
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
104 sin = tsin[o];\
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
105 d0 = im*sin - re*cos;\
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
106 d1 = re*sin + im*cos;
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
107
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
108 CMULB(a,b,j);
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
109 CMULB(c,d,k);
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
110 pout[2*j] = vec_perm(a, d, vcprm(0,s3,1,s2));
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
111 pout[2*j+1] = vec_perm(a, d, vcprm(2,s1,3,s0));
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
112 pout[2*k] = vec_perm(c, b, vcprm(0,s3,1,s2));
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
113 pout[2*k+1] = vec_perm(c, b, vcprm(2,s1,3,s0));
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
114 j++;
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
115 k--;
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
116 } while(k >= 0);
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
117 }
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
118
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
119 static void ff_imdct_calc_altivec(FFTContext *s, FFTSample *output, const FFTSample *input)
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
120 {
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
121 int k;
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
122 int n = 1 << s->mdct_bits;
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
123 int n4 = n >> 2;
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
124 int n16 = n >> 4;
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
125 vec_u32 sign = {1<<31,1<<31,1<<31,1<<31};
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
126 vec_u32 *p0 = (vec_u32*)(output+n4);
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
127 vec_u32 *p1 = (vec_u32*)(output+n4*3);
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
128
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
129 ff_imdct_half_altivec(s, output+n4, input);
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
130
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
131 for (k = 0; k < n16; k++) {
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
132 vec_u32 a = p0[k] ^ sign;
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
133 vec_u32 b = p1[-k-1];
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
134 p0[-k-1] = vec_perm(a, a, vcprm(3,2,1,0));
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
135 p1[k] = vec_perm(b, b, vcprm(3,2,1,0));
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
136 }
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
137 }
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
138 #endif /* HAVE_GNU_AS */
e6305257ceb6 PPC: Altivec IMDCT
mru
parents: 12047
diff changeset
139
10175
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents: 9364
diff changeset
140 av_cold void ff_fft_init_altivec(FFTContext *s)
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents: 9364
diff changeset
141 {
12052
c7c32da068ab PPC: fix build on OSX without gas-preprocessor
mru
parents: 12049
diff changeset
142 #if HAVE_GNU_AS
12089
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12085
diff changeset
143 s->fft_calc = ff_fft_calc_interleave_altivec;
12052
c7c32da068ab PPC: fix build on OSX without gas-preprocessor
mru
parents: 12049
diff changeset
144 s->imdct_calc = ff_imdct_calc_altivec;
c7c32da068ab PPC: fix build on OSX without gas-preprocessor
mru
parents: 12049
diff changeset
145 s->imdct_half = ff_imdct_half_altivec;
c7c32da068ab PPC: fix build on OSX without gas-preprocessor
mru
parents: 12049
diff changeset
146 #endif
10175
5cf49858179a Move per-arch fft init bits into the corresponding subdirs
mru
parents: 9364
diff changeset
147 }