annotate mp3lib/dct64_altivec.c @ 29998:0dacb57a3d3e

Filter list of missing source files so that it only contains nonexisting files. Thus those files can be symlinked directly without testing if they exist.
author diego
date Mon, 14 Dec 2009 12:03:02 +0000
parents 0f1b5b68af32
children 347d152a5cfa
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
1
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
2 /*
11980
72256bd9557b copyright update by the author
diego
parents: 9122
diff changeset
3 * Discrete Cosine Transform (DCT) for Altivec
72256bd9557b copyright update by the author
diego
parents: 9122
diff changeset
4 * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
72256bd9557b copyright update by the author
diego
parents: 9122
diff changeset
5 * based upon code from "mp3lib/dct64.c"
18848
2029204dd999 Add missing license, as chosen by its author
gpoirier
parents: 12131
diff changeset
6 * This file is free software; you can redistribute it and/or
2029204dd999 Add missing license, as chosen by its author
gpoirier
parents: 12131
diff changeset
7 * modify it under the terms of the GNU Lesser General Public License
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
8 */
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
9
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
10 #define real float
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
11
25341
baddb0681b2e Add missing #include <stdio.h>, fixes the warning:
diego
parents: 25328
diff changeset
12 #include <stdio.h>
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
13 #include "mpg123.h"
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
14
25328
6f0309e575e0 There is a check for altivec.h in configure so use the preprocessor directive
diego
parents: 25327
diff changeset
15 #ifdef HAVE_ALTIVEC_H
9122
5ba896a38d75 The two attached patches *should* allow for proper
arpi
parents: 9002
diff changeset
16 #include <altivec.h>
5ba896a38d75 The two attached patches *should* allow for proper
arpi
parents: 9002
diff changeset
17 #endif
5ba896a38d75 The two attached patches *should* allow for proper
arpi
parents: 9002
diff changeset
18
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
19 // used to build register permutation vectors (vcprm)
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
20 // the 's' are for words in the _s_econd vector
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
21 #define WORD_0 0x00,0x01,0x02,0x03
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
22 #define WORD_1 0x04,0x05,0x06,0x07
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
23 #define WORD_2 0x08,0x09,0x0a,0x0b
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
24 #define WORD_3 0x0c,0x0d,0x0e,0x0f
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
25 #define WORD_s0 0x10,0x11,0x12,0x13
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
26 #define WORD_s1 0x14,0x15,0x16,0x17
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
27 #define WORD_s2 0x18,0x19,0x1a,0x1b
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
28 #define WORD_s3 0x1c,0x1d,0x1e,0x1f
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
29
27318
bb5ed9aa34fc Remove AltiVec vector declaration compiler compatibility macros.
diego
parents: 26895
diff changeset
30 #define vcprm(a,b,c,d) (const vector unsigned char){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d}
bb5ed9aa34fc Remove AltiVec vector declaration compiler compatibility macros.
diego
parents: 26895
diff changeset
31 #define vcii(a,b,c,d) (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d}
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
32
27318
bb5ed9aa34fc Remove AltiVec vector declaration compiler compatibility macros.
diego
parents: 26895
diff changeset
33 #define FOUROF(a) {a,a,a,a}
25996
dcb28cc9ca2c Refactor AltiVec macros as done for FFmpeg.
diego
parents: 25341
diff changeset
34
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
35 // vcprmle is used to keep the same index as in the SSE version.
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
36 // it's the same as vcprm, with the indices reversed
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
37 // ('le' is Little Endian)
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
38 #define vcprmle(a,b,c,d) vcprm(d,c,b,a)
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
39
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
40 // used to build inverse/identity vectors (vcii)
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
41 // n is _n_egative, p is _p_ositive
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
42 #define FLOAT_n -1.
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
43 #define FLOAT_p 1.
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
44
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
45 void dct64_altivec(real *a,real *b,real *c)
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
46 {
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
47 real __attribute__ ((aligned(16))) b1[0x20];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
48 real __attribute__ ((aligned(16))) b2[0x20];
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 27318
diff changeset
49
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
50 real *out0 = a;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
51 real *out1 = b;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
52 real *samples = c;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
53
9122
5ba896a38d75 The two attached patches *should* allow for proper
arpi
parents: 9002
diff changeset
54 const vector float vczero = (const vector float)FOUROF(0.);
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
55 const vector unsigned char reverse = (const vector unsigned char)vcprm(3,2,1,0);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
56
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
57
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
58 if (((unsigned long)b1 & 0x0000000F) ||
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
59 ((unsigned long)b2 & 0x0000000F))
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 27318
diff changeset
60
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
61 {
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
62 printf("MISALIGNED:\t%p\t%p\t%p\t%p\t%p\n",
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
63 b1, b2, a, b, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
64 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
65
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
66
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
67 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 27318
diff changeset
68
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
69 {
12131
d155623271e3 fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents: 11980
diff changeset
70 register real *costab = mp3lib_pnts[0];
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
71
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
72 b1[0x00] = samples[0x00] + samples[0x1F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
73 b1[0x01] = samples[0x01] + samples[0x1E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
74 b1[0x02] = samples[0x02] + samples[0x1D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
75 b1[0x03] = samples[0x03] + samples[0x1C];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
76 b1[0x04] = samples[0x04] + samples[0x1B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
77 b1[0x05] = samples[0x05] + samples[0x1A];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
78 b1[0x06] = samples[0x06] + samples[0x19];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
79 b1[0x07] = samples[0x07] + samples[0x18];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
80 b1[0x08] = samples[0x08] + samples[0x17];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
81 b1[0x09] = samples[0x09] + samples[0x16];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
82 b1[0x0A] = samples[0x0A] + samples[0x15];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
83 b1[0x0B] = samples[0x0B] + samples[0x14];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
84 b1[0x0C] = samples[0x0C] + samples[0x13];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
85 b1[0x0D] = samples[0x0D] + samples[0x12];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
86 b1[0x0E] = samples[0x0E] + samples[0x11];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
87 b1[0x0F] = samples[0x0F] + samples[0x10];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
88 b1[0x10] = (samples[0x0F] - samples[0x10]) * costab[0xF];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
89 b1[0x11] = (samples[0x0E] - samples[0x11]) * costab[0xE];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
90 b1[0x12] = (samples[0x0D] - samples[0x12]) * costab[0xD];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
91 b1[0x13] = (samples[0x0C] - samples[0x13]) * costab[0xC];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
92 b1[0x14] = (samples[0x0B] - samples[0x14]) * costab[0xB];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
93 b1[0x15] = (samples[0x0A] - samples[0x15]) * costab[0xA];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
94 b1[0x16] = (samples[0x09] - samples[0x16]) * costab[0x9];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
95 b1[0x17] = (samples[0x08] - samples[0x17]) * costab[0x8];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
96 b1[0x18] = (samples[0x07] - samples[0x18]) * costab[0x7];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
97 b1[0x19] = (samples[0x06] - samples[0x19]) * costab[0x6];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
98 b1[0x1A] = (samples[0x05] - samples[0x1A]) * costab[0x5];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
99 b1[0x1B] = (samples[0x04] - samples[0x1B]) * costab[0x4];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
100 b1[0x1C] = (samples[0x03] - samples[0x1C]) * costab[0x3];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
101 b1[0x1D] = (samples[0x02] - samples[0x1D]) * costab[0x2];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
102 b1[0x1E] = (samples[0x01] - samples[0x1E]) * costab[0x1];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
103 b1[0x1F] = (samples[0x00] - samples[0x1F]) * costab[0x0];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
104
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
105 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
106 {
12131
d155623271e3 fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents: 11980
diff changeset
107 register real *costab = mp3lib_pnts[1];
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
108
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
109 b2[0x00] = b1[0x00] + b1[0x0F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
110 b2[0x01] = b1[0x01] + b1[0x0E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
111 b2[0x02] = b1[0x02] + b1[0x0D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
112 b2[0x03] = b1[0x03] + b1[0x0C];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
113 b2[0x04] = b1[0x04] + b1[0x0B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
114 b2[0x05] = b1[0x05] + b1[0x0A];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
115 b2[0x06] = b1[0x06] + b1[0x09];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
116 b2[0x07] = b1[0x07] + b1[0x08];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
117 b2[0x08] = (b1[0x07] - b1[0x08]) * costab[7];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
118 b2[0x09] = (b1[0x06] - b1[0x09]) * costab[6];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
119 b2[0x0A] = (b1[0x05] - b1[0x0A]) * costab[5];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
120 b2[0x0B] = (b1[0x04] - b1[0x0B]) * costab[4];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
121 b2[0x0C] = (b1[0x03] - b1[0x0C]) * costab[3];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
122 b2[0x0D] = (b1[0x02] - b1[0x0D]) * costab[2];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
123 b2[0x0E] = (b1[0x01] - b1[0x0E]) * costab[1];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
124 b2[0x0F] = (b1[0x00] - b1[0x0F]) * costab[0];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
125 b2[0x10] = b1[0x10] + b1[0x1F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
126 b2[0x11] = b1[0x11] + b1[0x1E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
127 b2[0x12] = b1[0x12] + b1[0x1D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
128 b2[0x13] = b1[0x13] + b1[0x1C];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
129 b2[0x14] = b1[0x14] + b1[0x1B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
130 b2[0x15] = b1[0x15] + b1[0x1A];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
131 b2[0x16] = b1[0x16] + b1[0x19];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
132 b2[0x17] = b1[0x17] + b1[0x18];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
133 b2[0x18] = (b1[0x18] - b1[0x17]) * costab[7];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
134 b2[0x19] = (b1[0x19] - b1[0x16]) * costab[6];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
135 b2[0x1A] = (b1[0x1A] - b1[0x15]) * costab[5];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
136 b2[0x1B] = (b1[0x1B] - b1[0x14]) * costab[4];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
137 b2[0x1C] = (b1[0x1C] - b1[0x13]) * costab[3];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
138 b2[0x1D] = (b1[0x1D] - b1[0x12]) * costab[2];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
139 b2[0x1E] = (b1[0x1E] - b1[0x11]) * costab[1];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
140 b2[0x1F] = (b1[0x1F] - b1[0x10]) * costab[0];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
141
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
142 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
143
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
144 {
12131
d155623271e3 fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents: 11980
diff changeset
145 register real *costab = mp3lib_pnts[2];
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
146
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
147 b1[0x00] = b2[0x00] + b2[0x07];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
148 b1[0x01] = b2[0x01] + b2[0x06];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
149 b1[0x02] = b2[0x02] + b2[0x05];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
150 b1[0x03] = b2[0x03] + b2[0x04];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
151 b1[0x04] = (b2[0x03] - b2[0x04]) * costab[3];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
152 b1[0x05] = (b2[0x02] - b2[0x05]) * costab[2];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
153 b1[0x06] = (b2[0x01] - b2[0x06]) * costab[1];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
154 b1[0x07] = (b2[0x00] - b2[0x07]) * costab[0];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
155 b1[0x08] = b2[0x08] + b2[0x0F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
156 b1[0x09] = b2[0x09] + b2[0x0E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
157 b1[0x0A] = b2[0x0A] + b2[0x0D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
158 b1[0x0B] = b2[0x0B] + b2[0x0C];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
159 b1[0x0C] = (b2[0x0C] - b2[0x0B]) * costab[3];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
160 b1[0x0D] = (b2[0x0D] - b2[0x0A]) * costab[2];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
161 b1[0x0E] = (b2[0x0E] - b2[0x09]) * costab[1];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
162 b1[0x0F] = (b2[0x0F] - b2[0x08]) * costab[0];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
163 b1[0x10] = b2[0x10] + b2[0x17];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
164 b1[0x11] = b2[0x11] + b2[0x16];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
165 b1[0x12] = b2[0x12] + b2[0x15];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
166 b1[0x13] = b2[0x13] + b2[0x14];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
167 b1[0x14] = (b2[0x13] - b2[0x14]) * costab[3];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
168 b1[0x15] = (b2[0x12] - b2[0x15]) * costab[2];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
169 b1[0x16] = (b2[0x11] - b2[0x16]) * costab[1];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
170 b1[0x17] = (b2[0x10] - b2[0x17]) * costab[0];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
171 b1[0x18] = b2[0x18] + b2[0x1F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
172 b1[0x19] = b2[0x19] + b2[0x1E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
173 b1[0x1A] = b2[0x1A] + b2[0x1D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
174 b1[0x1B] = b2[0x1B] + b2[0x1C];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
175 b1[0x1C] = (b2[0x1C] - b2[0x1B]) * costab[3];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
176 b1[0x1D] = (b2[0x1D] - b2[0x1A]) * costab[2];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
177 b1[0x1E] = (b2[0x1E] - b2[0x19]) * costab[1];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
178 b1[0x1F] = (b2[0x1F] - b2[0x18]) * costab[0];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
179 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
180
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
181 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
182
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
183 // How does it work ?
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
184 // the first three passes are reproduced in the three blocks below
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
185 // all computations are done on a 4 elements vector
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
186 // 'reverse' is a special permutation vector used to reverse
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
187 // the order of the elements inside a vector.
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
188 // note that all loads/stores to b1 (b2) between passes 1 and 2 (2 and 3)
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
189 // have been removed, all elements are stored inside b1vX (b2vX)
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
190 {
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
191 register vector float
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
192 b1v0, b1v1, b1v2, b1v3,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
193 b1v4, b1v5, b1v6, b1v7;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
194 register vector float
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
195 temp1, temp2;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
196
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
197 {
12131
d155623271e3 fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents: 11980
diff changeset
198 register real *costab = mp3lib_pnts[0];
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
199
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
200 register vector float
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
201 samplesv1, samplesv2, samplesv3, samplesv4,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
202 samplesv5, samplesv6, samplesv7, samplesv8,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
203 samplesv9;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
204 register vector unsigned char samples_perm = vec_lvsl(0, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
205 register vector float costabv1, costabv2, costabv3, costabv4, costabv5;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
206 register vector unsigned char costab_perm = vec_lvsl(0, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
207
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
208 samplesv1 = vec_ld(0, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
209 samplesv2 = vec_ld(16, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
210 samplesv1 = vec_perm(samplesv1, samplesv2, samples_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
211 samplesv3 = vec_ld(32, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
212 samplesv2 = vec_perm(samplesv2, samplesv3, samples_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
213 samplesv4 = vec_ld(48, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
214 samplesv3 = vec_perm(samplesv3, samplesv4, samples_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
215 samplesv5 = vec_ld(64, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
216 samplesv4 = vec_perm(samplesv4, samplesv5, samples_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
217 samplesv6 = vec_ld(80, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
218 samplesv5 = vec_perm(samplesv5, samplesv6, samples_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
219 samplesv7 = vec_ld(96, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
220 samplesv6 = vec_perm(samplesv6, samplesv7, samples_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
221 samplesv8 = vec_ld(112, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
222 samplesv7 = vec_perm(samplesv7, samplesv8, samples_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
223 samplesv9 = vec_ld(128, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
224 samplesv8 = vec_perm(samplesv8, samplesv9, samples_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
225
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
226 temp1 = vec_add(samplesv1,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
227 vec_perm(samplesv8, samplesv8, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
228 //vec_st(temp1, 0, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
229 b1v0 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
230 temp1 = vec_add(samplesv2,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
231 vec_perm(samplesv7, samplesv7, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
232 //vec_st(temp1, 16, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
233 b1v1 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
234 temp1 = vec_add(samplesv3,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
235 vec_perm(samplesv6, samplesv6, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
236 //vec_st(temp1, 32, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
237 b1v2 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
238 temp1 = vec_add(samplesv4,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
239 vec_perm(samplesv5, samplesv5, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
240 //vec_st(temp1, 48, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
241 b1v3 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
242
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
243 costabv1 = vec_ld(0, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
244 costabv2 = vec_ld(16, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
245 costabv1 = vec_perm(costabv1, costabv2, costab_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
246 costabv3 = vec_ld(32, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
247 costabv2 = vec_perm(costabv2, costabv3, costab_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
248 costabv4 = vec_ld(48, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
249 costabv3 = vec_perm(costabv3, costabv4, costab_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
250 costabv5 = vec_ld(64, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
251 costabv4 = vec_perm(costabv4, costabv5, costab_perm);
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 27318
diff changeset
252
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
253 temp1 = vec_sub(vec_perm(samplesv4, samplesv4, reverse),
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
254 samplesv5);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
255 temp2 = vec_madd(temp1,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
256 vec_perm(costabv4, costabv4, reverse),
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
257 vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
258 //vec_st(temp2, 64, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
259 b1v4 = temp2;
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 27318
diff changeset
260
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
261 temp1 = vec_sub(vec_perm(samplesv3, samplesv3, reverse),
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
262 samplesv6);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
263 temp2 = vec_madd(temp1,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
264 vec_perm(costabv3, costabv3, reverse),
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
265 vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
266 //vec_st(temp2, 80, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
267 b1v5 = temp2;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
268 temp1 = vec_sub(vec_perm(samplesv2, samplesv2, reverse),
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
269 samplesv7);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
270 temp2 = vec_madd(temp1,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
271 vec_perm(costabv2, costabv2, reverse),
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
272 vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
273 //vec_st(temp2, 96, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
274 b1v6 = temp2;
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 27318
diff changeset
275
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
276 temp1 = vec_sub(vec_perm(samplesv1, samplesv1, reverse),
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
277 samplesv8);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
278 temp2 = vec_madd(temp1,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
279 vec_perm(costabv1, costabv1, reverse),
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
280 vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
281 //vec_st(temp2, 112, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
282 b1v7 = temp2;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
283
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
284 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
285
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
286 {
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
287 register vector float
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
288 b2v0, b2v1, b2v2, b2v3,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
289 b2v4, b2v5, b2v6, b2v7;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
290 {
12131
d155623271e3 fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents: 11980
diff changeset
291 register real *costab = mp3lib_pnts[1];
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
292 register vector float costabv1r, costabv2r, costabv1, costabv2, costabv3;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
293 register vector unsigned char costab_perm = vec_lvsl(0, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
294
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
295 costabv1 = vec_ld(0, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
296 costabv2 = vec_ld(16, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
297 costabv1 = vec_perm(costabv1, costabv2, costab_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
298 costabv3 = vec_ld(32, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
299 costabv2 = vec_perm(costabv2, costabv3 , costab_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
300 costabv1r = vec_perm(costabv1, costabv1, reverse);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
301 costabv2r = vec_perm(costabv2, costabv2, reverse);
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 27318
diff changeset
302
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
303 temp1 = vec_add(b1v0, vec_perm(b1v3, b1v3, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
304 //vec_st(temp1, 0, b2);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
305 b2v0 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
306 temp1 = vec_add(b1v1, vec_perm(b1v2, b1v2, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
307 //vec_st(temp1, 16, b2);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
308 b2v1 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
309 temp2 = vec_sub(vec_perm(b1v1, b1v1, reverse), b1v2);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
310 temp1 = vec_madd(temp2, costabv2r, vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
311 //vec_st(temp1, 32, b2);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
312 b2v2 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
313 temp2 = vec_sub(vec_perm(b1v0, b1v0, reverse), b1v3);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
314 temp1 = vec_madd(temp2, costabv1r, vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
315 //vec_st(temp1, 48, b2);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
316 b2v3 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
317 temp1 = vec_add(b1v4, vec_perm(b1v7, b1v7, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
318 //vec_st(temp1, 64, b2);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
319 b2v4 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
320 temp1 = vec_add(b1v5, vec_perm(b1v6, b1v6, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
321 //vec_st(temp1, 80, b2);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
322 b2v5 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
323 temp2 = vec_sub(b1v6, vec_perm(b1v5, b1v5, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
324 temp1 = vec_madd(temp2, costabv2r, vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
325 //vec_st(temp1, 96, b2);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
326 b2v6 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
327 temp2 = vec_sub(b1v7, vec_perm(b1v4, b1v4, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
328 temp1 = vec_madd(temp2, costabv1r, vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
329 //vec_st(temp1, 112, b2);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
330 b2v7 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
331 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
332
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
333 {
12131
d155623271e3 fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents: 11980
diff changeset
334 register real *costab = mp3lib_pnts[2];
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
335
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 27318
diff changeset
336
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
337 vector float costabv1r, costabv1, costabv2;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
338 vector unsigned char costab_perm = vec_lvsl(0, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
339
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
340 costabv1 = vec_ld(0, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
341 costabv2 = vec_ld(16, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
342 costabv1 = vec_perm(costabv1, costabv2, costab_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
343 costabv1r = vec_perm(costabv1, costabv1, reverse);
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 27318
diff changeset
344
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
345 temp1 = vec_add(b2v0, vec_perm(b2v1, b2v1, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
346 vec_st(temp1, 0, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
347 temp2 = vec_sub(vec_perm(b2v0, b2v0, reverse), b2v1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
348 temp1 = vec_madd(temp2, costabv1r, vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
349 vec_st(temp1, 16, b1);
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 27318
diff changeset
350
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
351 temp1 = vec_add(b2v2, vec_perm(b2v3, b2v3, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
352 vec_st(temp1, 32, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
353 temp2 = vec_sub(b2v3, vec_perm(b2v2, b2v2, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
354 temp1 = vec_madd(temp2, costabv1r, vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
355 vec_st(temp1, 48, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
356
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
357 temp1 = vec_add(b2v4, vec_perm(b2v5, b2v5, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
358 vec_st(temp1, 64, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
359 temp2 = vec_sub(vec_perm(b2v4, b2v4, reverse), b2v5);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
360 temp1 = vec_madd(temp2, costabv1r, vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
361 vec_st(temp1, 80, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
362
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
363 temp1 = vec_add(b2v6, vec_perm(b2v7, b2v7, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
364 vec_st(temp1, 96, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
365 temp2 = vec_sub(b2v7, vec_perm(b2v6, b2v6, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
366 temp1 = vec_madd(temp2, costabv1r, vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
367 vec_st(temp1, 112, b1);
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 27318
diff changeset
368
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
369 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
370 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
371 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
372
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
373 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
374
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
375 {
12131
d155623271e3 fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents: 11980
diff changeset
376 register real const cos0 = mp3lib_pnts[3][0];
d155623271e3 fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents: 11980
diff changeset
377 register real const cos1 = mp3lib_pnts[3][1];
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
378
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
379 b2[0x00] = b1[0x00] + b1[0x03];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
380 b2[0x01] = b1[0x01] + b1[0x02];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
381 b2[0x02] = (b1[0x01] - b1[0x02]) * cos1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
382 b2[0x03] = (b1[0x00] - b1[0x03]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
383 b2[0x04] = b1[0x04] + b1[0x07];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
384 b2[0x05] = b1[0x05] + b1[0x06];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
385 b2[0x06] = (b1[0x06] - b1[0x05]) * cos1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
386 b2[0x07] = (b1[0x07] - b1[0x04]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
387 b2[0x08] = b1[0x08] + b1[0x0B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
388 b2[0x09] = b1[0x09] + b1[0x0A];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
389 b2[0x0A] = (b1[0x09] - b1[0x0A]) * cos1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
390 b2[0x0B] = (b1[0x08] - b1[0x0B]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
391 b2[0x0C] = b1[0x0C] + b1[0x0F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
392 b2[0x0D] = b1[0x0D] + b1[0x0E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
393 b2[0x0E] = (b1[0x0E] - b1[0x0D]) * cos1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
394 b2[0x0F] = (b1[0x0F] - b1[0x0C]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
395 b2[0x10] = b1[0x10] + b1[0x13];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
396 b2[0x11] = b1[0x11] + b1[0x12];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
397 b2[0x12] = (b1[0x11] - b1[0x12]) * cos1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
398 b2[0x13] = (b1[0x10] - b1[0x13]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
399 b2[0x14] = b1[0x14] + b1[0x17];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
400 b2[0x15] = b1[0x15] + b1[0x16];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
401 b2[0x16] = (b1[0x16] - b1[0x15]) * cos1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
402 b2[0x17] = (b1[0x17] - b1[0x14]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
403 b2[0x18] = b1[0x18] + b1[0x1B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
404 b2[0x19] = b1[0x19] + b1[0x1A];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
405 b2[0x1A] = (b1[0x19] - b1[0x1A]) * cos1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
406 b2[0x1B] = (b1[0x18] - b1[0x1B]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
407 b2[0x1C] = b1[0x1C] + b1[0x1F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
408 b2[0x1D] = b1[0x1D] + b1[0x1E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
409 b2[0x1E] = (b1[0x1E] - b1[0x1D]) * cos1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
410 b2[0x1F] = (b1[0x1F] - b1[0x1C]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
411 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
412
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
413 {
12131
d155623271e3 fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents: 11980
diff changeset
414 register real const cos0 = mp3lib_pnts[4][0];
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
415
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
416 b1[0x00] = b2[0x00] + b2[0x01];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
417 b1[0x01] = (b2[0x00] - b2[0x01]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
418 b1[0x02] = b2[0x02] + b2[0x03];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
419 b1[0x03] = (b2[0x03] - b2[0x02]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
420 b1[0x02] += b1[0x03];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
421
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
422 b1[0x04] = b2[0x04] + b2[0x05];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
423 b1[0x05] = (b2[0x04] - b2[0x05]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
424 b1[0x06] = b2[0x06] + b2[0x07];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
425 b1[0x07] = (b2[0x07] - b2[0x06]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
426 b1[0x06] += b1[0x07];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
427 b1[0x04] += b1[0x06];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
428 b1[0x06] += b1[0x05];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
429 b1[0x05] += b1[0x07];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
430
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
431 b1[0x08] = b2[0x08] + b2[0x09];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
432 b1[0x09] = (b2[0x08] - b2[0x09]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
433 b1[0x0A] = b2[0x0A] + b2[0x0B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
434 b1[0x0B] = (b2[0x0B] - b2[0x0A]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
435 b1[0x0A] += b1[0x0B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
436
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
437 b1[0x0C] = b2[0x0C] + b2[0x0D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
438 b1[0x0D] = (b2[0x0C] - b2[0x0D]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
439 b1[0x0E] = b2[0x0E] + b2[0x0F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
440 b1[0x0F] = (b2[0x0F] - b2[0x0E]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
441 b1[0x0E] += b1[0x0F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
442 b1[0x0C] += b1[0x0E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
443 b1[0x0E] += b1[0x0D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
444 b1[0x0D] += b1[0x0F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
445
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
446 b1[0x10] = b2[0x10] + b2[0x11];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
447 b1[0x11] = (b2[0x10] - b2[0x11]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
448 b1[0x12] = b2[0x12] + b2[0x13];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
449 b1[0x13] = (b2[0x13] - b2[0x12]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
450 b1[0x12] += b1[0x13];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
451
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
452 b1[0x14] = b2[0x14] + b2[0x15];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
453 b1[0x15] = (b2[0x14] - b2[0x15]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
454 b1[0x16] = b2[0x16] + b2[0x17];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
455 b1[0x17] = (b2[0x17] - b2[0x16]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
456 b1[0x16] += b1[0x17];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
457 b1[0x14] += b1[0x16];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
458 b1[0x16] += b1[0x15];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
459 b1[0x15] += b1[0x17];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
460
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
461 b1[0x18] = b2[0x18] + b2[0x19];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
462 b1[0x19] = (b2[0x18] - b2[0x19]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
463 b1[0x1A] = b2[0x1A] + b2[0x1B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
464 b1[0x1B] = (b2[0x1B] - b2[0x1A]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
465 b1[0x1A] += b1[0x1B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
466
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
467 b1[0x1C] = b2[0x1C] + b2[0x1D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
468 b1[0x1D] = (b2[0x1C] - b2[0x1D]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
469 b1[0x1E] = b2[0x1E] + b2[0x1F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
470 b1[0x1F] = (b2[0x1F] - b2[0x1E]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
471 b1[0x1E] += b1[0x1F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
472 b1[0x1C] += b1[0x1E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
473 b1[0x1E] += b1[0x1D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
474 b1[0x1D] += b1[0x1F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
475 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
476
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
477 out0[0x10*16] = b1[0x00];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
478 out0[0x10*12] = b1[0x04];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
479 out0[0x10* 8] = b1[0x02];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
480 out0[0x10* 4] = b1[0x06];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
481 out0[0x10* 0] = b1[0x01];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
482 out1[0x10* 0] = b1[0x01];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
483 out1[0x10* 4] = b1[0x05];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
484 out1[0x10* 8] = b1[0x03];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
485 out1[0x10*12] = b1[0x07];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
486
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
487 b1[0x08] += b1[0x0C];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
488 out0[0x10*14] = b1[0x08];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
489 b1[0x0C] += b1[0x0a];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
490 out0[0x10*10] = b1[0x0C];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
491 b1[0x0A] += b1[0x0E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
492 out0[0x10* 6] = b1[0x0A];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
493 b1[0x0E] += b1[0x09];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
494 out0[0x10* 2] = b1[0x0E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
495 b1[0x09] += b1[0x0D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
496 out1[0x10* 2] = b1[0x09];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
497 b1[0x0D] += b1[0x0B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
498 out1[0x10* 6] = b1[0x0D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
499 b1[0x0B] += b1[0x0F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
500 out1[0x10*10] = b1[0x0B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
501 out1[0x10*14] = b1[0x0F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
502
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
503 b1[0x18] += b1[0x1C];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
504 out0[0x10*15] = b1[0x10] + b1[0x18];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
505 out0[0x10*13] = b1[0x18] + b1[0x14];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
506 b1[0x1C] += b1[0x1a];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
507 out0[0x10*11] = b1[0x14] + b1[0x1C];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
508 out0[0x10* 9] = b1[0x1C] + b1[0x12];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
509 b1[0x1A] += b1[0x1E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
510 out0[0x10* 7] = b1[0x12] + b1[0x1A];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
511 out0[0x10* 5] = b1[0x1A] + b1[0x16];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
512 b1[0x1E] += b1[0x19];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
513 out0[0x10* 3] = b1[0x16] + b1[0x1E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
514 out0[0x10* 1] = b1[0x1E] + b1[0x11];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
515 b1[0x19] += b1[0x1D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
516 out1[0x10* 1] = b1[0x11] + b1[0x19];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
517 out1[0x10* 3] = b1[0x19] + b1[0x15];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
518 b1[0x1D] += b1[0x1B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
519 out1[0x10* 5] = b1[0x15] + b1[0x1D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
520 out1[0x10* 7] = b1[0x1D] + b1[0x13];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
521 b1[0x1B] += b1[0x1F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
522 out1[0x10* 9] = b1[0x13] + b1[0x1B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
523 out1[0x10*11] = b1[0x1B] + b1[0x17];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
524 out1[0x10*13] = b1[0x17] + b1[0x1F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
525 out1[0x10*15] = b1[0x1F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
526 }