annotate mp3lib/dct64_altivec.c @ 34234:4ec96d5d2e4c

build: drop releaseclean target The target is supposed to remove files that are created during the XML build process without removing the generated documentation. Unfortunately, it does not work as expected and is not worth the extra complication.
author diego
date Mon, 07 Nov 2011 19:54:38 +0000
parents 347d152a5cfa
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
1
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
2 /*
11980
72256bd9557b copyright update by the author
diego
parents: 9122
diff changeset
3 * Discrete Cosine Transform (DCT) for Altivec
72256bd9557b copyright update by the author
diego
parents: 9122
diff changeset
4 * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
72256bd9557b copyright update by the author
diego
parents: 9122
diff changeset
5 * based upon code from "mp3lib/dct64.c"
18848
2029204dd999 Add missing license, as chosen by its author
gpoirier
parents: 12131
diff changeset
6 * This file is free software; you can redistribute it and/or
2029204dd999 Add missing license, as chosen by its author
gpoirier
parents: 12131
diff changeset
7 * modify it under the terms of the GNU Lesser General Public License
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
8 */
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
9
25341
baddb0681b2e Add missing #include <stdio.h>, fixes the warning:
diego
parents: 25328
diff changeset
10 #include <stdio.h>
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
11 #include "mpg123.h"
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
12
25328
6f0309e575e0 There is a check for altivec.h in configure so use the preprocessor directive
diego
parents: 25327
diff changeset
13 #ifdef HAVE_ALTIVEC_H
9122
5ba896a38d75 The two attached patches *should* allow for proper
arpi
parents: 9002
diff changeset
14 #include <altivec.h>
5ba896a38d75 The two attached patches *should* allow for proper
arpi
parents: 9002
diff changeset
15 #endif
5ba896a38d75 The two attached patches *should* allow for proper
arpi
parents: 9002
diff changeset
16
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
17 // used to build register permutation vectors (vcprm)
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
18 // the 's' are for words in the _s_econd vector
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
19 #define WORD_0 0x00,0x01,0x02,0x03
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
20 #define WORD_1 0x04,0x05,0x06,0x07
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
21 #define WORD_2 0x08,0x09,0x0a,0x0b
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
22 #define WORD_3 0x0c,0x0d,0x0e,0x0f
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
23 #define WORD_s0 0x10,0x11,0x12,0x13
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
24 #define WORD_s1 0x14,0x15,0x16,0x17
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
25 #define WORD_s2 0x18,0x19,0x1a,0x1b
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
26 #define WORD_s3 0x1c,0x1d,0x1e,0x1f
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
27
27318
bb5ed9aa34fc Remove AltiVec vector declaration compiler compatibility macros.
diego
parents: 26895
diff changeset
28 #define vcprm(a,b,c,d) (const vector unsigned char){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d}
bb5ed9aa34fc Remove AltiVec vector declaration compiler compatibility macros.
diego
parents: 26895
diff changeset
29 #define vcii(a,b,c,d) (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d}
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
30
27318
bb5ed9aa34fc Remove AltiVec vector declaration compiler compatibility macros.
diego
parents: 26895
diff changeset
31 #define FOUROF(a) {a,a,a,a}
25996
dcb28cc9ca2c Refactor AltiVec macros as done for FFmpeg.
diego
parents: 25341
diff changeset
32
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
33 // vcprmle is used to keep the same index as in the SSE version.
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
34 // it's the same as vcprm, with the indices reversed
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
35 // ('le' is Little Endian)
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
36 #define vcprmle(a,b,c,d) vcprm(d,c,b,a)
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
37
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
38 // used to build inverse/identity vectors (vcii)
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
39 // n is _n_egative, p is _p_ositive
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
40 #define FLOAT_n -1.
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
41 #define FLOAT_p 1.
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
42
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
43 void dct64_altivec(real *a,real *b,real *c)
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
44 {
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
45 real __attribute__ ((aligned(16))) b1[0x20];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
46 real __attribute__ ((aligned(16))) b2[0x20];
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 27318
diff changeset
47
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
48 real *out0 = a;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
49 real *out1 = b;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
50 real *samples = c;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
51
9122
5ba896a38d75 The two attached patches *should* allow for proper
arpi
parents: 9002
diff changeset
52 const vector float vczero = (const vector float)FOUROF(0.);
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
53 const vector unsigned char reverse = (const vector unsigned char)vcprm(3,2,1,0);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
54
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
55
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
56 if (((unsigned long)b1 & 0x0000000F) ||
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
57 ((unsigned long)b2 & 0x0000000F))
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 27318
diff changeset
58
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
59 {
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
60 printf("MISALIGNED:\t%p\t%p\t%p\t%p\t%p\n",
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
61 b1, b2, a, b, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
62 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
63
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
64
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
65 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 27318
diff changeset
66
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
67 {
12131
d155623271e3 fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents: 11980
diff changeset
68 register real *costab = mp3lib_pnts[0];
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
69
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
70 b1[0x00] = samples[0x00] + samples[0x1F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
71 b1[0x01] = samples[0x01] + samples[0x1E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
72 b1[0x02] = samples[0x02] + samples[0x1D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
73 b1[0x03] = samples[0x03] + samples[0x1C];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
74 b1[0x04] = samples[0x04] + samples[0x1B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
75 b1[0x05] = samples[0x05] + samples[0x1A];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
76 b1[0x06] = samples[0x06] + samples[0x19];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
77 b1[0x07] = samples[0x07] + samples[0x18];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
78 b1[0x08] = samples[0x08] + samples[0x17];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
79 b1[0x09] = samples[0x09] + samples[0x16];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
80 b1[0x0A] = samples[0x0A] + samples[0x15];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
81 b1[0x0B] = samples[0x0B] + samples[0x14];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
82 b1[0x0C] = samples[0x0C] + samples[0x13];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
83 b1[0x0D] = samples[0x0D] + samples[0x12];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
84 b1[0x0E] = samples[0x0E] + samples[0x11];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
85 b1[0x0F] = samples[0x0F] + samples[0x10];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
86 b1[0x10] = (samples[0x0F] - samples[0x10]) * costab[0xF];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
87 b1[0x11] = (samples[0x0E] - samples[0x11]) * costab[0xE];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
88 b1[0x12] = (samples[0x0D] - samples[0x12]) * costab[0xD];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
89 b1[0x13] = (samples[0x0C] - samples[0x13]) * costab[0xC];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
90 b1[0x14] = (samples[0x0B] - samples[0x14]) * costab[0xB];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
91 b1[0x15] = (samples[0x0A] - samples[0x15]) * costab[0xA];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
92 b1[0x16] = (samples[0x09] - samples[0x16]) * costab[0x9];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
93 b1[0x17] = (samples[0x08] - samples[0x17]) * costab[0x8];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
94 b1[0x18] = (samples[0x07] - samples[0x18]) * costab[0x7];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
95 b1[0x19] = (samples[0x06] - samples[0x19]) * costab[0x6];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
96 b1[0x1A] = (samples[0x05] - samples[0x1A]) * costab[0x5];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
97 b1[0x1B] = (samples[0x04] - samples[0x1B]) * costab[0x4];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
98 b1[0x1C] = (samples[0x03] - samples[0x1C]) * costab[0x3];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
99 b1[0x1D] = (samples[0x02] - samples[0x1D]) * costab[0x2];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
100 b1[0x1E] = (samples[0x01] - samples[0x1E]) * costab[0x1];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
101 b1[0x1F] = (samples[0x00] - samples[0x1F]) * costab[0x0];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
102
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
103 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
104 {
12131
d155623271e3 fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents: 11980
diff changeset
105 register real *costab = mp3lib_pnts[1];
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
106
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
107 b2[0x00] = b1[0x00] + b1[0x0F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
108 b2[0x01] = b1[0x01] + b1[0x0E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
109 b2[0x02] = b1[0x02] + b1[0x0D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
110 b2[0x03] = b1[0x03] + b1[0x0C];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
111 b2[0x04] = b1[0x04] + b1[0x0B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
112 b2[0x05] = b1[0x05] + b1[0x0A];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
113 b2[0x06] = b1[0x06] + b1[0x09];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
114 b2[0x07] = b1[0x07] + b1[0x08];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
115 b2[0x08] = (b1[0x07] - b1[0x08]) * costab[7];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
116 b2[0x09] = (b1[0x06] - b1[0x09]) * costab[6];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
117 b2[0x0A] = (b1[0x05] - b1[0x0A]) * costab[5];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
118 b2[0x0B] = (b1[0x04] - b1[0x0B]) * costab[4];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
119 b2[0x0C] = (b1[0x03] - b1[0x0C]) * costab[3];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
120 b2[0x0D] = (b1[0x02] - b1[0x0D]) * costab[2];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
121 b2[0x0E] = (b1[0x01] - b1[0x0E]) * costab[1];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
122 b2[0x0F] = (b1[0x00] - b1[0x0F]) * costab[0];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
123 b2[0x10] = b1[0x10] + b1[0x1F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
124 b2[0x11] = b1[0x11] + b1[0x1E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
125 b2[0x12] = b1[0x12] + b1[0x1D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
126 b2[0x13] = b1[0x13] + b1[0x1C];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
127 b2[0x14] = b1[0x14] + b1[0x1B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
128 b2[0x15] = b1[0x15] + b1[0x1A];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
129 b2[0x16] = b1[0x16] + b1[0x19];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
130 b2[0x17] = b1[0x17] + b1[0x18];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
131 b2[0x18] = (b1[0x18] - b1[0x17]) * costab[7];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
132 b2[0x19] = (b1[0x19] - b1[0x16]) * costab[6];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
133 b2[0x1A] = (b1[0x1A] - b1[0x15]) * costab[5];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
134 b2[0x1B] = (b1[0x1B] - b1[0x14]) * costab[4];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
135 b2[0x1C] = (b1[0x1C] - b1[0x13]) * costab[3];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
136 b2[0x1D] = (b1[0x1D] - b1[0x12]) * costab[2];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
137 b2[0x1E] = (b1[0x1E] - b1[0x11]) * costab[1];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
138 b2[0x1F] = (b1[0x1F] - b1[0x10]) * costab[0];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
139
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
140 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
141
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
142 {
12131
d155623271e3 fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents: 11980
diff changeset
143 register real *costab = mp3lib_pnts[2];
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
144
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
145 b1[0x00] = b2[0x00] + b2[0x07];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
146 b1[0x01] = b2[0x01] + b2[0x06];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
147 b1[0x02] = b2[0x02] + b2[0x05];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
148 b1[0x03] = b2[0x03] + b2[0x04];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
149 b1[0x04] = (b2[0x03] - b2[0x04]) * costab[3];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
150 b1[0x05] = (b2[0x02] - b2[0x05]) * costab[2];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
151 b1[0x06] = (b2[0x01] - b2[0x06]) * costab[1];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
152 b1[0x07] = (b2[0x00] - b2[0x07]) * costab[0];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
153 b1[0x08] = b2[0x08] + b2[0x0F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
154 b1[0x09] = b2[0x09] + b2[0x0E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
155 b1[0x0A] = b2[0x0A] + b2[0x0D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
156 b1[0x0B] = b2[0x0B] + b2[0x0C];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
157 b1[0x0C] = (b2[0x0C] - b2[0x0B]) * costab[3];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
158 b1[0x0D] = (b2[0x0D] - b2[0x0A]) * costab[2];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
159 b1[0x0E] = (b2[0x0E] - b2[0x09]) * costab[1];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
160 b1[0x0F] = (b2[0x0F] - b2[0x08]) * costab[0];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
161 b1[0x10] = b2[0x10] + b2[0x17];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
162 b1[0x11] = b2[0x11] + b2[0x16];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
163 b1[0x12] = b2[0x12] + b2[0x15];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
164 b1[0x13] = b2[0x13] + b2[0x14];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
165 b1[0x14] = (b2[0x13] - b2[0x14]) * costab[3];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
166 b1[0x15] = (b2[0x12] - b2[0x15]) * costab[2];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
167 b1[0x16] = (b2[0x11] - b2[0x16]) * costab[1];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
168 b1[0x17] = (b2[0x10] - b2[0x17]) * costab[0];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
169 b1[0x18] = b2[0x18] + b2[0x1F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
170 b1[0x19] = b2[0x19] + b2[0x1E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
171 b1[0x1A] = b2[0x1A] + b2[0x1D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
172 b1[0x1B] = b2[0x1B] + b2[0x1C];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
173 b1[0x1C] = (b2[0x1C] - b2[0x1B]) * costab[3];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
174 b1[0x1D] = (b2[0x1D] - b2[0x1A]) * costab[2];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
175 b1[0x1E] = (b2[0x1E] - b2[0x19]) * costab[1];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
176 b1[0x1F] = (b2[0x1F] - b2[0x18]) * costab[0];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
177 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
178
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
179 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
180
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
181 // How does it work ?
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
182 // the first three passes are reproduced in the three blocks below
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
183 // all computations are done on a 4 elements vector
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
184 // 'reverse' is a special permutation vector used to reverse
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
185 // the order of the elements inside a vector.
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
186 // note that all loads/stores to b1 (b2) between passes 1 and 2 (2 and 3)
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
187 // have been removed, all elements are stored inside b1vX (b2vX)
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
188 {
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
189 register vector float
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
190 b1v0, b1v1, b1v2, b1v3,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
191 b1v4, b1v5, b1v6, b1v7;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
192 register vector float
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
193 temp1, temp2;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
194
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
195 {
12131
d155623271e3 fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents: 11980
diff changeset
196 register real *costab = mp3lib_pnts[0];
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
197
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
198 register vector float
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
199 samplesv1, samplesv2, samplesv3, samplesv4,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
200 samplesv5, samplesv6, samplesv7, samplesv8,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
201 samplesv9;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
202 register vector unsigned char samples_perm = vec_lvsl(0, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
203 register vector float costabv1, costabv2, costabv3, costabv4, costabv5;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
204 register vector unsigned char costab_perm = vec_lvsl(0, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
205
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
206 samplesv1 = vec_ld(0, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
207 samplesv2 = vec_ld(16, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
208 samplesv1 = vec_perm(samplesv1, samplesv2, samples_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
209 samplesv3 = vec_ld(32, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
210 samplesv2 = vec_perm(samplesv2, samplesv3, samples_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
211 samplesv4 = vec_ld(48, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
212 samplesv3 = vec_perm(samplesv3, samplesv4, samples_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
213 samplesv5 = vec_ld(64, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
214 samplesv4 = vec_perm(samplesv4, samplesv5, samples_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
215 samplesv6 = vec_ld(80, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
216 samplesv5 = vec_perm(samplesv5, samplesv6, samples_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
217 samplesv7 = vec_ld(96, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
218 samplesv6 = vec_perm(samplesv6, samplesv7, samples_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
219 samplesv8 = vec_ld(112, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
220 samplesv7 = vec_perm(samplesv7, samplesv8, samples_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
221 samplesv9 = vec_ld(128, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
222 samplesv8 = vec_perm(samplesv8, samplesv9, samples_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
223
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
224 temp1 = vec_add(samplesv1,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
225 vec_perm(samplesv8, samplesv8, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
226 //vec_st(temp1, 0, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
227 b1v0 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
228 temp1 = vec_add(samplesv2,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
229 vec_perm(samplesv7, samplesv7, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
230 //vec_st(temp1, 16, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
231 b1v1 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
232 temp1 = vec_add(samplesv3,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
233 vec_perm(samplesv6, samplesv6, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
234 //vec_st(temp1, 32, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
235 b1v2 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
236 temp1 = vec_add(samplesv4,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
237 vec_perm(samplesv5, samplesv5, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
238 //vec_st(temp1, 48, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
239 b1v3 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
240
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
241 costabv1 = vec_ld(0, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
242 costabv2 = vec_ld(16, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
243 costabv1 = vec_perm(costabv1, costabv2, costab_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
244 costabv3 = vec_ld(32, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
245 costabv2 = vec_perm(costabv2, costabv3, costab_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
246 costabv4 = vec_ld(48, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
247 costabv3 = vec_perm(costabv3, costabv4, costab_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
248 costabv5 = vec_ld(64, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
249 costabv4 = vec_perm(costabv4, costabv5, costab_perm);
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 27318
diff changeset
250
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
251 temp1 = vec_sub(vec_perm(samplesv4, samplesv4, reverse),
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
252 samplesv5);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
253 temp2 = vec_madd(temp1,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
254 vec_perm(costabv4, costabv4, reverse),
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
255 vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
256 //vec_st(temp2, 64, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
257 b1v4 = temp2;
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 27318
diff changeset
258
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
259 temp1 = vec_sub(vec_perm(samplesv3, samplesv3, reverse),
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
260 samplesv6);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
261 temp2 = vec_madd(temp1,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
262 vec_perm(costabv3, costabv3, reverse),
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
263 vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
264 //vec_st(temp2, 80, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
265 b1v5 = temp2;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
266 temp1 = vec_sub(vec_perm(samplesv2, samplesv2, reverse),
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
267 samplesv7);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
268 temp2 = vec_madd(temp1,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
269 vec_perm(costabv2, costabv2, reverse),
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
270 vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
271 //vec_st(temp2, 96, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
272 b1v6 = temp2;
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 27318
diff changeset
273
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
274 temp1 = vec_sub(vec_perm(samplesv1, samplesv1, reverse),
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
275 samplesv8);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
276 temp2 = vec_madd(temp1,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
277 vec_perm(costabv1, costabv1, reverse),
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
278 vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
279 //vec_st(temp2, 112, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
280 b1v7 = temp2;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
281
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
282 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
283
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
284 {
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
285 register vector float
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
286 b2v0, b2v1, b2v2, b2v3,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
287 b2v4, b2v5, b2v6, b2v7;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
288 {
12131
d155623271e3 fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents: 11980
diff changeset
289 register real *costab = mp3lib_pnts[1];
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
290 register vector float costabv1r, costabv2r, costabv1, costabv2, costabv3;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
291 register vector unsigned char costab_perm = vec_lvsl(0, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
292
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
293 costabv1 = vec_ld(0, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
294 costabv2 = vec_ld(16, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
295 costabv1 = vec_perm(costabv1, costabv2, costab_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
296 costabv3 = vec_ld(32, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
297 costabv2 = vec_perm(costabv2, costabv3 , costab_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
298 costabv1r = vec_perm(costabv1, costabv1, reverse);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
299 costabv2r = vec_perm(costabv2, costabv2, reverse);
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 27318
diff changeset
300
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
301 temp1 = vec_add(b1v0, vec_perm(b1v3, b1v3, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
302 //vec_st(temp1, 0, b2);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
303 b2v0 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
304 temp1 = vec_add(b1v1, vec_perm(b1v2, b1v2, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
305 //vec_st(temp1, 16, b2);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
306 b2v1 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
307 temp2 = vec_sub(vec_perm(b1v1, b1v1, reverse), b1v2);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
308 temp1 = vec_madd(temp2, costabv2r, vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
309 //vec_st(temp1, 32, b2);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
310 b2v2 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
311 temp2 = vec_sub(vec_perm(b1v0, b1v0, reverse), b1v3);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
312 temp1 = vec_madd(temp2, costabv1r, vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
313 //vec_st(temp1, 48, b2);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
314 b2v3 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
315 temp1 = vec_add(b1v4, vec_perm(b1v7, b1v7, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
316 //vec_st(temp1, 64, b2);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
317 b2v4 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
318 temp1 = vec_add(b1v5, vec_perm(b1v6, b1v6, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
319 //vec_st(temp1, 80, b2);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
320 b2v5 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
321 temp2 = vec_sub(b1v6, vec_perm(b1v5, b1v5, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
322 temp1 = vec_madd(temp2, costabv2r, vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
323 //vec_st(temp1, 96, b2);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
324 b2v6 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
325 temp2 = vec_sub(b1v7, vec_perm(b1v4, b1v4, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
326 temp1 = vec_madd(temp2, costabv1r, vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
327 //vec_st(temp1, 112, b2);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
328 b2v7 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
329 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
330
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
331 {
12131
d155623271e3 fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents: 11980
diff changeset
332 register real *costab = mp3lib_pnts[2];
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
333
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 27318
diff changeset
334
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
335 vector float costabv1r, costabv1, costabv2;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
336 vector unsigned char costab_perm = vec_lvsl(0, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
337
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
338 costabv1 = vec_ld(0, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
339 costabv2 = vec_ld(16, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
340 costabv1 = vec_perm(costabv1, costabv2, costab_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
341 costabv1r = vec_perm(costabv1, costabv1, reverse);
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 27318
diff changeset
342
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
343 temp1 = vec_add(b2v0, vec_perm(b2v1, b2v1, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
344 vec_st(temp1, 0, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
345 temp2 = vec_sub(vec_perm(b2v0, b2v0, reverse), b2v1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
346 temp1 = vec_madd(temp2, costabv1r, vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
347 vec_st(temp1, 16, b1);
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 27318
diff changeset
348
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
349 temp1 = vec_add(b2v2, vec_perm(b2v3, b2v3, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
350 vec_st(temp1, 32, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
351 temp2 = vec_sub(b2v3, vec_perm(b2v2, b2v2, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
352 temp1 = vec_madd(temp2, costabv1r, vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
353 vec_st(temp1, 48, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
354
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
355 temp1 = vec_add(b2v4, vec_perm(b2v5, b2v5, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
356 vec_st(temp1, 64, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
357 temp2 = vec_sub(vec_perm(b2v4, b2v4, reverse), b2v5);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
358 temp1 = vec_madd(temp2, costabv1r, vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
359 vec_st(temp1, 80, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
360
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
361 temp1 = vec_add(b2v6, vec_perm(b2v7, b2v7, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
362 vec_st(temp1, 96, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
363 temp2 = vec_sub(b2v7, vec_perm(b2v6, b2v6, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
364 temp1 = vec_madd(temp2, costabv1r, vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
365 vec_st(temp1, 112, b1);
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 27318
diff changeset
366
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
367 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
368 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
369 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
370
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
371 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
372
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
373 {
12131
d155623271e3 fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents: 11980
diff changeset
374 register real const cos0 = mp3lib_pnts[3][0];
d155623271e3 fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents: 11980
diff changeset
375 register real const cos1 = mp3lib_pnts[3][1];
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
376
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
377 b2[0x00] = b1[0x00] + b1[0x03];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
378 b2[0x01] = b1[0x01] + b1[0x02];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
379 b2[0x02] = (b1[0x01] - b1[0x02]) * cos1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
380 b2[0x03] = (b1[0x00] - b1[0x03]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
381 b2[0x04] = b1[0x04] + b1[0x07];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
382 b2[0x05] = b1[0x05] + b1[0x06];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
383 b2[0x06] = (b1[0x06] - b1[0x05]) * cos1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
384 b2[0x07] = (b1[0x07] - b1[0x04]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
385 b2[0x08] = b1[0x08] + b1[0x0B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
386 b2[0x09] = b1[0x09] + b1[0x0A];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
387 b2[0x0A] = (b1[0x09] - b1[0x0A]) * cos1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
388 b2[0x0B] = (b1[0x08] - b1[0x0B]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
389 b2[0x0C] = b1[0x0C] + b1[0x0F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
390 b2[0x0D] = b1[0x0D] + b1[0x0E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
391 b2[0x0E] = (b1[0x0E] - b1[0x0D]) * cos1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
392 b2[0x0F] = (b1[0x0F] - b1[0x0C]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
393 b2[0x10] = b1[0x10] + b1[0x13];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
394 b2[0x11] = b1[0x11] + b1[0x12];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
395 b2[0x12] = (b1[0x11] - b1[0x12]) * cos1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
396 b2[0x13] = (b1[0x10] - b1[0x13]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
397 b2[0x14] = b1[0x14] + b1[0x17];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
398 b2[0x15] = b1[0x15] + b1[0x16];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
399 b2[0x16] = (b1[0x16] - b1[0x15]) * cos1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
400 b2[0x17] = (b1[0x17] - b1[0x14]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
401 b2[0x18] = b1[0x18] + b1[0x1B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
402 b2[0x19] = b1[0x19] + b1[0x1A];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
403 b2[0x1A] = (b1[0x19] - b1[0x1A]) * cos1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
404 b2[0x1B] = (b1[0x18] - b1[0x1B]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
405 b2[0x1C] = b1[0x1C] + b1[0x1F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
406 b2[0x1D] = b1[0x1D] + b1[0x1E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
407 b2[0x1E] = (b1[0x1E] - b1[0x1D]) * cos1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
408 b2[0x1F] = (b1[0x1F] - b1[0x1C]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
409 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
410
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
411 {
12131
d155623271e3 fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents: 11980
diff changeset
412 register real const cos0 = mp3lib_pnts[4][0];
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
413
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
414 b1[0x00] = b2[0x00] + b2[0x01];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
415 b1[0x01] = (b2[0x00] - b2[0x01]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
416 b1[0x02] = b2[0x02] + b2[0x03];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
417 b1[0x03] = (b2[0x03] - b2[0x02]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
418 b1[0x02] += b1[0x03];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
419
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
420 b1[0x04] = b2[0x04] + b2[0x05];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
421 b1[0x05] = (b2[0x04] - b2[0x05]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
422 b1[0x06] = b2[0x06] + b2[0x07];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
423 b1[0x07] = (b2[0x07] - b2[0x06]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
424 b1[0x06] += b1[0x07];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
425 b1[0x04] += b1[0x06];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
426 b1[0x06] += b1[0x05];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
427 b1[0x05] += b1[0x07];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
428
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
429 b1[0x08] = b2[0x08] + b2[0x09];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
430 b1[0x09] = (b2[0x08] - b2[0x09]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
431 b1[0x0A] = b2[0x0A] + b2[0x0B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
432 b1[0x0B] = (b2[0x0B] - b2[0x0A]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
433 b1[0x0A] += b1[0x0B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
434
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
435 b1[0x0C] = b2[0x0C] + b2[0x0D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
436 b1[0x0D] = (b2[0x0C] - b2[0x0D]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
437 b1[0x0E] = b2[0x0E] + b2[0x0F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
438 b1[0x0F] = (b2[0x0F] - b2[0x0E]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
439 b1[0x0E] += b1[0x0F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
440 b1[0x0C] += b1[0x0E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
441 b1[0x0E] += b1[0x0D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
442 b1[0x0D] += b1[0x0F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
443
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
444 b1[0x10] = b2[0x10] + b2[0x11];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
445 b1[0x11] = (b2[0x10] - b2[0x11]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
446 b1[0x12] = b2[0x12] + b2[0x13];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
447 b1[0x13] = (b2[0x13] - b2[0x12]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
448 b1[0x12] += b1[0x13];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
449
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
450 b1[0x14] = b2[0x14] + b2[0x15];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
451 b1[0x15] = (b2[0x14] - b2[0x15]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
452 b1[0x16] = b2[0x16] + b2[0x17];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
453 b1[0x17] = (b2[0x17] - b2[0x16]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
454 b1[0x16] += b1[0x17];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
455 b1[0x14] += b1[0x16];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
456 b1[0x16] += b1[0x15];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
457 b1[0x15] += b1[0x17];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
458
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
459 b1[0x18] = b2[0x18] + b2[0x19];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
460 b1[0x19] = (b2[0x18] - b2[0x19]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
461 b1[0x1A] = b2[0x1A] + b2[0x1B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
462 b1[0x1B] = (b2[0x1B] - b2[0x1A]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
463 b1[0x1A] += b1[0x1B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
464
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
465 b1[0x1C] = b2[0x1C] + b2[0x1D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
466 b1[0x1D] = (b2[0x1C] - b2[0x1D]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
467 b1[0x1E] = b2[0x1E] + b2[0x1F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
468 b1[0x1F] = (b2[0x1F] - b2[0x1E]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
469 b1[0x1E] += b1[0x1F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
470 b1[0x1C] += b1[0x1E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
471 b1[0x1E] += b1[0x1D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
472 b1[0x1D] += b1[0x1F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
473 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
474
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
475 out0[0x10*16] = b1[0x00];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
476 out0[0x10*12] = b1[0x04];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
477 out0[0x10* 8] = b1[0x02];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
478 out0[0x10* 4] = b1[0x06];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
479 out0[0x10* 0] = b1[0x01];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
480 out1[0x10* 0] = b1[0x01];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
481 out1[0x10* 4] = b1[0x05];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
482 out1[0x10* 8] = b1[0x03];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
483 out1[0x10*12] = b1[0x07];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
484
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
485 b1[0x08] += b1[0x0C];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
486 out0[0x10*14] = b1[0x08];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
487 b1[0x0C] += b1[0x0a];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
488 out0[0x10*10] = b1[0x0C];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
489 b1[0x0A] += b1[0x0E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
490 out0[0x10* 6] = b1[0x0A];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
491 b1[0x0E] += b1[0x09];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
492 out0[0x10* 2] = b1[0x0E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
493 b1[0x09] += b1[0x0D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
494 out1[0x10* 2] = b1[0x09];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
495 b1[0x0D] += b1[0x0B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
496 out1[0x10* 6] = b1[0x0D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
497 b1[0x0B] += b1[0x0F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
498 out1[0x10*10] = b1[0x0B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
499 out1[0x10*14] = b1[0x0F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
500
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
501 b1[0x18] += b1[0x1C];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
502 out0[0x10*15] = b1[0x10] + b1[0x18];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
503 out0[0x10*13] = b1[0x18] + b1[0x14];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
504 b1[0x1C] += b1[0x1a];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
505 out0[0x10*11] = b1[0x14] + b1[0x1C];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
506 out0[0x10* 9] = b1[0x1C] + b1[0x12];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
507 b1[0x1A] += b1[0x1E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
508 out0[0x10* 7] = b1[0x12] + b1[0x1A];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
509 out0[0x10* 5] = b1[0x1A] + b1[0x16];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
510 b1[0x1E] += b1[0x19];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
511 out0[0x10* 3] = b1[0x16] + b1[0x1E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
512 out0[0x10* 1] = b1[0x1E] + b1[0x11];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
513 b1[0x19] += b1[0x1D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
514 out1[0x10* 1] = b1[0x11] + b1[0x19];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
515 out1[0x10* 3] = b1[0x19] + b1[0x15];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
516 b1[0x1D] += b1[0x1B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
517 out1[0x10* 5] = b1[0x15] + b1[0x1D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
518 out1[0x10* 7] = b1[0x1D] + b1[0x13];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
519 b1[0x1B] += b1[0x1F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
520 out1[0x10* 9] = b1[0x13] + b1[0x1B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
521 out1[0x10*11] = b1[0x1B] + b1[0x17];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
522 out1[0x10*13] = b1[0x17] + b1[0x1F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
523 out1[0x10*15] = b1[0x1F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
524 }