annotate mp3lib/dct64_altivec.c @ 27236:9cbd4395dfca

Dependency files should be refreshed when object files are rebuilt. Express this with Makefile syntax instead of in the dependency file generation command.
author diego
date Sat, 12 Jul 2008 08:23:10 +0000
parents 2aadf9302854
children bb5ed9aa34fc
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
1
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
2 /*
11980
72256bd9557b copyright update by the author
diego
parents: 9122
diff changeset
3 * Discrete Cosine Transform (DCT) for Altivec
72256bd9557b copyright update by the author
diego
parents: 9122
diff changeset
4 * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
72256bd9557b copyright update by the author
diego
parents: 9122
diff changeset
5 * based upon code from "mp3lib/dct64.c"
18848
2029204dd999 Add missing license, as chosen by its author
gpoirier
parents: 12131
diff changeset
6 * This file is free software; you can redistribute it and/or
2029204dd999 Add missing license, as chosen by its author
gpoirier
parents: 12131
diff changeset
7 * modify it under the terms of the GNU Lesser General Public License
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
8 */
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
9
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
10 #define real float
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
11
25341
baddb0681b2e Add missing #include <stdio.h>, fixes the warning:
diego
parents: 25328
diff changeset
12 #include <stdio.h>
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
13 #include "mpg123.h"
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
14
25328
6f0309e575e0 There is a check for altivec.h in configure so use the preprocessor directive
diego
parents: 25327
diff changeset
15 #ifdef HAVE_ALTIVEC_H
9122
5ba896a38d75 The two attached patches *should* allow for proper
arpi
parents: 9002
diff changeset
16 #include <altivec.h>
5ba896a38d75 The two attached patches *should* allow for proper
arpi
parents: 9002
diff changeset
17 #endif
5ba896a38d75 The two attached patches *should* allow for proper
arpi
parents: 9002
diff changeset
18
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
19 // used to build register permutation vectors (vcprm)
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
20 // the 's' are for words in the _s_econd vector
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
21 #define WORD_0 0x00,0x01,0x02,0x03
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
22 #define WORD_1 0x04,0x05,0x06,0x07
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
23 #define WORD_2 0x08,0x09,0x0a,0x0b
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
24 #define WORD_3 0x0c,0x0d,0x0e,0x0f
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
25 #define WORD_s0 0x10,0x11,0x12,0x13
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
26 #define WORD_s1 0x14,0x15,0x16,0x17
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
27 #define WORD_s2 0x18,0x19,0x1a,0x1b
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
28 #define WORD_s3 0x1c,0x1d,0x1e,0x1f
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
29
26895
2aadf9302854 Check for HAVE_ALTIVEC_VECTOR_BRACES instead of __APPLE_CC__.
diego
parents: 25996
diff changeset
30 #ifdef HAVE_ALTIVEC_VECTOR_BRACES
2aadf9302854 Check for HAVE_ALTIVEC_VECTOR_BRACES instead of __APPLE_CC__.
diego
parents: 25996
diff changeset
31 #define AVV(x...) {x}
2aadf9302854 Check for HAVE_ALTIVEC_VECTOR_BRACES instead of __APPLE_CC__.
diego
parents: 25996
diff changeset
32 #else
25996
dcb28cc9ca2c Refactor AltiVec macros as done for FFmpeg.
diego
parents: 25341
diff changeset
33 #define AVV(x...) (x)
9122
5ba896a38d75 The two attached patches *should* allow for proper
arpi
parents: 9002
diff changeset
34 #endif
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
35
25996
dcb28cc9ca2c Refactor AltiVec macros as done for FFmpeg.
diego
parents: 25341
diff changeset
36 #define vcprm(a,b,c,d) (const vector unsigned char)AVV(WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d)
dcb28cc9ca2c Refactor AltiVec macros as done for FFmpeg.
diego
parents: 25341
diff changeset
37 #define vcii(a,b,c,d) (const vector float)AVV(FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d)
dcb28cc9ca2c Refactor AltiVec macros as done for FFmpeg.
diego
parents: 25341
diff changeset
38
dcb28cc9ca2c Refactor AltiVec macros as done for FFmpeg.
diego
parents: 25341
diff changeset
39 #define FOUROF(a) AVV(a,a,a,a)
dcb28cc9ca2c Refactor AltiVec macros as done for FFmpeg.
diego
parents: 25341
diff changeset
40
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
41 // vcprmle is used to keep the same index as in the SSE version.
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
42 // it's the same as vcprm, with the indices reversed
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
43 // ('le' is Little Endian)
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
44 #define vcprmle(a,b,c,d) vcprm(d,c,b,a)
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
45
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
46 // used to build inverse/identity vectors (vcii)
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
47 // n is _n_egative, p is _p_ositive
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
48 #define FLOAT_n -1.
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
49 #define FLOAT_p 1.
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
50
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
51 void dct64_altivec(real *a,real *b,real *c)
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
52 {
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
53 real __attribute__ ((aligned(16))) b1[0x20];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
54 real __attribute__ ((aligned(16))) b2[0x20];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
55
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
56 real *out0 = a;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
57 real *out1 = b;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
58 real *samples = c;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
59
9122
5ba896a38d75 The two attached patches *should* allow for proper
arpi
parents: 9002
diff changeset
60 const vector float vczero = (const vector float)FOUROF(0.);
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
61 const vector unsigned char reverse = (const vector unsigned char)vcprm(3,2,1,0);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
62
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
63
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
64 if (((unsigned long)b1 & 0x0000000F) ||
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
65 ((unsigned long)b2 & 0x0000000F))
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
66
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
67 {
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
68 printf("MISALIGNED:\t%p\t%p\t%p\t%p\t%p\n",
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
69 b1, b2, a, b, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
70 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
71
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
72
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
73 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
74
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
75 {
12131
d155623271e3 fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents: 11980
diff changeset
76 register real *costab = mp3lib_pnts[0];
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
77
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
78 b1[0x00] = samples[0x00] + samples[0x1F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
79 b1[0x01] = samples[0x01] + samples[0x1E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
80 b1[0x02] = samples[0x02] + samples[0x1D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
81 b1[0x03] = samples[0x03] + samples[0x1C];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
82 b1[0x04] = samples[0x04] + samples[0x1B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
83 b1[0x05] = samples[0x05] + samples[0x1A];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
84 b1[0x06] = samples[0x06] + samples[0x19];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
85 b1[0x07] = samples[0x07] + samples[0x18];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
86 b1[0x08] = samples[0x08] + samples[0x17];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
87 b1[0x09] = samples[0x09] + samples[0x16];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
88 b1[0x0A] = samples[0x0A] + samples[0x15];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
89 b1[0x0B] = samples[0x0B] + samples[0x14];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
90 b1[0x0C] = samples[0x0C] + samples[0x13];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
91 b1[0x0D] = samples[0x0D] + samples[0x12];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
92 b1[0x0E] = samples[0x0E] + samples[0x11];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
93 b1[0x0F] = samples[0x0F] + samples[0x10];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
94 b1[0x10] = (samples[0x0F] - samples[0x10]) * costab[0xF];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
95 b1[0x11] = (samples[0x0E] - samples[0x11]) * costab[0xE];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
96 b1[0x12] = (samples[0x0D] - samples[0x12]) * costab[0xD];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
97 b1[0x13] = (samples[0x0C] - samples[0x13]) * costab[0xC];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
98 b1[0x14] = (samples[0x0B] - samples[0x14]) * costab[0xB];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
99 b1[0x15] = (samples[0x0A] - samples[0x15]) * costab[0xA];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
100 b1[0x16] = (samples[0x09] - samples[0x16]) * costab[0x9];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
101 b1[0x17] = (samples[0x08] - samples[0x17]) * costab[0x8];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
102 b1[0x18] = (samples[0x07] - samples[0x18]) * costab[0x7];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
103 b1[0x19] = (samples[0x06] - samples[0x19]) * costab[0x6];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
104 b1[0x1A] = (samples[0x05] - samples[0x1A]) * costab[0x5];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
105 b1[0x1B] = (samples[0x04] - samples[0x1B]) * costab[0x4];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
106 b1[0x1C] = (samples[0x03] - samples[0x1C]) * costab[0x3];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
107 b1[0x1D] = (samples[0x02] - samples[0x1D]) * costab[0x2];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
108 b1[0x1E] = (samples[0x01] - samples[0x1E]) * costab[0x1];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
109 b1[0x1F] = (samples[0x00] - samples[0x1F]) * costab[0x0];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
110
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
111 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
112 {
12131
d155623271e3 fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents: 11980
diff changeset
113 register real *costab = mp3lib_pnts[1];
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
114
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
115 b2[0x00] = b1[0x00] + b1[0x0F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
116 b2[0x01] = b1[0x01] + b1[0x0E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
117 b2[0x02] = b1[0x02] + b1[0x0D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
118 b2[0x03] = b1[0x03] + b1[0x0C];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
119 b2[0x04] = b1[0x04] + b1[0x0B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
120 b2[0x05] = b1[0x05] + b1[0x0A];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
121 b2[0x06] = b1[0x06] + b1[0x09];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
122 b2[0x07] = b1[0x07] + b1[0x08];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
123 b2[0x08] = (b1[0x07] - b1[0x08]) * costab[7];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
124 b2[0x09] = (b1[0x06] - b1[0x09]) * costab[6];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
125 b2[0x0A] = (b1[0x05] - b1[0x0A]) * costab[5];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
126 b2[0x0B] = (b1[0x04] - b1[0x0B]) * costab[4];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
127 b2[0x0C] = (b1[0x03] - b1[0x0C]) * costab[3];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
128 b2[0x0D] = (b1[0x02] - b1[0x0D]) * costab[2];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
129 b2[0x0E] = (b1[0x01] - b1[0x0E]) * costab[1];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
130 b2[0x0F] = (b1[0x00] - b1[0x0F]) * costab[0];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
131 b2[0x10] = b1[0x10] + b1[0x1F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
132 b2[0x11] = b1[0x11] + b1[0x1E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
133 b2[0x12] = b1[0x12] + b1[0x1D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
134 b2[0x13] = b1[0x13] + b1[0x1C];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
135 b2[0x14] = b1[0x14] + b1[0x1B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
136 b2[0x15] = b1[0x15] + b1[0x1A];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
137 b2[0x16] = b1[0x16] + b1[0x19];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
138 b2[0x17] = b1[0x17] + b1[0x18];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
139 b2[0x18] = (b1[0x18] - b1[0x17]) * costab[7];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
140 b2[0x19] = (b1[0x19] - b1[0x16]) * costab[6];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
141 b2[0x1A] = (b1[0x1A] - b1[0x15]) * costab[5];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
142 b2[0x1B] = (b1[0x1B] - b1[0x14]) * costab[4];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
143 b2[0x1C] = (b1[0x1C] - b1[0x13]) * costab[3];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
144 b2[0x1D] = (b1[0x1D] - b1[0x12]) * costab[2];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
145 b2[0x1E] = (b1[0x1E] - b1[0x11]) * costab[1];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
146 b2[0x1F] = (b1[0x1F] - b1[0x10]) * costab[0];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
147
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
148 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
149
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
150 {
12131
d155623271e3 fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents: 11980
diff changeset
151 register real *costab = mp3lib_pnts[2];
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
152
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
153 b1[0x00] = b2[0x00] + b2[0x07];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
154 b1[0x01] = b2[0x01] + b2[0x06];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
155 b1[0x02] = b2[0x02] + b2[0x05];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
156 b1[0x03] = b2[0x03] + b2[0x04];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
157 b1[0x04] = (b2[0x03] - b2[0x04]) * costab[3];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
158 b1[0x05] = (b2[0x02] - b2[0x05]) * costab[2];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
159 b1[0x06] = (b2[0x01] - b2[0x06]) * costab[1];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
160 b1[0x07] = (b2[0x00] - b2[0x07]) * costab[0];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
161 b1[0x08] = b2[0x08] + b2[0x0F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
162 b1[0x09] = b2[0x09] + b2[0x0E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
163 b1[0x0A] = b2[0x0A] + b2[0x0D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
164 b1[0x0B] = b2[0x0B] + b2[0x0C];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
165 b1[0x0C] = (b2[0x0C] - b2[0x0B]) * costab[3];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
166 b1[0x0D] = (b2[0x0D] - b2[0x0A]) * costab[2];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
167 b1[0x0E] = (b2[0x0E] - b2[0x09]) * costab[1];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
168 b1[0x0F] = (b2[0x0F] - b2[0x08]) * costab[0];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
169 b1[0x10] = b2[0x10] + b2[0x17];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
170 b1[0x11] = b2[0x11] + b2[0x16];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
171 b1[0x12] = b2[0x12] + b2[0x15];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
172 b1[0x13] = b2[0x13] + b2[0x14];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
173 b1[0x14] = (b2[0x13] - b2[0x14]) * costab[3];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
174 b1[0x15] = (b2[0x12] - b2[0x15]) * costab[2];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
175 b1[0x16] = (b2[0x11] - b2[0x16]) * costab[1];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
176 b1[0x17] = (b2[0x10] - b2[0x17]) * costab[0];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
177 b1[0x18] = b2[0x18] + b2[0x1F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
178 b1[0x19] = b2[0x19] + b2[0x1E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
179 b1[0x1A] = b2[0x1A] + b2[0x1D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
180 b1[0x1B] = b2[0x1B] + b2[0x1C];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
181 b1[0x1C] = (b2[0x1C] - b2[0x1B]) * costab[3];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
182 b1[0x1D] = (b2[0x1D] - b2[0x1A]) * costab[2];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
183 b1[0x1E] = (b2[0x1E] - b2[0x19]) * costab[1];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
184 b1[0x1F] = (b2[0x1F] - b2[0x18]) * costab[0];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
185 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
186
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
187 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
188
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
189 // How does it work ?
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
190 // the first three passes are reproduced in the three blocks below
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
191 // all computations are done on 4-element vectors
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
192 // 'reverse' is a special permutation vector used to reverse
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
193 // the order of the elements inside a vector.
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
194 // note that all loads/stores to b1 (b2) between passes 1 and 2 (2 and 3)
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
195 // have been removed, all elements are stored inside b1vX (b2vX)
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
196 {
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
197 register vector float
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
198 b1v0, b1v1, b1v2, b1v3,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
199 b1v4, b1v5, b1v6, b1v7;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
200 register vector float
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
201 temp1, temp2;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
202
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
203 {
12131
d155623271e3 fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents: 11980
diff changeset
204 register real *costab = mp3lib_pnts[0];
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
205
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
206 register vector float
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
207 samplesv1, samplesv2, samplesv3, samplesv4,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
208 samplesv5, samplesv6, samplesv7, samplesv8,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
209 samplesv9;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
210 register vector unsigned char samples_perm = vec_lvsl(0, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
211 register vector float costabv1, costabv2, costabv3, costabv4, costabv5;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
212 register vector unsigned char costab_perm = vec_lvsl(0, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
213
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
214 samplesv1 = vec_ld(0, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
215 samplesv2 = vec_ld(16, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
216 samplesv1 = vec_perm(samplesv1, samplesv2, samples_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
217 samplesv3 = vec_ld(32, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
218 samplesv2 = vec_perm(samplesv2, samplesv3, samples_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
219 samplesv4 = vec_ld(48, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
220 samplesv3 = vec_perm(samplesv3, samplesv4, samples_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
221 samplesv5 = vec_ld(64, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
222 samplesv4 = vec_perm(samplesv4, samplesv5, samples_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
223 samplesv6 = vec_ld(80, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
224 samplesv5 = vec_perm(samplesv5, samplesv6, samples_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
225 samplesv7 = vec_ld(96, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
226 samplesv6 = vec_perm(samplesv6, samplesv7, samples_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
227 samplesv8 = vec_ld(112, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
228 samplesv7 = vec_perm(samplesv7, samplesv8, samples_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
229 samplesv9 = vec_ld(128, samples);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
230 samplesv8 = vec_perm(samplesv8, samplesv9, samples_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
231
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
232 temp1 = vec_add(samplesv1,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
233 vec_perm(samplesv8, samplesv8, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
234 //vec_st(temp1, 0, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
235 b1v0 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
236 temp1 = vec_add(samplesv2,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
237 vec_perm(samplesv7, samplesv7, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
238 //vec_st(temp1, 16, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
239 b1v1 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
240 temp1 = vec_add(samplesv3,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
241 vec_perm(samplesv6, samplesv6, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
242 //vec_st(temp1, 32, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
243 b1v2 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
244 temp1 = vec_add(samplesv4,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
245 vec_perm(samplesv5, samplesv5, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
246 //vec_st(temp1, 48, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
247 b1v3 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
248
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
249 costabv1 = vec_ld(0, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
250 costabv2 = vec_ld(16, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
251 costabv1 = vec_perm(costabv1, costabv2, costab_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
252 costabv3 = vec_ld(32, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
253 costabv2 = vec_perm(costabv2, costabv3, costab_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
254 costabv4 = vec_ld(48, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
255 costabv3 = vec_perm(costabv3, costabv4, costab_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
256 costabv5 = vec_ld(64, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
257 costabv4 = vec_perm(costabv4, costabv5, costab_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
258
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
259 temp1 = vec_sub(vec_perm(samplesv4, samplesv4, reverse),
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
260 samplesv5);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
261 temp2 = vec_madd(temp1,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
262 vec_perm(costabv4, costabv4, reverse),
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
263 vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
264 //vec_st(temp2, 64, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
265 b1v4 = temp2;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
266
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
267 temp1 = vec_sub(vec_perm(samplesv3, samplesv3, reverse),
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
268 samplesv6);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
269 temp2 = vec_madd(temp1,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
270 vec_perm(costabv3, costabv3, reverse),
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
271 vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
272 //vec_st(temp2, 80, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
273 b1v5 = temp2;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
274 temp1 = vec_sub(vec_perm(samplesv2, samplesv2, reverse),
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
275 samplesv7);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
276 temp2 = vec_madd(temp1,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
277 vec_perm(costabv2, costabv2, reverse),
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
278 vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
279 //vec_st(temp2, 96, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
280 b1v6 = temp2;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
281
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
282 temp1 = vec_sub(vec_perm(samplesv1, samplesv1, reverse),
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
283 samplesv8);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
284 temp2 = vec_madd(temp1,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
285 vec_perm(costabv1, costabv1, reverse),
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
286 vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
287 //vec_st(temp2, 112, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
288 b1v7 = temp2;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
289
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
290 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
291
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
292 {
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
293 register vector float
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
294 b2v0, b2v1, b2v2, b2v3,
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
295 b2v4, b2v5, b2v6, b2v7;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
296 {
12131
d155623271e3 fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents: 11980
diff changeset
297 register real *costab = mp3lib_pnts[1];
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
298 register vector float costabv1r, costabv2r, costabv1, costabv2, costabv3;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
299 register vector unsigned char costab_perm = vec_lvsl(0, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
300
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
301 costabv1 = vec_ld(0, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
302 costabv2 = vec_ld(16, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
303 costabv1 = vec_perm(costabv1, costabv2, costab_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
304 costabv3 = vec_ld(32, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
305 costabv2 = vec_perm(costabv2, costabv3 , costab_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
306 costabv1r = vec_perm(costabv1, costabv1, reverse);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
307 costabv2r = vec_perm(costabv2, costabv2, reverse);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
308
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
309 temp1 = vec_add(b1v0, vec_perm(b1v3, b1v3, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
310 //vec_st(temp1, 0, b2);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
311 b2v0 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
312 temp1 = vec_add(b1v1, vec_perm(b1v2, b1v2, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
313 //vec_st(temp1, 16, b2);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
314 b2v1 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
315 temp2 = vec_sub(vec_perm(b1v1, b1v1, reverse), b1v2);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
316 temp1 = vec_madd(temp2, costabv2r, vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
317 //vec_st(temp1, 32, b2);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
318 b2v2 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
319 temp2 = vec_sub(vec_perm(b1v0, b1v0, reverse), b1v3);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
320 temp1 = vec_madd(temp2, costabv1r, vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
321 //vec_st(temp1, 48, b2);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
322 b2v3 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
323 temp1 = vec_add(b1v4, vec_perm(b1v7, b1v7, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
324 //vec_st(temp1, 64, b2);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
325 b2v4 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
326 temp1 = vec_add(b1v5, vec_perm(b1v6, b1v6, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
327 //vec_st(temp1, 80, b2);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
328 b2v5 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
329 temp2 = vec_sub(b1v6, vec_perm(b1v5, b1v5, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
330 temp1 = vec_madd(temp2, costabv2r, vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
331 //vec_st(temp1, 96, b2);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
332 b2v6 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
333 temp2 = vec_sub(b1v7, vec_perm(b1v4, b1v4, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
334 temp1 = vec_madd(temp2, costabv1r, vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
335 //vec_st(temp1, 112, b2);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
336 b2v7 = temp1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
337 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
338
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
339 {
12131
d155623271e3 fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents: 11980
diff changeset
340 register real *costab = mp3lib_pnts[2];
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
341
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
342
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
343 vector float costabv1r, costabv1, costabv2;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
344 vector unsigned char costab_perm = vec_lvsl(0, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
345
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
346 costabv1 = vec_ld(0, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
347 costabv2 = vec_ld(16, costab);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
348 costabv1 = vec_perm(costabv1, costabv2, costab_perm);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
349 costabv1r = vec_perm(costabv1, costabv1, reverse);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
350
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
351 temp1 = vec_add(b2v0, vec_perm(b2v1, b2v1, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
352 vec_st(temp1, 0, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
353 temp2 = vec_sub(vec_perm(b2v0, b2v0, reverse), b2v1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
354 temp1 = vec_madd(temp2, costabv1r, vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
355 vec_st(temp1, 16, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
356
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
357 temp1 = vec_add(b2v2, vec_perm(b2v3, b2v3, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
358 vec_st(temp1, 32, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
359 temp2 = vec_sub(b2v3, vec_perm(b2v2, b2v2, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
360 temp1 = vec_madd(temp2, costabv1r, vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
361 vec_st(temp1, 48, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
362
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
363 temp1 = vec_add(b2v4, vec_perm(b2v5, b2v5, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
364 vec_st(temp1, 64, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
365 temp2 = vec_sub(vec_perm(b2v4, b2v4, reverse), b2v5);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
366 temp1 = vec_madd(temp2, costabv1r, vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
367 vec_st(temp1, 80, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
368
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
369 temp1 = vec_add(b2v6, vec_perm(b2v7, b2v7, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
370 vec_st(temp1, 96, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
371 temp2 = vec_sub(b2v7, vec_perm(b2v6, b2v6, reverse));
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
372 temp1 = vec_madd(temp2, costabv1r, vczero);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
373 vec_st(temp1, 112, b1);
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
374
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
375 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
376 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
377 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
378
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
379 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
380
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
381 {
12131
d155623271e3 fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents: 11980
diff changeset
382 register real const cos0 = mp3lib_pnts[3][0];
d155623271e3 fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents: 11980
diff changeset
383 register real const cos1 = mp3lib_pnts[3][1];
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
384
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
385 b2[0x00] = b1[0x00] + b1[0x03];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
386 b2[0x01] = b1[0x01] + b1[0x02];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
387 b2[0x02] = (b1[0x01] - b1[0x02]) * cos1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
388 b2[0x03] = (b1[0x00] - b1[0x03]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
389 b2[0x04] = b1[0x04] + b1[0x07];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
390 b2[0x05] = b1[0x05] + b1[0x06];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
391 b2[0x06] = (b1[0x06] - b1[0x05]) * cos1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
392 b2[0x07] = (b1[0x07] - b1[0x04]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
393 b2[0x08] = b1[0x08] + b1[0x0B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
394 b2[0x09] = b1[0x09] + b1[0x0A];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
395 b2[0x0A] = (b1[0x09] - b1[0x0A]) * cos1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
396 b2[0x0B] = (b1[0x08] - b1[0x0B]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
397 b2[0x0C] = b1[0x0C] + b1[0x0F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
398 b2[0x0D] = b1[0x0D] + b1[0x0E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
399 b2[0x0E] = (b1[0x0E] - b1[0x0D]) * cos1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
400 b2[0x0F] = (b1[0x0F] - b1[0x0C]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
401 b2[0x10] = b1[0x10] + b1[0x13];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
402 b2[0x11] = b1[0x11] + b1[0x12];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
403 b2[0x12] = (b1[0x11] - b1[0x12]) * cos1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
404 b2[0x13] = (b1[0x10] - b1[0x13]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
405 b2[0x14] = b1[0x14] + b1[0x17];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
406 b2[0x15] = b1[0x15] + b1[0x16];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
407 b2[0x16] = (b1[0x16] - b1[0x15]) * cos1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
408 b2[0x17] = (b1[0x17] - b1[0x14]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
409 b2[0x18] = b1[0x18] + b1[0x1B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
410 b2[0x19] = b1[0x19] + b1[0x1A];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
411 b2[0x1A] = (b1[0x19] - b1[0x1A]) * cos1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
412 b2[0x1B] = (b1[0x18] - b1[0x1B]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
413 b2[0x1C] = b1[0x1C] + b1[0x1F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
414 b2[0x1D] = b1[0x1D] + b1[0x1E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
415 b2[0x1E] = (b1[0x1E] - b1[0x1D]) * cos1;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
416 b2[0x1F] = (b1[0x1F] - b1[0x1C]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
417 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
418
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
419 {
12131
d155623271e3 fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents: 11980
diff changeset
420 register real const cos0 = mp3lib_pnts[4][0];
9002
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
421
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
422 b1[0x00] = b2[0x00] + b2[0x01];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
423 b1[0x01] = (b2[0x00] - b2[0x01]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
424 b1[0x02] = b2[0x02] + b2[0x03];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
425 b1[0x03] = (b2[0x03] - b2[0x02]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
426 b1[0x02] += b1[0x03];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
427
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
428 b1[0x04] = b2[0x04] + b2[0x05];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
429 b1[0x05] = (b2[0x04] - b2[0x05]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
430 b1[0x06] = b2[0x06] + b2[0x07];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
431 b1[0x07] = (b2[0x07] - b2[0x06]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
432 b1[0x06] += b1[0x07];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
433 b1[0x04] += b1[0x06];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
434 b1[0x06] += b1[0x05];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
435 b1[0x05] += b1[0x07];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
436
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
437 b1[0x08] = b2[0x08] + b2[0x09];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
438 b1[0x09] = (b2[0x08] - b2[0x09]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
439 b1[0x0A] = b2[0x0A] + b2[0x0B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
440 b1[0x0B] = (b2[0x0B] - b2[0x0A]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
441 b1[0x0A] += b1[0x0B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
442
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
443 b1[0x0C] = b2[0x0C] + b2[0x0D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
444 b1[0x0D] = (b2[0x0C] - b2[0x0D]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
445 b1[0x0E] = b2[0x0E] + b2[0x0F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
446 b1[0x0F] = (b2[0x0F] - b2[0x0E]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
447 b1[0x0E] += b1[0x0F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
448 b1[0x0C] += b1[0x0E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
449 b1[0x0E] += b1[0x0D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
450 b1[0x0D] += b1[0x0F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
451
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
452 b1[0x10] = b2[0x10] + b2[0x11];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
453 b1[0x11] = (b2[0x10] - b2[0x11]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
454 b1[0x12] = b2[0x12] + b2[0x13];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
455 b1[0x13] = (b2[0x13] - b2[0x12]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
456 b1[0x12] += b1[0x13];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
457
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
458 b1[0x14] = b2[0x14] + b2[0x15];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
459 b1[0x15] = (b2[0x14] - b2[0x15]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
460 b1[0x16] = b2[0x16] + b2[0x17];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
461 b1[0x17] = (b2[0x17] - b2[0x16]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
462 b1[0x16] += b1[0x17];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
463 b1[0x14] += b1[0x16];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
464 b1[0x16] += b1[0x15];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
465 b1[0x15] += b1[0x17];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
466
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
467 b1[0x18] = b2[0x18] + b2[0x19];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
468 b1[0x19] = (b2[0x18] - b2[0x19]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
469 b1[0x1A] = b2[0x1A] + b2[0x1B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
470 b1[0x1B] = (b2[0x1B] - b2[0x1A]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
471 b1[0x1A] += b1[0x1B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
472
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
473 b1[0x1C] = b2[0x1C] + b2[0x1D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
474 b1[0x1D] = (b2[0x1C] - b2[0x1D]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
475 b1[0x1E] = b2[0x1E] + b2[0x1F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
476 b1[0x1F] = (b2[0x1F] - b2[0x1E]) * cos0;
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
477 b1[0x1E] += b1[0x1F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
478 b1[0x1C] += b1[0x1E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
479 b1[0x1E] += b1[0x1D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
480 b1[0x1D] += b1[0x1F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
481 }
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
482
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
483 out0[0x10*16] = b1[0x00];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
484 out0[0x10*12] = b1[0x04];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
485 out0[0x10* 8] = b1[0x02];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
486 out0[0x10* 4] = b1[0x06];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
487 out0[0x10* 0] = b1[0x01];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
488 out1[0x10* 0] = b1[0x01];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
489 out1[0x10* 4] = b1[0x05];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
490 out1[0x10* 8] = b1[0x03];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
491 out1[0x10*12] = b1[0x07];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
492
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
493 b1[0x08] += b1[0x0C];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
494 out0[0x10*14] = b1[0x08];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
495 b1[0x0C] += b1[0x0a];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
496 out0[0x10*10] = b1[0x0C];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
497 b1[0x0A] += b1[0x0E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
498 out0[0x10* 6] = b1[0x0A];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
499 b1[0x0E] += b1[0x09];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
500 out0[0x10* 2] = b1[0x0E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
501 b1[0x09] += b1[0x0D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
502 out1[0x10* 2] = b1[0x09];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
503 b1[0x0D] += b1[0x0B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
504 out1[0x10* 6] = b1[0x0D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
505 b1[0x0B] += b1[0x0F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
506 out1[0x10*10] = b1[0x0B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
507 out1[0x10*14] = b1[0x0F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
508
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
509 b1[0x18] += b1[0x1C];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
510 out0[0x10*15] = b1[0x10] + b1[0x18];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
511 out0[0x10*13] = b1[0x18] + b1[0x14];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
512 b1[0x1C] += b1[0x1a];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
513 out0[0x10*11] = b1[0x14] + b1[0x1C];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
514 out0[0x10* 9] = b1[0x1C] + b1[0x12];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
515 b1[0x1A] += b1[0x1E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
516 out0[0x10* 7] = b1[0x12] + b1[0x1A];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
517 out0[0x10* 5] = b1[0x1A] + b1[0x16];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
518 b1[0x1E] += b1[0x19];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
519 out0[0x10* 3] = b1[0x16] + b1[0x1E];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
520 out0[0x10* 1] = b1[0x1E] + b1[0x11];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
521 b1[0x19] += b1[0x1D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
522 out1[0x10* 1] = b1[0x11] + b1[0x19];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
523 out1[0x10* 3] = b1[0x19] + b1[0x15];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
524 b1[0x1D] += b1[0x1B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
525 out1[0x10* 5] = b1[0x15] + b1[0x1D];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
526 out1[0x10* 7] = b1[0x1D] + b1[0x13];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
527 b1[0x1B] += b1[0x1F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
528 out1[0x10* 9] = b1[0x13] + b1[0x1B];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
529 out1[0x10*11] = b1[0x1B] + b1[0x17];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
530 out1[0x10*13] = b1[0x17] + b1[0x1F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
531 out1[0x10*15] = b1[0x1F];
60d144a16088 An altivec-optimized DCT64 for mp3lib
arpi
parents:
diff changeset
532 }