Mercurial > libavcodec.hg
annotate ppc/fdct_altivec.c @ 5310:9aa9197034d7 libavcodec
AC-3 decoder, soc revision 40, Aug 9 00:10:14 2006 UTC by cloud9
More code cleanup.
Window is now runtime generated.
Fixed the bugs in the rematrixing routine and
in decoding AC-3 bitstreams when coupling is in use.
Still struggling to find out what affects the quality of
the produced sound. Could anybody have a look at the
IMDCT routines do_imdct_256 and do_imdct_512 and tell me
whether they are correctly implemented as described in
the standard?
author | jbr |
---|---|
date | Sat, 14 Jul 2007 15:57:51 +0000 |
parents | d5ba514e3f4a |
children | 33674fb857b5 |
rev | line source |
---|---|
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
1 /* ffmpeg/libavcodec/ppc/fdct_altivec.c, this file is part of the |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
2 * AltiVec optimized library for the FFMPEG Multimedia System |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
3 * Copyright (C) 2003 James Klicman <james@klicman.org> |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
4 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
5 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
6 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
7 * FFmpeg is free software; you can redistribute it and/or |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
8 * modify it under the terms of the GNU Lesser General Public |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
9 * License as published by the Free Software Foundation; either |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
10 * version 2.1 of the License, or (at your option) any later version. |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
11 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
12 * FFmpeg is distributed in the hope that it will be useful, |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
15 * Lesser General Public License for more details. |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
16 * |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
17 * You should have received a copy of the GNU Lesser General Public |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
18 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2819
diff
changeset
|
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
20 */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
21 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
22 |
2819 | 23 #include "common.h" |
5010
d5ba514e3f4a
Add libavcodec to compiler include flags in order to simplify header
diego
parents:
3973
diff
changeset
|
24 #include "dsputil.h" |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
25 #include "dsputil_altivec.h" |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
26 #include "gcc_fixes.h" |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
27 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
28 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
29 #define vs16(v) ((vector signed short)(v)) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
30 #define vs32(v) ((vector signed int)(v)) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
31 #define vu8(v) ((vector unsigned char)(v)) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
32 #define vu16(v) ((vector unsigned short)(v)) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
33 #define vu32(v) ((vector unsigned int)(v)) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
34 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
35 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
36 #define C1 0.98078525066375732421875000 /* cos(1*PI/16) */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
37 #define C2 0.92387950420379638671875000 /* cos(2*PI/16) */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
38 #define C3 0.83146959543228149414062500 /* cos(3*PI/16) */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
39 #define C4 0.70710676908493041992187500 /* cos(4*PI/16) */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
40 #define C5 0.55557024478912353515625000 /* cos(5*PI/16) */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
41 #define C6 0.38268342614173889160156250 /* cos(6*PI/16) */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
42 #define C7 0.19509032368659973144531250 /* cos(7*PI/16) */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
43 #define SQRT_2 1.41421353816986083984375000 /* sqrt(2) */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
44 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
45 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
46 #define W0 -(2 * C2) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
47 #define W1 (2 * C6) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
48 #define W2 (SQRT_2 * C6) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
49 #define W3 (SQRT_2 * C3) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
50 #define W4 (SQRT_2 * (-C1 + C3 + C5 - C7)) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
51 #define W5 (SQRT_2 * ( C1 + C3 - C5 + C7)) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
52 #define W6 (SQRT_2 * ( C1 + C3 + C5 - C7)) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
53 #define W7 (SQRT_2 * ( C1 + C3 - C5 - C7)) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
54 #define W8 (SQRT_2 * ( C7 - C3)) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
55 #define W9 (SQRT_2 * (-C1 - C3)) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
56 #define WA (SQRT_2 * (-C3 - C5)) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
57 #define WB (SQRT_2 * ( C5 - C3)) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
58 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
59 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
60 static vector float fdctconsts[3] = { |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
61 (vector float)AVV( W0, W1, W2, W3 ), |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
62 (vector float)AVV( W4, W5, W6, W7 ), |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
63 (vector float)AVV( W8, W9, WA, WB ) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
64 }; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
65 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
66 #define LD_W0 vec_splat(cnsts0, 0) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
67 #define LD_W1 vec_splat(cnsts0, 1) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
68 #define LD_W2 vec_splat(cnsts0, 2) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
69 #define LD_W3 vec_splat(cnsts0, 3) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
70 #define LD_W4 vec_splat(cnsts1, 0) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
71 #define LD_W5 vec_splat(cnsts1, 1) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
72 #define LD_W6 vec_splat(cnsts1, 2) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
73 #define LD_W7 vec_splat(cnsts1, 3) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
74 #define LD_W8 vec_splat(cnsts2, 0) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
75 #define LD_W9 vec_splat(cnsts2, 1) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
76 #define LD_WA vec_splat(cnsts2, 2) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
77 #define LD_WB vec_splat(cnsts2, 3) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
78 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
79 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
80 #define FDCTROW(b0,b1,b2,b3,b4,b5,b6,b7) /* {{{ */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
81 x0 = vec_add(b0, b7); /* x0 = b0 + b7; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
82 x7 = vec_sub(b0, b7); /* x7 = b0 - b7; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
83 x1 = vec_add(b1, b6); /* x1 = b1 + b6; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
84 x6 = vec_sub(b1, b6); /* x6 = b1 - b6; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
85 x2 = vec_add(b2, b5); /* x2 = b2 + b5; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
86 x5 = vec_sub(b2, b5); /* x5 = b2 - b5; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
87 x3 = vec_add(b3, b4); /* x3 = b3 + b4; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
88 x4 = vec_sub(b3, b4); /* x4 = b3 - b4; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
89 \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
90 b7 = vec_add(x0, x3); /* b7 = x0 + x3; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
91 b1 = vec_add(x1, x2); /* b1 = x1 + x2; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
92 b0 = vec_add(b7, b1); /* b0 = b7 + b1; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
93 b4 = vec_sub(b7, b1); /* b4 = b7 - b1; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
94 \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
95 b2 = vec_sub(x0, x3); /* b2 = x0 - x3; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
96 b6 = vec_sub(x1, x2); /* b6 = x1 - x2; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
97 b5 = vec_add(b6, b2); /* b5 = b6 + b2; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
98 cnst = LD_W2; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
99 b5 = vec_madd(cnst, b5, mzero); /* b5 = b5 * W2; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
100 cnst = LD_W1; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
101 b2 = vec_madd(cnst, b2, b5); /* b2 = b5 + b2 * W1; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
102 cnst = LD_W0; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
103 b6 = vec_madd(cnst, b6, b5); /* b6 = b5 + b6 * W0; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
104 \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
105 x0 = vec_add(x4, x7); /* x0 = x4 + x7; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
106 x1 = vec_add(x5, x6); /* x1 = x5 + x6; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
107 x2 = vec_add(x4, x6); /* x2 = x4 + x6; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
108 x3 = vec_add(x5, x7); /* x3 = x5 + x7; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
109 x8 = vec_add(x2, x3); /* x8 = x2 + x3; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
110 cnst = LD_W3; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
111 x8 = vec_madd(cnst, x8, mzero); /* x8 = x8 * W3; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
112 \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
113 cnst = LD_W8; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
114 x0 = vec_madd(cnst, x0, mzero); /* x0 *= W8; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
115 cnst = LD_W9; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
116 x1 = vec_madd(cnst, x1, mzero); /* x1 *= W9; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
117 cnst = LD_WA; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
118 x2 = vec_madd(cnst, x2, x8); /* x2 = x2 * WA + x8; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
119 cnst = LD_WB; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
120 x3 = vec_madd(cnst, x3, x8); /* x3 = x3 * WB + x8; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
121 \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
122 cnst = LD_W4; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
123 b7 = vec_madd(cnst, x4, x0); /* b7 = x4 * W4 + x0; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
124 cnst = LD_W5; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
125 b5 = vec_madd(cnst, x5, x1); /* b5 = x5 * W5 + x1; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
126 cnst = LD_W6; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
127 b3 = vec_madd(cnst, x6, x1); /* b3 = x6 * W6 + x1; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
128 cnst = LD_W7; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
129 b1 = vec_madd(cnst, x7, x0); /* b1 = x7 * W7 + x0; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
130 \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
131 b7 = vec_add(b7, x2); /* b7 = b7 + x2; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
132 b5 = vec_add(b5, x3); /* b5 = b5 + x3; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
133 b3 = vec_add(b3, x2); /* b3 = b3 + x2; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
134 b1 = vec_add(b1, x3); /* b1 = b1 + x3; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
135 /* }}} */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
136 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
137 #define FDCTCOL(b0,b1,b2,b3,b4,b5,b6,b7) /* {{{ */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
138 x0 = vec_add(b0, b7); /* x0 = b0 + b7; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
139 x7 = vec_sub(b0, b7); /* x7 = b0 - b7; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
140 x1 = vec_add(b1, b6); /* x1 = b1 + b6; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
141 x6 = vec_sub(b1, b6); /* x6 = b1 - b6; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
142 x2 = vec_add(b2, b5); /* x2 = b2 + b5; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
143 x5 = vec_sub(b2, b5); /* x5 = b2 - b5; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
144 x3 = vec_add(b3, b4); /* x3 = b3 + b4; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
145 x4 = vec_sub(b3, b4); /* x4 = b3 - b4; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
146 \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
147 b7 = vec_add(x0, x3); /* b7 = x0 + x3; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
148 b1 = vec_add(x1, x2); /* b1 = x1 + x2; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
149 b0 = vec_add(b7, b1); /* b0 = b7 + b1; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
150 b4 = vec_sub(b7, b1); /* b4 = b7 - b1; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
151 \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
152 b2 = vec_sub(x0, x3); /* b2 = x0 - x3; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
153 b6 = vec_sub(x1, x2); /* b6 = x1 - x2; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
154 b5 = vec_add(b6, b2); /* b5 = b6 + b2; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
155 cnst = LD_W2; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
156 b5 = vec_madd(cnst, b5, mzero); /* b5 = b5 * W2; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
157 cnst = LD_W1; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
158 b2 = vec_madd(cnst, b2, b5); /* b2 = b5 + b2 * W1; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
159 cnst = LD_W0; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
160 b6 = vec_madd(cnst, b6, b5); /* b6 = b5 + b6 * W0; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
161 \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
162 x0 = vec_add(x4, x7); /* x0 = x4 + x7; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
163 x1 = vec_add(x5, x6); /* x1 = x5 + x6; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
164 x2 = vec_add(x4, x6); /* x2 = x4 + x6; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
165 x3 = vec_add(x5, x7); /* x3 = x5 + x7; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
166 x8 = vec_add(x2, x3); /* x8 = x2 + x3; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
167 cnst = LD_W3; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
168 x8 = vec_madd(cnst, x8, mzero); /* x8 = x8 * W3; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
169 \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
170 cnst = LD_W8; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
171 x0 = vec_madd(cnst, x0, mzero); /* x0 *= W8; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
172 cnst = LD_W9; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
173 x1 = vec_madd(cnst, x1, mzero); /* x1 *= W9; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
174 cnst = LD_WA; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
175 x2 = vec_madd(cnst, x2, x8); /* x2 = x2 * WA + x8; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
176 cnst = LD_WB; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
177 x3 = vec_madd(cnst, x3, x8); /* x3 = x3 * WB + x8; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
178 \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
179 cnst = LD_W4; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
180 b7 = vec_madd(cnst, x4, x0); /* b7 = x4 * W4 + x0; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
181 cnst = LD_W5; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
182 b5 = vec_madd(cnst, x5, x1); /* b5 = x5 * W5 + x1; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
183 cnst = LD_W6; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
184 b3 = vec_madd(cnst, x6, x1); /* b3 = x6 * W6 + x1; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
185 cnst = LD_W7; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
186 b1 = vec_madd(cnst, x7, x0); /* b1 = x7 * W7 + x0; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
187 \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
188 b7 = vec_add(b7, x2); /* b7 += x2; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
189 b5 = vec_add(b5, x3); /* b5 += x3; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
190 b3 = vec_add(b3, x2); /* b3 += x2; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
191 b1 = vec_add(b1, x3); /* b1 += x3; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
192 /* }}} */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
193 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
194 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
195 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
196 /* two-dimensional discrete cosine transform */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
197 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
198 void fdct_altivec(int16_t *block) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
199 { |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
200 POWERPC_PERF_DECLARE(altivec_fdct, 1); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
201 vector signed short *bp; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
202 vector float *cp; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
203 vector float b00, b10, b20, b30, b40, b50, b60, b70; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
204 vector float b01, b11, b21, b31, b41, b51, b61, b71; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
205 vector float mzero, cnst, cnsts0, cnsts1, cnsts2; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
206 vector float x0, x1, x2, x3, x4, x5, x6, x7, x8; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
207 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
208 POWERPC_PERF_START_COUNT(altivec_fdct, 1); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
209 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
210 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
211 /* setup constants {{{ */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
212 /* mzero = -0.0 */ |
2612 | 213 mzero = ((vector float)vec_splat_u32(-1)); |
214 mzero = ((vector float)vec_sl(vu32(mzero), vu32(mzero))); | |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
215 cp = fdctconsts; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
216 cnsts0 = vec_ld(0, cp); cp++; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
217 cnsts1 = vec_ld(0, cp); cp++; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
218 cnsts2 = vec_ld(0, cp); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
219 /* }}} */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
220 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
221 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
222 /* 8x8 matrix transpose (vector short[8]) {{{ */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
223 #define MERGE_S16(hl,a,b) vec_merge##hl(vs16(a), vs16(b)) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
224 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
225 bp = (vector signed short*)block; |
2612 | 226 b00 = ((vector float)vec_ld(0, bp)); |
227 b40 = ((vector float)vec_ld(16*4, bp)); | |
228 b01 = ((vector float)MERGE_S16(h, b00, b40)); | |
229 b11 = ((vector float)MERGE_S16(l, b00, b40)); | |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
230 bp++; |
2612 | 231 b10 = ((vector float)vec_ld(0, bp)); |
232 b50 = ((vector float)vec_ld(16*4, bp)); | |
233 b21 = ((vector float)MERGE_S16(h, b10, b50)); | |
234 b31 = ((vector float)MERGE_S16(l, b10, b50)); | |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
235 bp++; |
2612 | 236 b20 = ((vector float)vec_ld(0, bp)); |
237 b60 = ((vector float)vec_ld(16*4, bp)); | |
238 b41 = ((vector float)MERGE_S16(h, b20, b60)); | |
239 b51 = ((vector float)MERGE_S16(l, b20, b60)); | |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
240 bp++; |
2612 | 241 b30 = ((vector float)vec_ld(0, bp)); |
242 b70 = ((vector float)vec_ld(16*4, bp)); | |
243 b61 = ((vector float)MERGE_S16(h, b30, b70)); | |
244 b71 = ((vector float)MERGE_S16(l, b30, b70)); | |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
245 |
2612 | 246 x0 = ((vector float)MERGE_S16(h, b01, b41)); |
247 x1 = ((vector float)MERGE_S16(l, b01, b41)); | |
248 x2 = ((vector float)MERGE_S16(h, b11, b51)); | |
249 x3 = ((vector float)MERGE_S16(l, b11, b51)); | |
250 x4 = ((vector float)MERGE_S16(h, b21, b61)); | |
251 x5 = ((vector float)MERGE_S16(l, b21, b61)); | |
252 x6 = ((vector float)MERGE_S16(h, b31, b71)); | |
253 x7 = ((vector float)MERGE_S16(l, b31, b71)); | |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
254 |
2612 | 255 b00 = ((vector float)MERGE_S16(h, x0, x4)); |
256 b10 = ((vector float)MERGE_S16(l, x0, x4)); | |
257 b20 = ((vector float)MERGE_S16(h, x1, x5)); | |
258 b30 = ((vector float)MERGE_S16(l, x1, x5)); | |
259 b40 = ((vector float)MERGE_S16(h, x2, x6)); | |
260 b50 = ((vector float)MERGE_S16(l, x2, x6)); | |
261 b60 = ((vector float)MERGE_S16(h, x3, x7)); | |
262 b70 = ((vector float)MERGE_S16(l, x3, x7)); | |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
263 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
264 #undef MERGE_S16 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
265 /* }}} */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
266 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
267 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
268 /* Some of the initial calculations can be done as vector short before |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
269 * conversion to vector float. The following code section takes advantage |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
270 * of this. |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
271 */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
272 #if 1 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
273 /* fdct rows {{{ */ |
2612 | 274 x0 = ((vector float)vec_add(vs16(b00), vs16(b70))); |
275 x7 = ((vector float)vec_sub(vs16(b00), vs16(b70))); | |
276 x1 = ((vector float)vec_add(vs16(b10), vs16(b60))); | |
277 x6 = ((vector float)vec_sub(vs16(b10), vs16(b60))); | |
278 x2 = ((vector float)vec_add(vs16(b20), vs16(b50))); | |
279 x5 = ((vector float)vec_sub(vs16(b20), vs16(b50))); | |
280 x3 = ((vector float)vec_add(vs16(b30), vs16(b40))); | |
281 x4 = ((vector float)vec_sub(vs16(b30), vs16(b40))); | |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
282 |
2612 | 283 b70 = ((vector float)vec_add(vs16(x0), vs16(x3))); |
284 b10 = ((vector float)vec_add(vs16(x1), vs16(x2))); | |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
285 |
2612 | 286 b00 = ((vector float)vec_add(vs16(b70), vs16(b10))); |
287 b40 = ((vector float)vec_sub(vs16(b70), vs16(b10))); | |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
288 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
289 #define CTF0(n) \ |
2612 | 290 b##n##1 = ((vector float)vec_unpackl(vs16(b##n##0))); \ |
291 b##n##0 = ((vector float)vec_unpackh(vs16(b##n##0))); \ | |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
292 b##n##1 = vec_ctf(vs32(b##n##1), 0); \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
293 b##n##0 = vec_ctf(vs32(b##n##0), 0); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
294 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
295 CTF0(0); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
296 CTF0(4); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
297 |
2612 | 298 b20 = ((vector float)vec_sub(vs16(x0), vs16(x3))); |
299 b60 = ((vector float)vec_sub(vs16(x1), vs16(x2))); | |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
300 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
301 CTF0(2); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
302 CTF0(6); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
303 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
304 #undef CTF0 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
305 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
306 x0 = vec_add(b60, b20); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
307 x1 = vec_add(b61, b21); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
308 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
309 cnst = LD_W2; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
310 x0 = vec_madd(cnst, x0, mzero); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
311 x1 = vec_madd(cnst, x1, mzero); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
312 cnst = LD_W1; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
313 b20 = vec_madd(cnst, b20, x0); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
314 b21 = vec_madd(cnst, b21, x1); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
315 cnst = LD_W0; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
316 b60 = vec_madd(cnst, b60, x0); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
317 b61 = vec_madd(cnst, b61, x1); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
318 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
319 #define CTFX(x,b) \ |
2612 | 320 b##0 = ((vector float)vec_unpackh(vs16(x))); \ |
321 b##1 = ((vector float)vec_unpackl(vs16(x))); \ | |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
322 b##0 = vec_ctf(vs32(b##0), 0); \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
323 b##1 = vec_ctf(vs32(b##1), 0); \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
324 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
325 CTFX(x4, b7); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
326 CTFX(x5, b5); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
327 CTFX(x6, b3); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
328 CTFX(x7, b1); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
329 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
330 #undef CTFX |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
331 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
332 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
333 x0 = vec_add(b70, b10); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
334 x1 = vec_add(b50, b30); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
335 x2 = vec_add(b70, b30); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
336 x3 = vec_add(b50, b10); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
337 x8 = vec_add(x2, x3); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
338 cnst = LD_W3; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
339 x8 = vec_madd(cnst, x8, mzero); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
340 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
341 cnst = LD_W8; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
342 x0 = vec_madd(cnst, x0, mzero); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
343 cnst = LD_W9; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
344 x1 = vec_madd(cnst, x1, mzero); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
345 cnst = LD_WA; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
346 x2 = vec_madd(cnst, x2, x8); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
347 cnst = LD_WB; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
348 x3 = vec_madd(cnst, x3, x8); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
349 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
350 cnst = LD_W4; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
351 b70 = vec_madd(cnst, b70, x0); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
352 cnst = LD_W5; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
353 b50 = vec_madd(cnst, b50, x1); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
354 cnst = LD_W6; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
355 b30 = vec_madd(cnst, b30, x1); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
356 cnst = LD_W7; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
357 b10 = vec_madd(cnst, b10, x0); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
358 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
359 b70 = vec_add(b70, x2); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
360 b50 = vec_add(b50, x3); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
361 b30 = vec_add(b30, x2); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
362 b10 = vec_add(b10, x3); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
363 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
364 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
365 x0 = vec_add(b71, b11); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
366 x1 = vec_add(b51, b31); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
367 x2 = vec_add(b71, b31); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
368 x3 = vec_add(b51, b11); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
369 x8 = vec_add(x2, x3); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
370 cnst = LD_W3; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
371 x8 = vec_madd(cnst, x8, mzero); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
372 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
373 cnst = LD_W8; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
374 x0 = vec_madd(cnst, x0, mzero); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
375 cnst = LD_W9; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
376 x1 = vec_madd(cnst, x1, mzero); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
377 cnst = LD_WA; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
378 x2 = vec_madd(cnst, x2, x8); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
379 cnst = LD_WB; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
380 x3 = vec_madd(cnst, x3, x8); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
381 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
382 cnst = LD_W4; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
383 b71 = vec_madd(cnst, b71, x0); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
384 cnst = LD_W5; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
385 b51 = vec_madd(cnst, b51, x1); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
386 cnst = LD_W6; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
387 b31 = vec_madd(cnst, b31, x1); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
388 cnst = LD_W7; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
389 b11 = vec_madd(cnst, b11, x0); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
390 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
391 b71 = vec_add(b71, x2); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
392 b51 = vec_add(b51, x3); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
393 b31 = vec_add(b31, x2); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
394 b11 = vec_add(b11, x3); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
395 /* }}} */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
396 #else |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
397 /* convert to float {{{ */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
398 #define CTF(n) \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
399 vs32(b##n##1) = vec_unpackl(vs16(b##n##0)); \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
400 vs32(b##n##0) = vec_unpackh(vs16(b##n##0)); \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
401 b##n##1 = vec_ctf(vs32(b##n##1), 0); \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
402 b##n##0 = vec_ctf(vs32(b##n##0), 0); \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
403 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
404 CTF(0); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
405 CTF(1); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
406 CTF(2); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
407 CTF(3); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
408 CTF(4); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
409 CTF(5); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
410 CTF(6); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
411 CTF(7); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
412 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
413 #undef CTF |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
414 /* }}} */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
415 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
416 FDCTROW(b00, b10, b20, b30, b40, b50, b60, b70); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
417 FDCTROW(b01, b11, b21, b31, b41, b51, b61, b71); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
418 #endif |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
419 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
420 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
421 /* 8x8 matrix transpose (vector float[8][2]) {{{ */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
422 x0 = vec_mergel(b00, b20); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
423 x1 = vec_mergeh(b00, b20); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
424 x2 = vec_mergel(b10, b30); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
425 x3 = vec_mergeh(b10, b30); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
426 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
427 b00 = vec_mergeh(x1, x3); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
428 b10 = vec_mergel(x1, x3); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
429 b20 = vec_mergeh(x0, x2); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
430 b30 = vec_mergel(x0, x2); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
431 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
432 x4 = vec_mergel(b41, b61); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
433 x5 = vec_mergeh(b41, b61); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
434 x6 = vec_mergel(b51, b71); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
435 x7 = vec_mergeh(b51, b71); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
436 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
437 b41 = vec_mergeh(x5, x7); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
438 b51 = vec_mergel(x5, x7); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
439 b61 = vec_mergeh(x4, x6); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
440 b71 = vec_mergel(x4, x6); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
441 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
442 x0 = vec_mergel(b01, b21); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
443 x1 = vec_mergeh(b01, b21); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
444 x2 = vec_mergel(b11, b31); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
445 x3 = vec_mergeh(b11, b31); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
446 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
447 x4 = vec_mergel(b40, b60); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
448 x5 = vec_mergeh(b40, b60); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
449 x6 = vec_mergel(b50, b70); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
450 x7 = vec_mergeh(b50, b70); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
451 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
452 b40 = vec_mergeh(x1, x3); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
453 b50 = vec_mergel(x1, x3); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
454 b60 = vec_mergeh(x0, x2); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
455 b70 = vec_mergel(x0, x2); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
456 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
457 b01 = vec_mergeh(x5, x7); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
458 b11 = vec_mergel(x5, x7); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
459 b21 = vec_mergeh(x4, x6); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
460 b31 = vec_mergel(x4, x6); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
461 /* }}} */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
462 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
463 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
464 FDCTCOL(b00, b10, b20, b30, b40, b50, b60, b70); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
465 FDCTCOL(b01, b11, b21, b31, b41, b51, b61, b71); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
466 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
467 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
468 /* round, convert back to short {{{ */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
469 #define CTS(n) \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
470 b##n##0 = vec_round(b##n##0); \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
471 b##n##1 = vec_round(b##n##1); \ |
2612 | 472 b##n##0 = ((vector float)vec_cts(b##n##0, 0)); \ |
473 b##n##1 = ((vector float)vec_cts(b##n##1, 0)); \ | |
474 b##n##0 = ((vector float)vec_pack(vs32(b##n##0), vs32(b##n##1))); \ | |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
475 vec_st(vs16(b##n##0), 0, bp); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
476 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
477 bp = (vector signed short*)block; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
478 CTS(0); bp++; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
479 CTS(1); bp++; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
480 CTS(2); bp++; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
481 CTS(3); bp++; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
482 CTS(4); bp++; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
483 CTS(5); bp++; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
484 CTS(6); bp++; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
485 CTS(7); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
486 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
487 #undef CTS |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
488 /* }}} */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
489 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
490 POWERPC_PERF_STOP_COUNT(altivec_fdct, 1); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
491 } |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
492 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
493 /* vim:set foldmethod=marker foldlevel=0: */ |