Mercurial > libavcodec.hg
annotate ppc/fdct_altivec.c @ 12340:2d15f62f4f8a libavcodec
VP8: move zeroing of luma DC block into the WHT
Lets us do the zeroing in asm instead of C.
Also makes it consistent with the way the regular iDCT code does it.
author | darkshikari |
---|---|
date | Mon, 02 Aug 2010 20:18:09 +0000 |
parents | 3cd4cd0509cd |
children |
rev | line source |
---|---|
9171 | 1 /* |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
2 * Copyright (C) 2003 James Klicman <james@klicman.org> |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
3 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
4 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
5 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
6 * FFmpeg is free software; you can redistribute it and/or |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
7 * modify it under the terms of the GNU Lesser General Public |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
8 * License as published by the Free Software Foundation; either |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
9 * version 2.1 of the License, or (at your option) any later version. |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
10 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
11 * FFmpeg is distributed in the hope that it will be useful, |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
14 * Lesser General Public License for more details. |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
15 * |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
16 * You should have received a copy of the GNU Lesser General Public |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
17 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2819
diff
changeset
|
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
19 */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
20 |
9421
dd2b5e52336a
Remove gcc_fixes.h. It only contains workarounds for unsupported gcc versions.
diego
parents:
9171
diff
changeset
|
21 #include "config.h" |
dd2b5e52336a
Remove gcc_fixes.h. It only contains workarounds for unsupported gcc versions.
diego
parents:
9171
diff
changeset
|
22 #if HAVE_ALTIVEC_H |
dd2b5e52336a
Remove gcc_fixes.h. It only contains workarounds for unsupported gcc versions.
diego
parents:
9171
diff
changeset
|
23 #include <altivec.h> |
dd2b5e52336a
Remove gcc_fixes.h. It only contains workarounds for unsupported gcc versions.
diego
parents:
9171
diff
changeset
|
24 #endif |
6763 | 25 #include "libavutil/common.h" |
26 #include "libavcodec/dsputil.h" | |
11382
50415a8f1451
PPC: move prototypes to headers and make some functions static
mru
parents:
9421
diff
changeset
|
27 #include "dsputil_altivec.h" |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
28 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
29 #define vs16(v) ((vector signed short)(v)) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
30 #define vs32(v) ((vector signed int)(v)) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
31 #define vu8(v) ((vector unsigned char)(v)) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
32 #define vu16(v) ((vector unsigned short)(v)) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
33 #define vu32(v) ((vector unsigned int)(v)) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
34 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
35 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
36 #define C1 0.98078525066375732421875000 /* cos(1*PI/16) */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
37 #define C2 0.92387950420379638671875000 /* cos(2*PI/16) */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
38 #define C3 0.83146959543228149414062500 /* cos(3*PI/16) */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
39 #define C4 0.70710676908493041992187500 /* cos(4*PI/16) */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
40 #define C5 0.55557024478912353515625000 /* cos(5*PI/16) */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
41 #define C6 0.38268342614173889160156250 /* cos(6*PI/16) */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
42 #define C7 0.19509032368659973144531250 /* cos(7*PI/16) */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
43 #define SQRT_2 1.41421353816986083984375000 /* sqrt(2) */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
44 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
45 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
46 #define W0 -(2 * C2) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
47 #define W1 (2 * C6) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
48 #define W2 (SQRT_2 * C6) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
49 #define W3 (SQRT_2 * C3) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
50 #define W4 (SQRT_2 * (-C1 + C3 + C5 - C7)) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
51 #define W5 (SQRT_2 * ( C1 + C3 - C5 + C7)) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
52 #define W6 (SQRT_2 * ( C1 + C3 + C5 - C7)) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
53 #define W7 (SQRT_2 * ( C1 + C3 - C5 - C7)) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
54 #define W8 (SQRT_2 * ( C7 - C3)) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
55 #define W9 (SQRT_2 * (-C1 - C3)) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
56 #define WA (SQRT_2 * (-C3 - C5)) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
57 #define WB (SQRT_2 * ( C5 - C3)) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
58 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
59 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
60 static vector float fdctconsts[3] = { |
7373
266d4949aa15
Remove AltiVec vector declaration compiler compatibility macros.
diego
parents:
7223
diff
changeset
|
61 { W0, W1, W2, W3 }, |
266d4949aa15
Remove AltiVec vector declaration compiler compatibility macros.
diego
parents:
7223
diff
changeset
|
62 { W4, W5, W6, W7 }, |
266d4949aa15
Remove AltiVec vector declaration compiler compatibility macros.
diego
parents:
7223
diff
changeset
|
63 { W8, W9, WA, WB } |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
64 }; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
65 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
66 #define LD_W0 vec_splat(cnsts0, 0) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
67 #define LD_W1 vec_splat(cnsts0, 1) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
68 #define LD_W2 vec_splat(cnsts0, 2) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
69 #define LD_W3 vec_splat(cnsts0, 3) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
70 #define LD_W4 vec_splat(cnsts1, 0) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
71 #define LD_W5 vec_splat(cnsts1, 1) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
72 #define LD_W6 vec_splat(cnsts1, 2) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
73 #define LD_W7 vec_splat(cnsts1, 3) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
74 #define LD_W8 vec_splat(cnsts2, 0) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
75 #define LD_W9 vec_splat(cnsts2, 1) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
76 #define LD_WA vec_splat(cnsts2, 2) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
77 #define LD_WB vec_splat(cnsts2, 3) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
78 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
79 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
80 #define FDCTROW(b0,b1,b2,b3,b4,b5,b6,b7) /* {{{ */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
81 x0 = vec_add(b0, b7); /* x0 = b0 + b7; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
82 x7 = vec_sub(b0, b7); /* x7 = b0 - b7; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
83 x1 = vec_add(b1, b6); /* x1 = b1 + b6; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
84 x6 = vec_sub(b1, b6); /* x6 = b1 - b6; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
85 x2 = vec_add(b2, b5); /* x2 = b2 + b5; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
86 x5 = vec_sub(b2, b5); /* x5 = b2 - b5; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
87 x3 = vec_add(b3, b4); /* x3 = b3 + b4; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
88 x4 = vec_sub(b3, b4); /* x4 = b3 - b4; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
89 \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
90 b7 = vec_add(x0, x3); /* b7 = x0 + x3; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
91 b1 = vec_add(x1, x2); /* b1 = x1 + x2; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
92 b0 = vec_add(b7, b1); /* b0 = b7 + b1; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
93 b4 = vec_sub(b7, b1); /* b4 = b7 - b1; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
94 \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
95 b2 = vec_sub(x0, x3); /* b2 = x0 - x3; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
96 b6 = vec_sub(x1, x2); /* b6 = x1 - x2; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
97 b5 = vec_add(b6, b2); /* b5 = b6 + b2; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
98 cnst = LD_W2; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
99 b5 = vec_madd(cnst, b5, mzero); /* b5 = b5 * W2; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
100 cnst = LD_W1; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
101 b2 = vec_madd(cnst, b2, b5); /* b2 = b5 + b2 * W1; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
102 cnst = LD_W0; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
103 b6 = vec_madd(cnst, b6, b5); /* b6 = b5 + b6 * W0; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
104 \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
105 x0 = vec_add(x4, x7); /* x0 = x4 + x7; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
106 x1 = vec_add(x5, x6); /* x1 = x5 + x6; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
107 x2 = vec_add(x4, x6); /* x2 = x4 + x6; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
108 x3 = vec_add(x5, x7); /* x3 = x5 + x7; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
109 x8 = vec_add(x2, x3); /* x8 = x2 + x3; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
110 cnst = LD_W3; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
111 x8 = vec_madd(cnst, x8, mzero); /* x8 = x8 * W3; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
112 \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
113 cnst = LD_W8; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
114 x0 = vec_madd(cnst, x0, mzero); /* x0 *= W8; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
115 cnst = LD_W9; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
116 x1 = vec_madd(cnst, x1, mzero); /* x1 *= W9; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
117 cnst = LD_WA; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
118 x2 = vec_madd(cnst, x2, x8); /* x2 = x2 * WA + x8; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
119 cnst = LD_WB; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
120 x3 = vec_madd(cnst, x3, x8); /* x3 = x3 * WB + x8; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
121 \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
122 cnst = LD_W4; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
123 b7 = vec_madd(cnst, x4, x0); /* b7 = x4 * W4 + x0; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
124 cnst = LD_W5; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
125 b5 = vec_madd(cnst, x5, x1); /* b5 = x5 * W5 + x1; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
126 cnst = LD_W6; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
127 b3 = vec_madd(cnst, x6, x1); /* b3 = x6 * W6 + x1; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
128 cnst = LD_W7; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
129 b1 = vec_madd(cnst, x7, x0); /* b1 = x7 * W7 + x0; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
130 \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
131 b7 = vec_add(b7, x2); /* b7 = b7 + x2; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
132 b5 = vec_add(b5, x3); /* b5 = b5 + x3; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
133 b3 = vec_add(b3, x2); /* b3 = b3 + x2; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
134 b1 = vec_add(b1, x3); /* b1 = b1 + x3; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
135 /* }}} */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
136 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
137 #define FDCTCOL(b0,b1,b2,b3,b4,b5,b6,b7) /* {{{ */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
138 x0 = vec_add(b0, b7); /* x0 = b0 + b7; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
139 x7 = vec_sub(b0, b7); /* x7 = b0 - b7; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
140 x1 = vec_add(b1, b6); /* x1 = b1 + b6; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
141 x6 = vec_sub(b1, b6); /* x6 = b1 - b6; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
142 x2 = vec_add(b2, b5); /* x2 = b2 + b5; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
143 x5 = vec_sub(b2, b5); /* x5 = b2 - b5; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
144 x3 = vec_add(b3, b4); /* x3 = b3 + b4; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
145 x4 = vec_sub(b3, b4); /* x4 = b3 - b4; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
146 \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
147 b7 = vec_add(x0, x3); /* b7 = x0 + x3; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
148 b1 = vec_add(x1, x2); /* b1 = x1 + x2; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
149 b0 = vec_add(b7, b1); /* b0 = b7 + b1; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
150 b4 = vec_sub(b7, b1); /* b4 = b7 - b1; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
151 \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
152 b2 = vec_sub(x0, x3); /* b2 = x0 - x3; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
153 b6 = vec_sub(x1, x2); /* b6 = x1 - x2; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
154 b5 = vec_add(b6, b2); /* b5 = b6 + b2; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
155 cnst = LD_W2; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
156 b5 = vec_madd(cnst, b5, mzero); /* b5 = b5 * W2; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
157 cnst = LD_W1; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
158 b2 = vec_madd(cnst, b2, b5); /* b2 = b5 + b2 * W1; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
159 cnst = LD_W0; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
160 b6 = vec_madd(cnst, b6, b5); /* b6 = b5 + b6 * W0; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
161 \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
162 x0 = vec_add(x4, x7); /* x0 = x4 + x7; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
163 x1 = vec_add(x5, x6); /* x1 = x5 + x6; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
164 x2 = vec_add(x4, x6); /* x2 = x4 + x6; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
165 x3 = vec_add(x5, x7); /* x3 = x5 + x7; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
166 x8 = vec_add(x2, x3); /* x8 = x2 + x3; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
167 cnst = LD_W3; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
168 x8 = vec_madd(cnst, x8, mzero); /* x8 = x8 * W3; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
169 \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
170 cnst = LD_W8; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
171 x0 = vec_madd(cnst, x0, mzero); /* x0 *= W8; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
172 cnst = LD_W9; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
173 x1 = vec_madd(cnst, x1, mzero); /* x1 *= W9; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
174 cnst = LD_WA; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
175 x2 = vec_madd(cnst, x2, x8); /* x2 = x2 * WA + x8; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
176 cnst = LD_WB; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
177 x3 = vec_madd(cnst, x3, x8); /* x3 = x3 * WB + x8; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
178 \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
179 cnst = LD_W4; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
180 b7 = vec_madd(cnst, x4, x0); /* b7 = x4 * W4 + x0; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
181 cnst = LD_W5; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
182 b5 = vec_madd(cnst, x5, x1); /* b5 = x5 * W5 + x1; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
183 cnst = LD_W6; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
184 b3 = vec_madd(cnst, x6, x1); /* b3 = x6 * W6 + x1; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
185 cnst = LD_W7; \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
186 b1 = vec_madd(cnst, x7, x0); /* b1 = x7 * W7 + x0; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
187 \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
188 b7 = vec_add(b7, x2); /* b7 += x2; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
189 b5 = vec_add(b5, x3); /* b5 += x3; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
190 b3 = vec_add(b3, x2); /* b3 += x2; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
191 b1 = vec_add(b1, x3); /* b1 += x3; */ \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
192 /* }}} */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
193 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
194 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
195 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
196 /* two-dimensional discrete cosine transform */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
197 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
198 void fdct_altivec(int16_t *block) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
199 { |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
200 vector signed short *bp; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
201 vector float *cp; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
202 vector float b00, b10, b20, b30, b40, b50, b60, b70; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
203 vector float b01, b11, b21, b31, b41, b51, b61, b71; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
204 vector float mzero, cnst, cnsts0, cnsts1, cnsts2; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
205 vector float x0, x1, x2, x3, x4, x5, x6, x7, x8; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
206 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
207 /* setup constants {{{ */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
208 /* mzero = -0.0 */ |
2612 | 209 mzero = ((vector float)vec_splat_u32(-1)); |
210 mzero = ((vector float)vec_sl(vu32(mzero), vu32(mzero))); | |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
211 cp = fdctconsts; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
212 cnsts0 = vec_ld(0, cp); cp++; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
213 cnsts1 = vec_ld(0, cp); cp++; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
214 cnsts2 = vec_ld(0, cp); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
215 /* }}} */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
216 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
217 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
218 /* 8x8 matrix transpose (vector short[8]) {{{ */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
219 #define MERGE_S16(hl,a,b) vec_merge##hl(vs16(a), vs16(b)) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
220 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
221 bp = (vector signed short*)block; |
2612 | 222 b00 = ((vector float)vec_ld(0, bp)); |
223 b40 = ((vector float)vec_ld(16*4, bp)); | |
224 b01 = ((vector float)MERGE_S16(h, b00, b40)); | |
225 b11 = ((vector float)MERGE_S16(l, b00, b40)); | |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
226 bp++; |
2612 | 227 b10 = ((vector float)vec_ld(0, bp)); |
228 b50 = ((vector float)vec_ld(16*4, bp)); | |
229 b21 = ((vector float)MERGE_S16(h, b10, b50)); | |
230 b31 = ((vector float)MERGE_S16(l, b10, b50)); | |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
231 bp++; |
2612 | 232 b20 = ((vector float)vec_ld(0, bp)); |
233 b60 = ((vector float)vec_ld(16*4, bp)); | |
234 b41 = ((vector float)MERGE_S16(h, b20, b60)); | |
235 b51 = ((vector float)MERGE_S16(l, b20, b60)); | |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
236 bp++; |
2612 | 237 b30 = ((vector float)vec_ld(0, bp)); |
238 b70 = ((vector float)vec_ld(16*4, bp)); | |
239 b61 = ((vector float)MERGE_S16(h, b30, b70)); | |
240 b71 = ((vector float)MERGE_S16(l, b30, b70)); | |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
241 |
2612 | 242 x0 = ((vector float)MERGE_S16(h, b01, b41)); |
243 x1 = ((vector float)MERGE_S16(l, b01, b41)); | |
244 x2 = ((vector float)MERGE_S16(h, b11, b51)); | |
245 x3 = ((vector float)MERGE_S16(l, b11, b51)); | |
246 x4 = ((vector float)MERGE_S16(h, b21, b61)); | |
247 x5 = ((vector float)MERGE_S16(l, b21, b61)); | |
248 x6 = ((vector float)MERGE_S16(h, b31, b71)); | |
249 x7 = ((vector float)MERGE_S16(l, b31, b71)); | |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
250 |
2612 | 251 b00 = ((vector float)MERGE_S16(h, x0, x4)); |
252 b10 = ((vector float)MERGE_S16(l, x0, x4)); | |
253 b20 = ((vector float)MERGE_S16(h, x1, x5)); | |
254 b30 = ((vector float)MERGE_S16(l, x1, x5)); | |
255 b40 = ((vector float)MERGE_S16(h, x2, x6)); | |
256 b50 = ((vector float)MERGE_S16(l, x2, x6)); | |
257 b60 = ((vector float)MERGE_S16(h, x3, x7)); | |
258 b70 = ((vector float)MERGE_S16(l, x3, x7)); | |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
259 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
260 #undef MERGE_S16 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
261 /* }}} */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
262 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
263 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
264 /* Some of the initial calculations can be done as vector short before |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
265 * conversion to vector float. The following code section takes advantage |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
266 * of this. |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
267 */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
268 #if 1 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
269 /* fdct rows {{{ */ |
2612 | 270 x0 = ((vector float)vec_add(vs16(b00), vs16(b70))); |
271 x7 = ((vector float)vec_sub(vs16(b00), vs16(b70))); | |
272 x1 = ((vector float)vec_add(vs16(b10), vs16(b60))); | |
273 x6 = ((vector float)vec_sub(vs16(b10), vs16(b60))); | |
274 x2 = ((vector float)vec_add(vs16(b20), vs16(b50))); | |
275 x5 = ((vector float)vec_sub(vs16(b20), vs16(b50))); | |
276 x3 = ((vector float)vec_add(vs16(b30), vs16(b40))); | |
277 x4 = ((vector float)vec_sub(vs16(b30), vs16(b40))); | |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
278 |
2612 | 279 b70 = ((vector float)vec_add(vs16(x0), vs16(x3))); |
280 b10 = ((vector float)vec_add(vs16(x1), vs16(x2))); | |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
281 |
2612 | 282 b00 = ((vector float)vec_add(vs16(b70), vs16(b10))); |
283 b40 = ((vector float)vec_sub(vs16(b70), vs16(b10))); | |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
284 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
285 #define CTF0(n) \ |
2612 | 286 b##n##1 = ((vector float)vec_unpackl(vs16(b##n##0))); \ |
287 b##n##0 = ((vector float)vec_unpackh(vs16(b##n##0))); \ | |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
288 b##n##1 = vec_ctf(vs32(b##n##1), 0); \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
289 b##n##0 = vec_ctf(vs32(b##n##0), 0); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
290 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
291 CTF0(0); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
292 CTF0(4); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
293 |
2612 | 294 b20 = ((vector float)vec_sub(vs16(x0), vs16(x3))); |
295 b60 = ((vector float)vec_sub(vs16(x1), vs16(x2))); | |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
296 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
297 CTF0(2); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
298 CTF0(6); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
299 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
300 #undef CTF0 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
301 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
302 x0 = vec_add(b60, b20); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
303 x1 = vec_add(b61, b21); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
304 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
305 cnst = LD_W2; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
306 x0 = vec_madd(cnst, x0, mzero); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
307 x1 = vec_madd(cnst, x1, mzero); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
308 cnst = LD_W1; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
309 b20 = vec_madd(cnst, b20, x0); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
310 b21 = vec_madd(cnst, b21, x1); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
311 cnst = LD_W0; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
312 b60 = vec_madd(cnst, b60, x0); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
313 b61 = vec_madd(cnst, b61, x1); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
314 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
315 #define CTFX(x,b) \ |
2612 | 316 b##0 = ((vector float)vec_unpackh(vs16(x))); \ |
317 b##1 = ((vector float)vec_unpackl(vs16(x))); \ | |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
318 b##0 = vec_ctf(vs32(b##0), 0); \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
319 b##1 = vec_ctf(vs32(b##1), 0); \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
320 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
321 CTFX(x4, b7); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
322 CTFX(x5, b5); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
323 CTFX(x6, b3); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
324 CTFX(x7, b1); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
325 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
326 #undef CTFX |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
327 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
328 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
329 x0 = vec_add(b70, b10); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
330 x1 = vec_add(b50, b30); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
331 x2 = vec_add(b70, b30); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
332 x3 = vec_add(b50, b10); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
333 x8 = vec_add(x2, x3); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
334 cnst = LD_W3; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
335 x8 = vec_madd(cnst, x8, mzero); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
336 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
337 cnst = LD_W8; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
338 x0 = vec_madd(cnst, x0, mzero); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
339 cnst = LD_W9; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
340 x1 = vec_madd(cnst, x1, mzero); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
341 cnst = LD_WA; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
342 x2 = vec_madd(cnst, x2, x8); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
343 cnst = LD_WB; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
344 x3 = vec_madd(cnst, x3, x8); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
345 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
346 cnst = LD_W4; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
347 b70 = vec_madd(cnst, b70, x0); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
348 cnst = LD_W5; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
349 b50 = vec_madd(cnst, b50, x1); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
350 cnst = LD_W6; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
351 b30 = vec_madd(cnst, b30, x1); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
352 cnst = LD_W7; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
353 b10 = vec_madd(cnst, b10, x0); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
354 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
355 b70 = vec_add(b70, x2); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
356 b50 = vec_add(b50, x3); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
357 b30 = vec_add(b30, x2); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
358 b10 = vec_add(b10, x3); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
359 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
360 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
361 x0 = vec_add(b71, b11); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
362 x1 = vec_add(b51, b31); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
363 x2 = vec_add(b71, b31); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
364 x3 = vec_add(b51, b11); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
365 x8 = vec_add(x2, x3); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
366 cnst = LD_W3; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
367 x8 = vec_madd(cnst, x8, mzero); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
368 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
369 cnst = LD_W8; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
370 x0 = vec_madd(cnst, x0, mzero); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
371 cnst = LD_W9; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
372 x1 = vec_madd(cnst, x1, mzero); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
373 cnst = LD_WA; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
374 x2 = vec_madd(cnst, x2, x8); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
375 cnst = LD_WB; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
376 x3 = vec_madd(cnst, x3, x8); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
377 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
378 cnst = LD_W4; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
379 b71 = vec_madd(cnst, b71, x0); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
380 cnst = LD_W5; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
381 b51 = vec_madd(cnst, b51, x1); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
382 cnst = LD_W6; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
383 b31 = vec_madd(cnst, b31, x1); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
384 cnst = LD_W7; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
385 b11 = vec_madd(cnst, b11, x0); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
386 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
387 b71 = vec_add(b71, x2); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
388 b51 = vec_add(b51, x3); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
389 b31 = vec_add(b31, x2); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
390 b11 = vec_add(b11, x3); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
391 /* }}} */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
392 #else |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
393 /* convert to float {{{ */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
394 #define CTF(n) \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
395 vs32(b##n##1) = vec_unpackl(vs16(b##n##0)); \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
396 vs32(b##n##0) = vec_unpackh(vs16(b##n##0)); \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
397 b##n##1 = vec_ctf(vs32(b##n##1), 0); \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
398 b##n##0 = vec_ctf(vs32(b##n##0), 0); \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
399 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
400 CTF(0); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
401 CTF(1); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
402 CTF(2); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
403 CTF(3); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
404 CTF(4); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
405 CTF(5); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
406 CTF(6); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
407 CTF(7); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
408 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
409 #undef CTF |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
410 /* }}} */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
411 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
412 FDCTROW(b00, b10, b20, b30, b40, b50, b60, b70); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
413 FDCTROW(b01, b11, b21, b31, b41, b51, b61, b71); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
414 #endif |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
415 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
416 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
417 /* 8x8 matrix transpose (vector float[8][2]) {{{ */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
418 x0 = vec_mergel(b00, b20); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
419 x1 = vec_mergeh(b00, b20); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
420 x2 = vec_mergel(b10, b30); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
421 x3 = vec_mergeh(b10, b30); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
422 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
423 b00 = vec_mergeh(x1, x3); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
424 b10 = vec_mergel(x1, x3); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
425 b20 = vec_mergeh(x0, x2); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
426 b30 = vec_mergel(x0, x2); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
427 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
428 x4 = vec_mergel(b41, b61); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
429 x5 = vec_mergeh(b41, b61); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
430 x6 = vec_mergel(b51, b71); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
431 x7 = vec_mergeh(b51, b71); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
432 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
433 b41 = vec_mergeh(x5, x7); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
434 b51 = vec_mergel(x5, x7); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
435 b61 = vec_mergeh(x4, x6); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
436 b71 = vec_mergel(x4, x6); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
437 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
438 x0 = vec_mergel(b01, b21); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
439 x1 = vec_mergeh(b01, b21); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
440 x2 = vec_mergel(b11, b31); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
441 x3 = vec_mergeh(b11, b31); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
442 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
443 x4 = vec_mergel(b40, b60); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
444 x5 = vec_mergeh(b40, b60); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
445 x6 = vec_mergel(b50, b70); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
446 x7 = vec_mergeh(b50, b70); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
447 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
448 b40 = vec_mergeh(x1, x3); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
449 b50 = vec_mergel(x1, x3); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
450 b60 = vec_mergeh(x0, x2); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
451 b70 = vec_mergel(x0, x2); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
452 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
453 b01 = vec_mergeh(x5, x7); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
454 b11 = vec_mergel(x5, x7); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
455 b21 = vec_mergeh(x4, x6); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
456 b31 = vec_mergel(x4, x6); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
457 /* }}} */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
458 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
459 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
460 FDCTCOL(b00, b10, b20, b30, b40, b50, b60, b70); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
461 FDCTCOL(b01, b11, b21, b31, b41, b51, b61, b71); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
462 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
463 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
464 /* round, convert back to short {{{ */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
465 #define CTS(n) \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
466 b##n##0 = vec_round(b##n##0); \ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
467 b##n##1 = vec_round(b##n##1); \ |
2612 | 468 b##n##0 = ((vector float)vec_cts(b##n##0, 0)); \ |
469 b##n##1 = ((vector float)vec_cts(b##n##1, 0)); \ | |
470 b##n##0 = ((vector float)vec_pack(vs32(b##n##0), vs32(b##n##1))); \ | |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
471 vec_st(vs16(b##n##0), 0, bp); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
472 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
473 bp = (vector signed short*)block; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
474 CTS(0); bp++; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
475 CTS(1); bp++; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
476 CTS(2); bp++; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
477 CTS(3); bp++; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
478 CTS(4); bp++; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
479 CTS(5); bp++; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
480 CTS(6); bp++; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
481 CTS(7); |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
482 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
483 #undef CTS |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
484 /* }}} */ |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
485 } |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
486 |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
diff
changeset
|
487 /* vim:set foldmethod=marker foldlevel=0: */ |