Mercurial > libavcodec.hg
annotate ps2/idct_mmi.c @ 7351:1502ba3beb72 libavcodec
The codebook generator algorithm involves picking three
different codebook centroids ("high utility", "low
utility" and "closest to the low utility one"). This
change avoids the corner case of choosing the same
centroid twice.
author | vitor |
---|---|
date | Wed, 23 Jul 2008 03:54:31 +0000 |
parents | f7cbb7733146 |
children | eebc7209c47f |
rev | line source |
---|---|
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
1 /* |
3680
7690bafea6e0
Mark this file properly as LGPL as allowed by its author Leon van Stuivenberg.
diego
parents:
2979
diff
changeset
|
2 * Originally provided by Intel at Application Note AP-922. |
7690bafea6e0
Mark this file properly as LGPL as allowed by its author Leon van Stuivenberg.
diego
parents:
2979
diff
changeset
|
3 * |
7690bafea6e0
Mark this file properly as LGPL as allowed by its author Leon van Stuivenberg.
diego
parents:
2979
diff
changeset
|
4 * Column code adapted from Peter Gubanov. |
7690bafea6e0
Mark this file properly as LGPL as allowed by its author Leon van Stuivenberg.
diego
parents:
2979
diff
changeset
|
5 * Copyright (c) 2000-2001 Peter Gubanov <peter@elecard.net.ru> |
7690bafea6e0
Mark this file properly as LGPL as allowed by its author Leon van Stuivenberg.
diego
parents:
2979
diff
changeset
|
6 * http://www.elecard.com/peter/idct.shtml |
7690bafea6e0
Mark this file properly as LGPL as allowed by its author Leon van Stuivenberg.
diego
parents:
2979
diff
changeset
|
7 * rounding trick copyright (c) 2000 Michel Lespinasse <walken@zoy.org> |
7690bafea6e0
Mark this file properly as LGPL as allowed by its author Leon van Stuivenberg.
diego
parents:
2979
diff
changeset
|
8 * |
7690bafea6e0
Mark this file properly as LGPL as allowed by its author Leon van Stuivenberg.
diego
parents:
2979
diff
changeset
|
9 * MMI port and (c) 2002 by Leon van Stuivenberg |
7690bafea6e0
Mark this file properly as LGPL as allowed by its author Leon van Stuivenberg.
diego
parents:
2979
diff
changeset
|
10 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3680
diff
changeset
|
11 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3680
diff
changeset
|
12 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3680
diff
changeset
|
13 * FFmpeg is free software; you can redistribute it and/or |
3680
7690bafea6e0
Mark this file properly as LGPL as allowed by its author Leon van Stuivenberg.
diego
parents:
2979
diff
changeset
|
14 * modify it under the terms of the GNU Lesser General Public |
7690bafea6e0
Mark this file properly as LGPL as allowed by its author Leon van Stuivenberg.
diego
parents:
2979
diff
changeset
|
15 * License as published by the Free Software Foundation; either |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3680
diff
changeset
|
16 * version 2.1 of the License, or (at your option) any later version. |
3680
7690bafea6e0
Mark this file properly as LGPL as allowed by its author Leon van Stuivenberg.
diego
parents:
2979
diff
changeset
|
17 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3680
diff
changeset
|
18 * FFmpeg is distributed in the hope that it will be useful, |
3680
7690bafea6e0
Mark this file properly as LGPL as allowed by its author Leon van Stuivenberg.
diego
parents:
2979
diff
changeset
|
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
7690bafea6e0
Mark this file properly as LGPL as allowed by its author Leon van Stuivenberg.
diego
parents:
2979
diff
changeset
|
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
7690bafea6e0
Mark this file properly as LGPL as allowed by its author Leon van Stuivenberg.
diego
parents:
2979
diff
changeset
|
21 * Lesser General Public License for more details. |
7690bafea6e0
Mark this file properly as LGPL as allowed by its author Leon van Stuivenberg.
diego
parents:
2979
diff
changeset
|
22 * |
7690bafea6e0
Mark this file properly as LGPL as allowed by its author Leon van Stuivenberg.
diego
parents:
2979
diff
changeset
|
23 * You should have received a copy of the GNU Lesser General Public |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3680
diff
changeset
|
24 * License along with FFmpeg; if not, write to the Free Software |
3680
7690bafea6e0
Mark this file properly as LGPL as allowed by its author Leon van Stuivenberg.
diego
parents:
2979
diff
changeset
|
25 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
5215 | 26 */ |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
27 |
6763 | 28 #include "libavutil/common.h" |
29 #include "libavcodec/dsputil.h" | |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
30 #include "mmi.h" |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
31 |
2979 | 32 #define BITS_INV_ACC 5 // 4 or 5 for IEEE |
33 #define SHIFT_INV_ROW (16 - BITS_INV_ACC) | |
696
477bcb3b2f0a
ps2 idct bugfix patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
34 #define SHIFT_INV_COL (1 + BITS_INV_ACC) |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
35 |
2979 | 36 #define TG1 6518 |
37 #define TG2 13573 | |
38 #define TG3 21895 | |
39 #define CS4 23170 | |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
40 |
2979 | 41 #define ROUNDER_0 0 |
42 #define ROUNDER_1 16 | |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
43 |
2979 | 44 #define TAB_i_04 (32+0) |
45 #define TAB_i_17 (32+64) | |
46 #define TAB_i_26 (32+128) | |
47 #define TAB_i_35 (32+192) | |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
48 |
2979 | 49 #define TG_1_16 (32+256+0) |
50 #define TG_2_16 (32+256+16) | |
51 #define TG_3_16 (32+256+32) | |
52 #define COS_4_16 (32+256+48) | |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
53 |
2979 | 54 #define CLIPMAX (32+256+64+0) |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
55 |
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
56 static short consttable[] align16 = { |
2979 | 57 /* rounder 0*/ // assume SHIFT_INV_ROW == 11 |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
58 0x3ff, 1, 0x3ff, 1, 0x3ff, 1, 0x3ff, 1, |
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
59 /* rounder 1*/ |
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
60 0x3ff, 0, 0x3ff, 0, 0x3ff, 0, 0x3ff, 0, |
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
61 /* row 0/4*/ |
2967 | 62 16384, 21407, -16384, -21407, 22725, 19266, -22725, -12873, |
63 8867, 16384, 8867, 16384, 4520, 12873, -4520, 19266, | |
64 16384, -8867, 16384, -8867, 12873, -22725, 19266, -22725, | |
65 21407, -16384, -21407, 16384, 19266, 4520, -12873, 4520, | |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
66 /* row 1/7*/ |
2967 | 67 22725, 29692, -22725, -29692, 31521, 26722, -31521, -17855, |
68 12299, 22725, 12299, 22725, 6270, 17855, -6270, 26722, | |
69 22725, -12299, 22725, -12299, 17855, -31521, 26722, -31521, | |
70 29692, -22725, -29692, 22725, 26722, 6270, -17855, 6270, | |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
71 /* row 2/6*/ |
2967 | 72 21407, 27969, -21407, -27969, 29692, 25172, -29692, -16819, |
73 11585, 21407, 11585, 21407, 5906, 16819, -5906, 25172, | |
74 21407, -11585, 21407, -11585, 16819, -29692, 25172, -29692, | |
75 27969, -21407, -27969, 21407, 25172, 5906, -16819, 5906, | |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
76 /*row 3/5*/ |
2967 | 77 19266, 25172, -19266, -25172, 26722, 22654, -26722, -15137, |
78 10426, 19266, 10426, 19266, 5315, 15137, -5315, 22654, | |
79 19266, -10426, 19266, -10426, 15137, -26722, 22654, -26722, | |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
80 25172, -19266, -25172, 19266, 22654, 5315, -15137, 5315, |
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
81 /*column constants*/ |
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
82 TG1, TG1, TG1, TG1, TG1, TG1, TG1, TG1, |
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
83 TG2, TG2, TG2, TG2, TG2, TG2, TG2, TG2, |
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
84 TG3, TG3, TG3, TG3, TG3, TG3, TG3, TG3, |
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
85 CS4, CS4, CS4, CS4, CS4, CS4, CS4, CS4, |
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
86 /* clamp */ |
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
87 255, 255, 255, 255, 255, 255, 255, 255 |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
88 }; |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
89 |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
90 |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
91 #define DCT_8_INV_ROW1(blk, rowoff, taboff, rnd, outreg) { \ |
2979 | 92 lq(blk, rowoff, $16); /* r16 = x7 x5 x3 x1 x6 x4 x2 x0 */ \ |
93 /*slot*/ \ | |
94 lq($24, 0+taboff, $17); /* r17 = w */ \ | |
95 /*delay slot $16*/ \ | |
96 lq($24, 16+taboff, $18);/* r18 = w */ \ | |
97 prevh($16, $2); /* r2 = x1 x3 x5 x7 x0 x2 x4 x6 */ \ | |
98 lq($24, 32+taboff, $19);/* r19 = w */ \ | |
99 phmadh($17, $16, $17); /* r17 = b1"b0'a1"a0' */ \ | |
100 lq($24, 48+taboff, $20);/* r20 = w */ \ | |
101 phmadh($18, $2, $18); /* r18 = b1'b0"a1'a0" */ \ | |
102 phmadh($19, $16, $19); /* r19 = b3"b2'a3"a2' */ \ | |
103 phmadh($20, $2, $20); /* r20 = b3'b2"a3'a2" */ \ | |
104 paddw($17, $18, $17); /* r17 = (b1)(b0)(a1)(a0) */ \ | |
105 paddw($19, $20, $19); /* r19 = (b3)(b2)(a3)(a2) */ \ | |
106 pcpyld($19, $17, $18); /* r18 = (a3)(a2)(a1)(a0) */ \ | |
107 pcpyud($17, $19, $20); /* r20 = (b3)(b2)(b1)(b0) */ \ | |
108 paddw($18, rnd, $18); /* r18 = (a3)(a2)(a1)(a0) */\ | |
109 paddw($18, $20, $17); /* r17 = ()()()(a0+b0) */ \ | |
110 psubw($18, $20, $20); /* r20 = ()()()(a0-b0) */ \ | |
111 psraw($17, SHIFT_INV_ROW, $17); /* r17 = (y3 y2 y1 y0) */ \ | |
112 psraw($20, SHIFT_INV_ROW, $20); /* r20 = (y4 y5 y6 y7) */ \ | |
113 ppach($20, $17, outreg);/* out = y4 y5 y6 y7 y3 y2 y1 y0 Note order */ \ | |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
114 \ |
2979 | 115 prevh(outreg, $2); \ |
116 pcpyud($2, $2, $2); \ | |
117 pcpyld($2, outreg, outreg); \ | |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
118 } |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
119 |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
120 |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
121 #define DCT_8_INV_COL8() \ |
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
122 \ |
2979 | 123 lq($24, TG_3_16, $2); /* r2 = tn3 */ \ |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
124 \ |
2979 | 125 pmulth($11, $2, $17); /* r17 = x3 * tn3 (6420) */ \ |
126 psraw($17, 15, $17); \ | |
127 pmfhl_uw($3); /* r3 = 7531 */ \ | |
128 psraw($3, 15, $3); \ | |
129 pinteh($3, $17, $17); /* r17 = x3 * tn3 */ \ | |
130 psubh($17, $13, $17); /* r17 = tm35 */ \ | |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
131 \ |
2979 | 132 pmulth($13, $2, $18); /* r18 = x5 * tn3 (6420) */ \ |
133 psraw($18, 15, $18); \ | |
134 pmfhl_uw($3); /* r3 = 7531 */ \ | |
135 psraw($3, 15, $3); \ | |
136 pinteh($3, $18, $18); /* r18 = x5 * tn3 */ \ | |
137 paddh($18, $11, $18); /* r18 = tp35 */ \ | |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
138 \ |
2979 | 139 lq($24, TG_1_16, $2); /* r2 = tn1 */ \ |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
140 \ |
2979 | 141 pmulth($15, $2, $19); /* r19 = x7 * tn1 (6420) */ \ |
142 psraw($19, 15, $19); \ | |
143 pmfhl_uw($3); /* r3 = 7531 */ \ | |
144 psraw($3, 15, $3); \ | |
145 pinteh($3, $19, $19); /* r19 = x7 * tn1 */ \ | |
146 paddh($19, $9, $19); /* r19 = tp17 */ \ | |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
147 \ |
2979 | 148 pmulth($9, $2, $20); /* r20 = x1 * tn1 (6420) */ \ |
149 psraw($20, 15, $20); \ | |
150 pmfhl_uw($3); /* r3 = 7531 */ \ | |
151 psraw($3, 15, $3); \ | |
152 pinteh($3, $20, $20); /* r20 = x1 * tn1 */ \ | |
153 psubh($20, $15, $20); /* r20 = tm17 */ \ | |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
154 \ |
2979 | 155 psubh($19, $18, $3); /* r3 = t1 */ \ |
156 paddh($20, $17, $16); /* r16 = t2 */ \ | |
157 psubh($20, $17, $23); /* r23 = b3 */ \ | |
158 paddh($19, $18, $20); /* r20 = b0 */ \ | |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
159 \ |
2979 | 160 lq($24, COS_4_16, $2); /* r2 = cs4 */ \ |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
161 \ |
2979 | 162 paddh($3, $16, $21); /* r21 = t1+t2 */ \ |
163 psubh($3, $16, $22); /* r22 = t1-t2 */ \ | |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
164 \ |
2979 | 165 pmulth($21, $2, $21); /* r21 = cs4 * (t1+t2) 6420 */ \ |
166 psraw($21, 15, $21); \ | |
167 pmfhl_uw($3); /* r3 = 7531 */ \ | |
168 psraw($3, 15, $3); \ | |
169 pinteh($3, $21, $21); /* r21 = b1 */ \ | |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
170 \ |
2979 | 171 pmulth($22, $2, $22); /* r22 = cs4 * (t1-t2) 6420 */ \ |
172 psraw($22, 15, $22); \ | |
173 pmfhl_uw($3); /* r3 = 7531 */ \ | |
174 psraw($3, 15, $3); \ | |
175 pinteh($3, $22, $22); /* r22 = b2 */ \ | |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
176 \ |
2979 | 177 lq($24, TG_2_16, $2); /* r2 = tn2 */ \ |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
178 \ |
2979 | 179 pmulth($10, $2, $17); /* r17 = x2 * tn2 (6420) */ \ |
180 psraw($17, 15, $17); \ | |
181 pmfhl_uw($3); /* r3 = 7531 */ \ | |
182 psraw($3, 15, $3); \ | |
183 pinteh($3, $17, $17); /* r17 = x2 * tn2 */ \ | |
184 psubh($17, $14, $17); /* r17 = tm26 */ \ | |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
185 \ |
2979 | 186 pmulth($14, $2, $18); /* r18 = x6 * tn2 (6420) */ \ |
187 psraw($18, 15, $18); \ | |
188 pmfhl_uw($3); /* r3 = 7531 */ \ | |
189 psraw($3, 15, $3); \ | |
190 pinteh($3, $18, $18); /* r18 = x6 * tn2 */ \ | |
191 paddh($18, $10, $18); /* r18 = tp26 */ \ | |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
192 \ |
2979 | 193 paddh($8, $12, $2); /* r2 = tp04 */ \ |
194 psubh($8, $12, $3); /* r3 = tm04 */ \ | |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
195 \ |
2979 | 196 paddh($2, $18, $16); /* r16 = a0 */ \ |
197 psubh($2, $18, $19); /* r19 = a3 */ \ | |
198 psubh($3, $17, $18); /* r18 = a2 */ \ | |
199 paddh($3, $17, $17); /* r17 = a1 */ | |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
200 |
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
201 |
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
202 #define DCT_8_INV_COL8_STORE(blk) \ |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
203 \ |
2979 | 204 paddh($16, $20, $2); /* y0 a0+b0 */ \ |
205 psubh($16, $20, $16); /* y7 a0-b0 */ \ | |
206 psrah($2, SHIFT_INV_COL, $2); \ | |
207 psrah($16, SHIFT_INV_COL, $16); \ | |
208 sq($2, 0, blk); \ | |
209 sq($16, 112, blk); \ | |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
210 \ |
2979 | 211 paddh($17, $21, $3); /* y1 a1+b1 */ \ |
212 psubh($17, $21, $17); /* y6 a1-b1 */ \ | |
213 psrah($3, SHIFT_INV_COL, $3); \ | |
214 psrah($17, SHIFT_INV_COL, $17); \ | |
215 sq($3, 16, blk); \ | |
216 sq($17, 96, blk); \ | |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
217 \ |
2979 | 218 paddh($18, $22, $2); /* y2 a2+b2 */ \ |
219 psubh($18, $22, $18); /* y5 a2-b2 */ \ | |
220 psrah($2, SHIFT_INV_COL, $2); \ | |
221 psrah($18, SHIFT_INV_COL, $18); \ | |
222 sq($2, 32, blk); \ | |
223 sq($18, 80, blk); \ | |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
224 \ |
2979 | 225 paddh($19, $23, $3); /* y3 a3+b3 */ \ |
226 psubh($19, $23, $19); /* y4 a3-b3 */ \ | |
227 psrah($3, SHIFT_INV_COL, $3); \ | |
228 psrah($19, SHIFT_INV_COL, $19); \ | |
229 sq($3, 48, blk); \ | |
230 sq($19, 64, blk); | |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
231 |
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
232 |
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
233 |
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
234 #define DCT_8_INV_COL8_PMS() \ |
2979 | 235 paddh($16, $20, $2); /* y0 a0+b0 */ \ |
236 psubh($16, $20, $20); /* y7 a0-b0 */ \ | |
237 psrah($2, SHIFT_INV_COL, $16); \ | |
238 psrah($20, SHIFT_INV_COL, $20); \ | |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
239 \ |
2979 | 240 paddh($17, $21, $3); /* y1 a1+b1 */ \ |
241 psubh($17, $21, $21); /* y6 a1-b1 */ \ | |
242 psrah($3, SHIFT_INV_COL, $17); \ | |
243 psrah($21, SHIFT_INV_COL, $21); \ | |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
244 \ |
2979 | 245 paddh($18, $22, $2); /* y2 a2+b2 */ \ |
246 psubh($18, $22, $22); /* y5 a2-b2 */ \ | |
247 psrah($2, SHIFT_INV_COL, $18); \ | |
248 psrah($22, SHIFT_INV_COL, $22); \ | |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
249 \ |
2979 | 250 paddh($19, $23, $3); /* y3 a3+b3 */ \ |
251 psubh($19, $23, $23); /* y4 a3-b3 */ \ | |
252 psrah($3, SHIFT_INV_COL, $19); \ | |
253 psrah($23, SHIFT_INV_COL, $23); | |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
254 |
2979 | 255 #define PUT(rs) \ |
256 pminh(rs, $11, $2); \ | |
257 pmaxh($2, $0, $2); \ | |
258 ppacb($0, $2, $2); \ | |
259 sd3(2, 0, 4); \ | |
6636 | 260 asm volatile ("add $4, $5, $4"); |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
261 |
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
262 #define DCT_8_INV_COL8_PUT() \ |
2979 | 263 PUT($16); \ |
264 PUT($17); \ | |
265 PUT($18); \ | |
266 PUT($19); \ | |
267 PUT($23); \ | |
268 PUT($22); \ | |
269 PUT($21); \ | |
270 PUT($20); | |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
271 |
2979 | 272 #define ADD(rs) \ |
273 ld3(4, 0, 2); \ | |
274 pextlb($0, $2, $2); \ | |
275 paddh($2, rs, $2); \ | |
276 pminh($2, $11, $2); \ | |
277 pmaxh($2, $0, $2); \ | |
278 ppacb($0, $2, $2); \ | |
279 sd3(2, 0, 4); \ | |
6636 | 280 asm volatile ("add $4, $5, $4"); |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
281 |
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
282 /*fixme: schedule*/ |
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
283 #define DCT_8_INV_COL8_ADD() \ |
2979 | 284 ADD($16); \ |
285 ADD($17); \ | |
286 ADD($18); \ | |
287 ADD($19); \ | |
288 ADD($23); \ | |
289 ADD($22); \ | |
290 ADD($21); \ | |
291 ADD($20); | |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
292 |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
293 |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
294 void ff_mmi_idct(int16_t * block) |
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
295 { |
2979 | 296 /* $4 = block */ |
6636 | 297 asm volatile("la $24, %0"::"m"(consttable[0])); |
2979 | 298 lq($24, ROUNDER_0, $8); |
299 lq($24, ROUNDER_1, $7); | |
300 DCT_8_INV_ROW1($4, 0, TAB_i_04, $8, $8); | |
301 DCT_8_INV_ROW1($4, 16, TAB_i_17, $7, $9); | |
302 DCT_8_INV_ROW1($4, 32, TAB_i_26, $7, $10); | |
303 DCT_8_INV_ROW1($4, 48, TAB_i_35, $7, $11); | |
304 DCT_8_INV_ROW1($4, 64, TAB_i_04, $7, $12); | |
305 DCT_8_INV_ROW1($4, 80, TAB_i_35, $7, $13); | |
306 DCT_8_INV_ROW1($4, 96, TAB_i_26, $7, $14); | |
307 DCT_8_INV_ROW1($4, 112, TAB_i_17, $7, $15); | |
308 DCT_8_INV_COL8(); | |
309 DCT_8_INV_COL8_STORE($4); | |
2967 | 310 |
2979 | 311 // empty asm with only a clobber list: marks the saved-temp regs $16-$23 as modified so the compiler saves/restores them |
6636 | 312 asm volatile(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23"); |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
313 } |
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
314 |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
315 |
1064 | 316 void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block) |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
317 { |
2979 | 318 /* $4 = dest, $5 = line_size, $6 = block */ |
6636 | 319 asm volatile("la $24, %0"::"m"(consttable[0])); |
2979 | 320 lq($24, ROUNDER_0, $8); |
321 lq($24, ROUNDER_1, $7); | |
322 DCT_8_INV_ROW1($6, 0, TAB_i_04, $8, $8); | |
323 DCT_8_INV_ROW1($6, 16, TAB_i_17, $7, $9); | |
324 DCT_8_INV_ROW1($6, 32, TAB_i_26, $7, $10); | |
325 DCT_8_INV_ROW1($6, 48, TAB_i_35, $7, $11); | |
326 DCT_8_INV_ROW1($6, 64, TAB_i_04, $7, $12); | |
327 DCT_8_INV_ROW1($6, 80, TAB_i_35, $7, $13); | |
328 DCT_8_INV_ROW1($6, 96, TAB_i_26, $7, $14); | |
329 DCT_8_INV_ROW1($6, 112, TAB_i_17, $7, $15); | |
330 DCT_8_INV_COL8(); | |
331 lq($24, CLIPMAX, $11); | |
332 DCT_8_INV_COL8_PMS(); | |
333 DCT_8_INV_COL8_PUT(); | |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
334 |
2979 | 335 // empty asm with only a clobber list: marks the saved-temp regs $16-$23 as modified so the compiler saves/restores them |
6636 | 336 asm volatile(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23"); |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
diff
changeset
|
337 } |
696
477bcb3b2f0a
ps2 idct bugfix patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
689
diff
changeset
|
338 |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
339 |
1064 | 340 void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block) |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
341 { |
2979 | 342 /* $4 = dest, $5 = line_size, $6 = block */ |
6636 | 343 asm volatile("la $24, %0"::"m"(consttable[0])); |
2979 | 344 lq($24, ROUNDER_0, $8); |
345 lq($24, ROUNDER_1, $7); | |
346 DCT_8_INV_ROW1($6, 0, TAB_i_04, $8, $8); | |
347 DCT_8_INV_ROW1($6, 16, TAB_i_17, $7, $9); | |
348 DCT_8_INV_ROW1($6, 32, TAB_i_26, $7, $10); | |
349 DCT_8_INV_ROW1($6, 48, TAB_i_35, $7, $11); | |
350 DCT_8_INV_ROW1($6, 64, TAB_i_04, $7, $12); | |
351 DCT_8_INV_ROW1($6, 80, TAB_i_35, $7, $13); | |
352 DCT_8_INV_ROW1($6, 96, TAB_i_26, $7, $14); | |
353 DCT_8_INV_ROW1($6, 112, TAB_i_17, $7, $15); | |
354 DCT_8_INV_COL8(); | |
355 lq($24, CLIPMAX, $11); | |
356 DCT_8_INV_COL8_PMS(); | |
357 DCT_8_INV_COL8_ADD(); | |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
358 |
2979 | 359 // empty asm with only a clobber list: marks the saved-temp regs $16-$23 as modified so the compiler saves/restores them |
6636 | 360 asm volatile(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23"); |
721
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
361 } |
71f669e9f633
ps2 optimizations update patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
696
diff
changeset
|
362 |