Mercurial > libavcodec.hg
annotate sh4/idct_sh4.c @ 3990:746a60ba3177 libavcodec
enable CMOV_IS_FAST as its faster or equal speed on every cpu (duron, athlon, PM, P3) from which ive seen benchmarks, it might be slower on P4 but noone has posted benchmarks ...
author | michael |
---|---|
date | Wed, 11 Oct 2006 12:23:40 +0000 |
parents | c8c591fe26f8 |
children | d5ba514e3f4a |
rev | line source |
---|---|
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
1 /* |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
2 * idct for sh4 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
3 * |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
4 * Copyright (c) 2001-2003 BERO <bero@geocities.co.jp> |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
5 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3556
diff
changeset
|
6 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3556
diff
changeset
|
7 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3556
diff
changeset
|
8 * FFmpeg is free software; you can redistribute it and/or |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
9 * modify it under the terms of the GNU Lesser General Public |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
10 * License as published by the Free Software Foundation; either |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3556
diff
changeset
|
11 * version 2.1 of the License, or (at your option) any later version. |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
12 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3556
diff
changeset
|
13 * FFmpeg is distributed in the hope that it will be useful, |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
16 * Lesser General Public License for more details. |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
17 * |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
18 * You should have received a copy of the GNU Lesser General Public |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3556
diff
changeset
|
19 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2979
diff
changeset
|
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
21 */ |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
22 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
23 #include "../dsputil.h" |
/*
 * IDCT basis constants: IDCT_Ck = sqrt(2) * cos(k * pi / 16).
 * They only exist while the two tables below are being built and are
 * undefined again right after.
 */
#define IDCT_C1 1.38703984532214752434  /* sqrt(2)*cos(1*pi/16) */
#define IDCT_C2 1.30656296487637657577  /* sqrt(2)*cos(2*pi/16) */
#define IDCT_C3 1.17587560241935884520  /* sqrt(2)*cos(3*pi/16) */
#define IDCT_C4 1.00000000000000000000  /* sqrt(2)*cos(4*pi/16) */
#define IDCT_C5 0.78569495838710234903  /* sqrt(2)*cos(5*pi/16) */
#define IDCT_C6 0.54119610014619712324  /* sqrt(2)*cos(6*pi/16) */
#define IDCT_C7 0.27589937928294311353  /* sqrt(2)*cos(7*pi/16) */

/*
 * 4x4 transform matrix for the even-indexed coefficients (0, 2, 4, 6).
 * Each group of four values holds the weights applied to one input
 * coefficient: the transform computes out[i] = sum_j table[4*j + i] * in[j].
 * Aligned to 8 bytes; the SH4 load_matrix() path reads it with paired moves.
 */
static const float even_table[16] __attribute__ ((aligned(8))) = {
    /* input 0 */  IDCT_C4,  IDCT_C4,  IDCT_C4,  IDCT_C4,
    /* input 2 */  IDCT_C2,  IDCT_C6, -IDCT_C6, -IDCT_C2,
    /* input 4 */  IDCT_C4, -IDCT_C4, -IDCT_C4,  IDCT_C4,
    /* input 6 */  IDCT_C6, -IDCT_C2,  IDCT_C2, -IDCT_C6
};

/*
 * 4x4 transform matrix for the odd-indexed coefficients (1, 3, 5, 7),
 * stored in the same layout as even_table.
 */
static const float odd_table[16] __attribute__ ((aligned(8))) = {
    /* input 1 */  IDCT_C1,  IDCT_C3,  IDCT_C5,  IDCT_C7,
    /* input 3 */  IDCT_C3, -IDCT_C7, -IDCT_C1, -IDCT_C5,
    /* input 5 */  IDCT_C5, -IDCT_C1,  IDCT_C7,  IDCT_C3,
    /* input 7 */  IDCT_C7, -IDCT_C5,  IDCT_C3, -IDCT_C1
};

#undef IDCT_C1
#undef IDCT_C2
#undef IDCT_C3
#undef IDCT_C4
#undef IDCT_C5
#undef IDCT_C6
#undef IDCT_C7
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
53 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
54 #if defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
55 |
/*
 * load_matrix(table): load the 16 floats at `table` into the SH4 back-bank
 * registers xd0..xd14, i.e. the XMTRX matrix that ftrv() multiplies by.
 *
 * The two fschg instructions switch the FPU to pair (64-bit) transfers for
 * the eight fmov loads and then switch it back.
 *
 * The post-increment addressing (@%0+) modifies the pointer register, so the
 * pointer is copied into a private temporary that is passed as an in/out
 * operand; the caller's expression is never written to and the compiler
 * knows the register changes.
 */
#define load_matrix(table) \
        do { \
                const float *load_matrix_ptr = (table); \
                __asm__ volatile( \
                "       fschg\n" \
                "       fmov    @%0+,xd0\n" \
                "       fmov    @%0+,xd2\n" \
                "       fmov    @%0+,xd4\n" \
                "       fmov    @%0+,xd6\n" \
                "       fmov    @%0+,xd8\n" \
                "       fmov    @%0+,xd10\n" \
                "       fmov    @%0+,xd12\n" \
                "       fmov    @%0+,xd14\n" \
                "       fschg\n" \
                : "=r"(load_matrix_ptr) \
                : "0"(load_matrix_ptr) \
                ); \
        } while (0)

/*
 * ftrv(): replace the vector (fr0, fr1, fr2, fr3) with XMTRX * vector using
 * the SH4 ftrv instruction. The four values are tied in and out through
 * matching constraints. No trailing semicolon: the call site supplies it.
 */
#define ftrv() \
        __asm__ volatile("ftrv xmtrx,fv0" \
        : "=f"(fr0),"=f"(fr1),"=f"(fr2),"=f"(fr3) \
        :  "0"(fr0), "1"(fr1), "2"(fr2), "3"(fr3) )

/*
 * DEFREG: declare the four working floats and pin each of them to the
 * hardware register of the same name, so that together they form the fv0
 * vector operand of ftrv(). Use as `DEFREG;` at the top of a function.
 */
#define DEFREG \
        register float fr0 __asm__("fr0"); \
        register float fr1 __asm__("fr1"); \
        register float fr2 __asm__("fr2"); \
        register float fr3 __asm__("fr3")
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
83 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
84 #else |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
85 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
86 /* generic C code for check */ |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
87 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
/**
 * Portable stand-in for the SH4 ftrv instruction: multiply a 4x4 matrix by
 * a 4-element vector, in place.
 *
 * @param xf  16 matrix entries; element (row i, column j) is xf[4*j + i]
 * @param fv  input vector of 4 floats, overwritten with the product
 */
static void ftrv_(const float xf[],float fv[])
{
    /* Snapshot the input first: fv is overwritten while it is still needed. */
    const float in0 = fv[0];
    const float in1 = fv[1];
    const float in2 = fv[2];
    const float in3 = fv[3];
    int row;

    for (row = 0; row < 4; row++) {
        fv[row] = xf[row     ]*in0 + xf[row +  4]*in1
                + xf[row +  8]*in2 + xf[row + 12]*in3;
    }
}
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
100 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
/**
 * Portable stand-in for the SH4 matrix load: copy a 16-entry coefficient
 * table into the working matrix.
 *
 * @param xf     destination, room for 16 floats
 * @param table  source, 16 floats
 */
static void load_matrix_(float xf[],const float table[])
{
    const float *src = table;
    const float *const stop = table + 16;
    float *dst = xf;

    while (src != stop)
        *dst++ = *src++;
}
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
106 |
/*
 * Generic-C versions of the SH4 primitives, so that the IDCT body can be
 * written once for both builds.
 *
 * DEFREG declares the working storage: fv[] is the 4-element vector and
 * xf[] the 4x4 matrix. fr0..fr3 alias the vector elements, and
 * load_matrix()/ftrv() forward to the C helpers using those two arrays.
 */
#define DEFREG \
        float fv[4],xf[16]

#define fr0 fv[0]
#define fr1 fv[1]
#define fr2 fv[2]
#define fr3 fv[3]

#define load_matrix(table) load_matrix_(xf,table)
#define ftrv()             ftrv_(xf,fv)
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
117 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
118 #endif |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
119 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
/*
 * DESCALE(x,n): scale x down by 2^n.
 *
 * The enabled variant multiplies by the float reciprocal of 2^n; the
 * disabled variant converts to int and shifts right with rounding.
 * Both expansions are fully parenthesized so that the macro behaves as a
 * single operand inside a larger expression (e.g. a / DESCALE(b,n)) and so
 * that an expression passed as n is evaluated as a unit.
 */
#if 1
#define DESCALE(x,n)    ((x)*(1.0f/(1<<(n))))
#else
#define DESCALE(x,n)    (((int)(x)+(1<<((n)-1)))>>(n))
#endif
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
125 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
126 /* this code works worse with gcc cvs; gcc 3.2.3 works fine */ |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
127 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
128 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
129 #if 1 |
2967 | 130 //optimized |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
131 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
132 void idct_sh4(DCTELEM *block) |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
133 { |
2979 | 134 DEFREG; |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
135 |
2979 | 136 int i; |
137 float tblock[8*8],*fblock; | |
138 int ofs1,ofs2,ofs3; | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
139 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
140 #if defined(__SH4__) |
2979 | 141 #error "FIXME!! change to single float" |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
142 #endif |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
143 |
2979 | 144 /* row */ |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
145 |
2979 | 146 /* even part */ |
147 load_matrix(even_table); | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
148 |
2979 | 149 fblock = tblock+4; |
150 i = 8; | |
151 do { | |
152 fr0 = block[0]; | |
153 fr1 = block[2]; | |
154 fr2 = block[4]; | |
155 fr3 = block[6]; | |
156 block+=8; | |
157 ftrv(); | |
158 *--fblock = fr3; | |
159 *--fblock = fr2; | |
160 *--fblock = fr1; | |
161 *--fblock = fr0; | |
162 fblock+=8+4; | |
163 } while(--i); | |
164 block-=8*8; | |
165 fblock-=8*8+4; | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
166 |
2979 | 167 load_matrix(odd_table); |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
168 |
2979 | 169 i = 8; |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
170 |
2979 | 171 // ofs1 = sizeof(float)*1; |
172 // ofs2 = sizeof(float)*2; | |
173 // ofs3 = sizeof(float)*3; | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
174 |
2979 | 175 do { |
176 float t0,t1,t2,t3; | |
177 fr0 = block[1]; | |
178 fr1 = block[3]; | |
179 fr2 = block[5]; | |
180 fr3 = block[7]; | |
181 block+=8; | |
182 ftrv(); | |
183 t0 = *fblock++; | |
184 t1 = *fblock++; | |
185 t2 = *fblock++; | |
186 t3 = *fblock++; | |
187 fblock+=4; | |
188 *--fblock = t0 - fr0; | |
189 *--fblock = t1 - fr1; | |
190 *--fblock = t2 - fr2; | |
191 *--fblock = t3 - fr3; | |
192 *--fblock = t3 + fr3; | |
193 *--fblock = t2 + fr2; | |
194 *--fblock = t1 + fr1; | |
195 *--fblock = t0 + fr0; | |
196 fblock+=8; | |
197 } while(--i); | |
198 block-=8*8; | |
199 fblock-=8*8; | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
200 |
2979 | 201 /* col */ |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
202 |
2979 | 203 /* even part */ |
204 load_matrix(even_table); | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
205 |
2979 | 206 ofs1 = sizeof(float)*2*8; |
207 ofs2 = sizeof(float)*4*8; | |
208 ofs3 = sizeof(float)*6*8; | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
209 |
2979 | 210 i = 8; |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
211 |
2979 | 212 #define OA(fblock,ofs) *(float*)((char*)fblock + ofs) |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
213 |
2979 | 214 do { |
215 fr0 = OA(fblock, 0); | |
216 fr1 = OA(fblock,ofs1); | |
217 fr2 = OA(fblock,ofs2); | |
218 fr3 = OA(fblock,ofs3); | |
219 ftrv(); | |
220 OA(fblock,0 ) = fr0; | |
221 OA(fblock,ofs1) = fr1; | |
222 OA(fblock,ofs2) = fr2; | |
223 OA(fblock,ofs3) = fr3; | |
224 fblock++; | |
225 } while(--i); | |
226 fblock-=8; | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
227 |
2979 | 228 load_matrix(odd_table); |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
229 |
2979 | 230 i=8; |
231 do { | |
232 float t0,t1,t2,t3; | |
233 t0 = OA(fblock, 0); /* [8*0] */ | |
234 t1 = OA(fblock,ofs1); /* [8*2] */ | |
235 t2 = OA(fblock,ofs2); /* [8*4] */ | |
236 t3 = OA(fblock,ofs3); /* [8*6] */ | |
237 fblock+=8; | |
238 fr0 = OA(fblock, 0); /* [8*1] */ | |
239 fr1 = OA(fblock,ofs1); /* [8*3] */ | |
240 fr2 = OA(fblock,ofs2); /* [8*5] */ | |
241 fr3 = OA(fblock,ofs3); /* [8*7] */ | |
242 fblock+=-8+1; | |
243 ftrv(); | |
244 block[8*0] = DESCALE(t0 + fr0,3); | |
245 block[8*7] = DESCALE(t0 - fr0,3); | |
246 block[8*1] = DESCALE(t1 + fr1,3); | |
247 block[8*6] = DESCALE(t1 - fr1,3); | |
248 block[8*2] = DESCALE(t2 + fr2,3); | |
249 block[8*5] = DESCALE(t2 - fr2,3); | |
250 block[8*3] = DESCALE(t3 + fr3,3); | |
251 block[8*4] = DESCALE(t3 - fr3,3); | |
252 block++; | |
253 } while(--i); | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
254 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
255 #if defined(__SH4__) |
2979 | 256 #error "FIXME!! change to double" |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
257 #endif |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
258 } |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
259 #else |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
260 void idct_sh4(DCTELEM *block) |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
261 { |
2979 | 262 DEFREG; |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
263 |
2979 | 264 int i; |
265 float tblock[8*8],*fblock; | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
266 |
2979 | 267 /* row */ |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
268 |
2979 | 269 /* even part */ |
270 load_matrix(even_table); | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
271 |
2979 | 272 fblock = tblock; |
273 i = 8; | |
274 do { | |
275 fr0 = block[0]; | |
276 fr1 = block[2]; | |
277 fr2 = block[4]; | |
278 fr3 = block[6]; | |
279 block+=8; | |
280 ftrv(); | |
281 fblock[0] = fr0; | |
282 fblock[2] = fr1; | |
283 fblock[4] = fr2; | |
284 fblock[6] = fr3; | |
285 fblock+=8; | |
286 } while(--i); | |
287 block-=8*8; | |
288 fblock-=8*8; | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
289 |
2979 | 290 load_matrix(odd_table); |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
291 |
2979 | 292 i = 8; |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
293 |
2979 | 294 do { |
295 float t0,t1,t2,t3; | |
296 fr0 = block[1]; | |
297 fr1 = block[3]; | |
298 fr2 = block[5]; | |
299 fr3 = block[7]; | |
300 block+=8; | |
301 ftrv(); | |
302 t0 = fblock[0]; | |
303 t1 = fblock[2]; | |
304 t2 = fblock[4]; | |
305 t3 = fblock[6]; | |
306 fblock[0] = t0 + fr0; | |
307 fblock[7] = t0 - fr0; | |
308 fblock[1] = t1 + fr1; | |
309 fblock[6] = t1 - fr1; | |
310 fblock[2] = t2 + fr2; | |
311 fblock[5] = t2 - fr2; | |
312 fblock[3] = t3 + fr3; | |
313 fblock[4] = t3 - fr3; | |
314 fblock+=8; | |
315 } while(--i); | |
316 block-=8*8; | |
317 fblock-=8*8; | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
318 |
2979 | 319 /* col */ |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
320 |
2979 | 321 /* even part */ |
322 load_matrix(even_table); | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
323 |
2979 | 324 i = 8; |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
325 |
2979 | 326 do { |
327 fr0 = fblock[8*0]; | |
328 fr1 = fblock[8*2]; | |
329 fr2 = fblock[8*4]; | |
330 fr3 = fblock[8*6]; | |
331 ftrv(); | |
332 fblock[8*0] = fr0; | |
333 fblock[8*2] = fr1; | |
334 fblock[8*4] = fr2; | |
335 fblock[8*6] = fr3; | |
336 fblock++; | |
337 } while(--i); | |
338 fblock-=8; | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
339 |
2979 | 340 load_matrix(odd_table); |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
341 |
2979 | 342 i=8; |
343 do { | |
344 float t0,t1,t2,t3; | |
345 fr0 = fblock[8*1]; | |
346 fr1 = fblock[8*3]; | |
347 fr2 = fblock[8*5]; | |
348 fr3 = fblock[8*7]; | |
349 ftrv(); | |
350 t0 = fblock[8*0]; | |
351 t1 = fblock[8*2]; | |
352 t2 = fblock[8*4]; | |
353 t3 = fblock[8*6]; | |
354 fblock++; | |
355 block[8*0] = DESCALE(t0 + fr0,3); | |
356 block[8*7] = DESCALE(t0 - fr0,3); | |
357 block[8*1] = DESCALE(t1 + fr1,3); | |
358 block[8*6] = DESCALE(t1 - fr1,3); | |
359 block[8*2] = DESCALE(t2 + fr2,3); | |
360 block[8*5] = DESCALE(t2 - fr2,3); | |
361 block[8*3] = DESCALE(t3 + fr3,3); | |
362 block[8*4] = DESCALE(t3 - fr3,3); | |
363 block++; | |
364 } while(--i); | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
365 } |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
366 #endif |