Mercurial > libavcodec.hg
annotate sh4/idct_sh4.c @ 2996:bf34de4233a0 libavcodec
update x264 wrapper.
patch by Robert Swain.
author | lorenm |
---|---|
date | Thu, 29 Dec 2005 07:24:23 +0000 |
parents | bfabfdf9ce55 |
children | 0b546eab515d |
rev | line source |
---|---|
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
1 /* |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
2 * idct for sh4 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
3 * |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
4 * Copyright (c) 2001-2003 BERO <bero@geocities.co.jp> |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
5 * |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
6 * This library is free software; you can redistribute it and/or |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
7 * modify it under the terms of the GNU Lesser General Public |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
8 * License as published by the Free Software Foundation; either |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
9 * version 2 of the License, or (at your option) any later version. |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
10 * |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
11 * This library is distributed in the hope that it will be useful, |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
14 * Lesser General Public License for more details. |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
15 * |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
16 * You should have received a copy of the GNU Lesser General Public |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
17 * License along with this library; if not, write to the Free Software |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
19 */ |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
20 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
21 #include "../dsputil.h" |
/*
 * Scaled cosine constants, cN = sqrt(2)*cos(N*pi/16).
 * They are only needed to initialise the two tables below and are
 * #undef'd again immediately afterwards.
 */
#define c1      1.38703984532214752434  /* sqrt(2)*cos(1*pi/16) */
#define c2      1.30656296487637657577  /* sqrt(2)*cos(2*pi/16) */
#define c3      1.17587560241935884520  /* sqrt(2)*cos(3*pi/16) */
#define c4      1.00000000000000000000  /* sqrt(2)*cos(4*pi/16) */
#define c5      0.78569495838710234903  /* sqrt(2)*cos(5*pi/16) */
#define c6      0.54119610014619712324  /* sqrt(2)*cos(6*pi/16) */
#define c7      0.27589937928294311353  /* sqrt(2)*cos(7*pi/16) */

/*
 * 4x4 coefficient matrix applied to the even-indexed inputs (0,2,4,6).
 *
 * Layout as consumed by ftrv: entry [4*k + j] is the weight with which
 * input k contributes to output j, so each line of four below holds the
 * contributions of one input.
 *
 * NOTE(review): the 8-byte alignment is presumably required by the paired
 * fmov loads in the SH4 load_matrix() -- confirm against the SH-4 manual.
 */
static const float even_table[] __attribute__ ((aligned(8))) = {
         c4, c4, c4, c4,
         c2, c6,-c6,-c2,
         c4,-c4,-c4, c4,
         c6,-c2, c2,-c6
};

/* Same layout as even_table, for the odd-indexed inputs (1,3,5,7). */
static const float odd_table[] __attribute__ ((aligned(8))) = {
         c1, c3, c5, c7,
         c3,-c7,-c1,-c5,
         c5,-c1, c7, c3,
         c7,-c5, c3,-c1
};

#undef  c1
#undef  c2
#undef  c3
#undef  c4
#undef  c5
#undef  c6
#undef  c7
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
51 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
52 #if defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
53 |
/*
 * Load a 16-float coefficient table into the extended FPU registers
 * xd0..xd14 that the ftrv instruction uses as its matrix.
 *
 * The loads are bracketed by two fschg instructions; eight post-increment
 * loads cover the 16 floats, i.e. each fmov moves a pair of floats.
 *
 * The asm post-increments its pointer operand, so the pointer is bound as a
 * read-write ("+r") operand on a private copy instead of being passed as a
 * plain input: GCC requires that input-only operands are left unmodified.
 * The do/while(0) wrapper keeps the copy local and lets the macro be used
 * like a single statement: load_matrix(tbl);
 */
#define load_matrix(table) \
    do { \
        const float *load_matrix_src_ = (table); \
        __asm__ volatile( \
        "       fschg\n" \
        "       fmov    @%0+,xd0\n" \
        "       fmov    @%0+,xd2\n" \
        "       fmov    @%0+,xd4\n" \
        "       fmov    @%0+,xd6\n" \
        "       fmov    @%0+,xd8\n" \
        "       fmov    @%0+,xd10\n" \
        "       fmov    @%0+,xd12\n" \
        "       fmov    @%0+,xd14\n" \
        "       fschg\n" \
        : "+r"(load_matrix_src_) \
        ); \
    } while (0)
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
70 |
/*
 * Transform the vector held in fr0..fr3 in place with the matrix that
 * load_matrix() placed in xmtrx, using the SH4 ftrv instruction.
 *
 * fr0..fr3 are both inputs and outputs, expressed with matching
 * constraints.  The macro deliberately has no trailing semicolon; the
 * caller's "ftrv();" supplies it, like the generic C fallback.
 */
#define ftrv() \
    __asm__ volatile("ftrv xmtrx,fv0" \
        : "=f"(fr0),"=f"(fr1),"=f"(fr2),"=f"(fr3) \
        :  "0"(fr0), "1"(fr1), "2"(fr2), "3"(fr3) )

/*
 * Declare the four working floats and pin them to the hardware registers
 * fr0..fr3, which form the fv0 vector operand named in ftrv() above.
 */
#define DEFREG \
    register float fr0 __asm__("fr0"); \
    register float fr1 __asm__("fr1"); \
    register float fr2 __asm__("fr2"); \
    register float fr3 __asm__("fr3")
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
81 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
82 #else |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
83 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
84 /* generic C code for check */ |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
85 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
/**
 * Portable C stand-in for the SH4 ftrv operation: transform a 4-element
 * vector in place with a 4x4 matrix.
 *
 * Output j is the sum over k of xf[4*k + j] * fv[k]; that is, each group of
 * four consecutive matrix entries holds the contributions of one input.
 *
 * @param xf  16 matrix coefficients (read only)
 * @param fv  4-element vector, overwritten with the result
 */
static void ftrv_(const float xf[],float fv[])
{
    float f0,f1,f2,f3;

    /* Snapshot the inputs first: fv[] is overwritten while it is still needed. */
    f0 = fv[0];
    f1 = fv[1];
    f2 = fv[2];
    f3 = fv[3];
    fv[0] = xf[0]*f0 + xf[4]*f1 + xf[ 8]*f2 + xf[12]*f3;
    fv[1] = xf[1]*f0 + xf[5]*f1 + xf[ 9]*f2 + xf[13]*f3;
    fv[2] = xf[2]*f0 + xf[6]*f1 + xf[10]*f2 + xf[14]*f3;
    fv[3] = xf[3]*f0 + xf[7]*f1 + xf[11]*f2 + xf[15]*f3;
}
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
98 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
/**
 * Portable C stand-in for the SH4 matrix load: copy a 16-entry coefficient
 * table into the working matrix used by ftrv_().
 *
 * @param xf     destination, room for 16 floats
 * @param table  source, 16 floats (read only)
 */
static void load_matrix_(float xf[],const float table[])
{
    int i;

    for(i=0;i<16;i++) {
        xf[i]=table[i];
    }
}
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
104 |
/*
 * Generic fallback: give the IDCT body the same ftrv()/load_matrix()/DEFREG
 * vocabulary as the SH4 branch, backed by the C helpers above.  The vector
 * lives in fv[4] and the current matrix in xf[16], both declared by DEFREG
 * inside the calling function.
 */
#define ftrv()                  ftrv_(xf,fv)
#define load_matrix(table)      load_matrix_(xf,table)

#define DEFREG \
    float fv[4],xf[16]

/* fr0..fr3 alias the elements of the working vector. */
#define fr0     fv[0]
#define fr1     fv[1]
#define fr2     fv[2]
#define fr3     fv[3]
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
115 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
116 #endif |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
117 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
/*
 * DESCALE(x,n): scale x down by 2^n.
 *
 * The active variant multiplies by the float reciprocal and yields a float;
 * any conversion to an integer type happens at the point of assignment in
 * the caller.  The disabled variant converts to int first and rounds by
 * adding half of the divisor before shifting.
 *
 * Both expansions are fully parenthesised so the macro can be used inside
 * larger expressions and with expression arguments.
 */
#if 1
#define DESCALE(x,n)    ((x)*(1.0f/(1<<(n))))
#else
#define DESCALE(x,n)    (((int)(x)+(1<<((n)-1)))>>(n))
#endif
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
123 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
124 /* This code works worse with GCC from CVS; GCC 3.2.3 works fine. */ |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
125 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
126 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
127 #if 1 |
2967 | 128 //optimized |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
129 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
130 void idct_sh4(DCTELEM *block) |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
131 { |
2979 | 132 DEFREG; |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
133 |
2979 | 134 int i; |
135 float tblock[8*8],*fblock; | |
136 int ofs1,ofs2,ofs3; | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
137 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
138 #if defined(__SH4__) |
2979 | 139 #error "FIXME!! change to single float" |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
140 #endif |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
141 |
2979 | 142 /* row */ |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
143 |
2979 | 144 /* even part */ |
145 load_matrix(even_table); | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
146 |
2979 | 147 fblock = tblock+4; |
148 i = 8; | |
149 do { | |
150 fr0 = block[0]; | |
151 fr1 = block[2]; | |
152 fr2 = block[4]; | |
153 fr3 = block[6]; | |
154 block+=8; | |
155 ftrv(); | |
156 *--fblock = fr3; | |
157 *--fblock = fr2; | |
158 *--fblock = fr1; | |
159 *--fblock = fr0; | |
160 fblock+=8+4; | |
161 } while(--i); | |
162 block-=8*8; | |
163 fblock-=8*8+4; | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
164 |
2979 | 165 load_matrix(odd_table); |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
166 |
2979 | 167 i = 8; |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
168 |
2979 | 169 // ofs1 = sizeof(float)*1; |
170 // ofs2 = sizeof(float)*2; | |
171 // ofs3 = sizeof(float)*3; | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
172 |
2979 | 173 do { |
174 float t0,t1,t2,t3; | |
175 fr0 = block[1]; | |
176 fr1 = block[3]; | |
177 fr2 = block[5]; | |
178 fr3 = block[7]; | |
179 block+=8; | |
180 ftrv(); | |
181 t0 = *fblock++; | |
182 t1 = *fblock++; | |
183 t2 = *fblock++; | |
184 t3 = *fblock++; | |
185 fblock+=4; | |
186 *--fblock = t0 - fr0; | |
187 *--fblock = t1 - fr1; | |
188 *--fblock = t2 - fr2; | |
189 *--fblock = t3 - fr3; | |
190 *--fblock = t3 + fr3; | |
191 *--fblock = t2 + fr2; | |
192 *--fblock = t1 + fr1; | |
193 *--fblock = t0 + fr0; | |
194 fblock+=8; | |
195 } while(--i); | |
196 block-=8*8; | |
197 fblock-=8*8; | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
198 |
2979 | 199 /* col */ |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
200 |
2979 | 201 /* even part */ |
202 load_matrix(even_table); | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
203 |
2979 | 204 ofs1 = sizeof(float)*2*8; |
205 ofs2 = sizeof(float)*4*8; | |
206 ofs3 = sizeof(float)*6*8; | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
207 |
2979 | 208 i = 8; |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
209 |
2979 | 210 #define OA(fblock,ofs) *(float*)((char*)fblock + ofs) |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
211 |
2979 | 212 do { |
213 fr0 = OA(fblock, 0); | |
214 fr1 = OA(fblock,ofs1); | |
215 fr2 = OA(fblock,ofs2); | |
216 fr3 = OA(fblock,ofs3); | |
217 ftrv(); | |
218 OA(fblock,0 ) = fr0; | |
219 OA(fblock,ofs1) = fr1; | |
220 OA(fblock,ofs2) = fr2; | |
221 OA(fblock,ofs3) = fr3; | |
222 fblock++; | |
223 } while(--i); | |
224 fblock-=8; | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
225 |
2979 | 226 load_matrix(odd_table); |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
227 |
2979 | 228 i=8; |
229 do { | |
230 float t0,t1,t2,t3; | |
231 t0 = OA(fblock, 0); /* [8*0] */ | |
232 t1 = OA(fblock,ofs1); /* [8*2] */ | |
233 t2 = OA(fblock,ofs2); /* [8*4] */ | |
234 t3 = OA(fblock,ofs3); /* [8*6] */ | |
235 fblock+=8; | |
236 fr0 = OA(fblock, 0); /* [8*1] */ | |
237 fr1 = OA(fblock,ofs1); /* [8*3] */ | |
238 fr2 = OA(fblock,ofs2); /* [8*5] */ | |
239 fr3 = OA(fblock,ofs3); /* [8*7] */ | |
240 fblock+=-8+1; | |
241 ftrv(); | |
242 block[8*0] = DESCALE(t0 + fr0,3); | |
243 block[8*7] = DESCALE(t0 - fr0,3); | |
244 block[8*1] = DESCALE(t1 + fr1,3); | |
245 block[8*6] = DESCALE(t1 - fr1,3); | |
246 block[8*2] = DESCALE(t2 + fr2,3); | |
247 block[8*5] = DESCALE(t2 - fr2,3); | |
248 block[8*3] = DESCALE(t3 + fr3,3); | |
249 block[8*4] = DESCALE(t3 - fr3,3); | |
250 block++; | |
251 } while(--i); | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
252 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
253 #if defined(__SH4__) |
2979 | 254 #error "FIXME!! change to double" |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
255 #endif |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
256 } |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
257 #else |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
258 void idct_sh4(DCTELEM *block) |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
259 { |
2979 | 260 DEFREG; |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
261 |
2979 | 262 int i; |
263 float tblock[8*8],*fblock; | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
264 |
2979 | 265 /* row */ |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
266 |
2979 | 267 /* even part */ |
268 load_matrix(even_table); | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
269 |
2979 | 270 fblock = tblock; |
271 i = 8; | |
272 do { | |
273 fr0 = block[0]; | |
274 fr1 = block[2]; | |
275 fr2 = block[4]; | |
276 fr3 = block[6]; | |
277 block+=8; | |
278 ftrv(); | |
279 fblock[0] = fr0; | |
280 fblock[2] = fr1; | |
281 fblock[4] = fr2; | |
282 fblock[6] = fr3; | |
283 fblock+=8; | |
284 } while(--i); | |
285 block-=8*8; | |
286 fblock-=8*8; | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
287 |
2979 | 288 load_matrix(odd_table); |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
289 |
2979 | 290 i = 8; |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
291 |
2979 | 292 do { |
293 float t0,t1,t2,t3; | |
294 fr0 = block[1]; | |
295 fr1 = block[3]; | |
296 fr2 = block[5]; | |
297 fr3 = block[7]; | |
298 block+=8; | |
299 ftrv(); | |
300 t0 = fblock[0]; | |
301 t1 = fblock[2]; | |
302 t2 = fblock[4]; | |
303 t3 = fblock[6]; | |
304 fblock[0] = t0 + fr0; | |
305 fblock[7] = t0 - fr0; | |
306 fblock[1] = t1 + fr1; | |
307 fblock[6] = t1 - fr1; | |
308 fblock[2] = t2 + fr2; | |
309 fblock[5] = t2 - fr2; | |
310 fblock[3] = t3 + fr3; | |
311 fblock[4] = t3 - fr3; | |
312 fblock+=8; | |
313 } while(--i); | |
314 block-=8*8; | |
315 fblock-=8*8; | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
316 |
2979 | 317 /* col */ |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
318 |
2979 | 319 /* even part */ |
320 load_matrix(even_table); | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
321 |
2979 | 322 i = 8; |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
323 |
2979 | 324 do { |
325 fr0 = fblock[8*0]; | |
326 fr1 = fblock[8*2]; | |
327 fr2 = fblock[8*4]; | |
328 fr3 = fblock[8*6]; | |
329 ftrv(); | |
330 fblock[8*0] = fr0; | |
331 fblock[8*2] = fr1; | |
332 fblock[8*4] = fr2; | |
333 fblock[8*6] = fr3; | |
334 fblock++; | |
335 } while(--i); | |
336 fblock-=8; | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
337 |
2979 | 338 load_matrix(odd_table); |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
339 |
2979 | 340 i=8; |
341 do { | |
342 float t0,t1,t2,t3; | |
343 fr0 = fblock[8*1]; | |
344 fr1 = fblock[8*3]; | |
345 fr2 = fblock[8*5]; | |
346 fr3 = fblock[8*7]; | |
347 ftrv(); | |
348 t0 = fblock[8*0]; | |
349 t1 = fblock[8*2]; | |
350 t2 = fblock[8*4]; | |
351 t3 = fblock[8*6]; | |
352 fblock++; | |
353 block[8*0] = DESCALE(t0 + fr0,3); | |
354 block[8*7] = DESCALE(t0 - fr0,3); | |
355 block[8*1] = DESCALE(t1 + fr1,3); | |
356 block[8*6] = DESCALE(t1 - fr1,3); | |
357 block[8*2] = DESCALE(t2 + fr2,3); | |
358 block[8*5] = DESCALE(t2 - fr2,3); | |
359 block[8*3] = DESCALE(t3 + fr3,3); | |
360 block[8*4] = DESCALE(t3 - fr3,3); | |
361 block++; | |
362 } while(--i); | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
363 } |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
364 #endif |