comparison sh4/idct_sh4.c @ 2979:bfabfdf9ce55 libavcodec

COSMETICS: tabs --> spaces, some prettyprinting
author diego
date Thu, 22 Dec 2005 01:10:11 +0000
parents ef2149182f1c
children 0b546eab515d
comparison
equal deleted inserted replaced
2978:403183bbb505 2979:bfabfdf9ce55
17 * License along with this library; if not, write to the Free Software 17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */ 19 */
20 20
21 #include "../dsputil.h" 21 #include "../dsputil.h"
/*
 * Scaled cosine constants sqrt(2)*cos(k*pi/16) for k = 1..7.
 * They exist only to spell out the two coefficient tables below and are
 * #undef'd again right after the tables.
 */
#define c1      1.38703984532214752434  /* sqrt(2)*cos(1*pi/16) */
#define c2      1.30656296487637657577  /* sqrt(2)*cos(2*pi/16) */
#define c3      1.17587560241935884520  /* sqrt(2)*cos(3*pi/16) */
#define c4      1.00000000000000000000  /* sqrt(2)*cos(4*pi/16) */
#define c5      0.78569495838710234903  /* sqrt(2)*cos(5*pi/16) */
#define c6      0.54119610014619712324  /* sqrt(2)*cos(6*pi/16) */
#define c7      0.27589937928294311353  /* sqrt(2)*cos(7*pi/16) */

/*
 * 4x4 matrix applied by idct_sh4() to the even-indexed inputs (0, 2, 4, 6).
 * Layout as consumed by ftrv(): entry [4*i + j] is the weight of input i in
 * output j, i.e. each line below holds the contributions of one input.
 * The 8-byte alignment is presumably required by the paired fmov loads in
 * the SH4 load_matrix() -- confirm against the SH-4 manual.
 */
static const float even_table[] __attribute__ ((aligned(8))) = {
     c4, c4, c4, c4,
     c2, c6,-c6,-c2,
     c4,-c4,-c4, c4,
     c6,-c2, c2,-c6
};

/*
 * 4x4 matrix applied by idct_sh4() to the odd-indexed inputs (1, 3, 5, 7).
 * Same layout and alignment as even_table.
 */
static const float odd_table[] __attribute__ ((aligned(8))) = {
     c1, c3, c5, c7,
     c3,-c7,-c1,-c5,
     c5,-c1, c7, c3,
     c7,-c5, c3,-c1
};

#undef  c1
#undef  c2
#undef  c3
#undef  c4
#undef  c5
#undef  c6
#undef  c7
51 51
/*
 * Matrix/vector primitives used by idct_sh4().
 *
 *   DEFREG              declares the working storage (must appear among the
 *                       declarations at the top of the function)
 *   load_matrix(table)  installs a 16-float coefficient table as the
 *                       current 4x4 matrix
 *   ftrv()              replaces the vector (fr0, fr1, fr2, fr3) with
 *                       matrix * vector:
 *                         out[j] = sum over i of table[4*i + j] * in[i]
 *
 * Two implementations with the same interface follow: SH4 inline assembly
 * and a plain C version "for check".
 */
#if defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)

/*
 * Load 16 floats into xd0..xd14 with eight post-incrementing fmov
 * instructions, bracketed by fschg (which switches the fmov transfer size
 * for the paired registers and back).
 *
 * The address register is modified by every "@%0+", so it is passed as a
 * read-write operand on a private copy of the pointer. Declaring it as a
 * plain input would allow the compiler to assume the register still holds
 * the table address after the asm statement.
 */
#define load_matrix(table) \
    do { \
        const float *load_matrix_src = (table); \
        __asm__ volatile( \
        "       fschg\n" \
        "       fmov    @%0+,xd0\n" \
        "       fmov    @%0+,xd2\n" \
        "       fmov    @%0+,xd4\n" \
        "       fmov    @%0+,xd6\n" \
        "       fmov    @%0+,xd8\n" \
        "       fmov    @%0+,xd10\n" \
        "       fmov    @%0+,xd12\n" \
        "       fmov    @%0+,xd14\n" \
        "       fschg\n" \
        : "+r"(load_matrix_src) \
        ); \
    } while (0)

/*
 * fv0 (fr0..fr3) = xmtrx * fv0. All four registers are both inputs and
 * outputs, tied together through the matching constraints.
 * No trailing ';' in the macro: call sites write "ftrv();".
 */
#define ftrv() \
    __asm__ volatile("ftrv xmtrx,fv0" \
        : "=f"(fr0), "=f"(fr1), "=f"(fr2), "=f"(fr3) \
        :  "0"(fr0),  "1"(fr1),  "2"(fr2),  "3"(fr3) )

/* Pin the four vector components to the hardware registers ftrv works on. */
#define DEFREG \
    register float fr0 __asm__("fr0"); \
    register float fr1 __asm__("fr1"); \
    register float fr2 __asm__("fr2"); \
    register float fr3 __asm__("fr3")

#else

/* generic C code for check */

/*
 * In-place 4x4 matrix times 4-vector product.
 * xf: 16 coefficients, xf[4*i + j] = weight of input i in output j.
 * fv: 4 inputs on entry, 4 outputs on return.
 */
static void ftrv_(const float xf[],float fv[])
{
    /* Copy the inputs first: fv[] is overwritten while it is still needed. */
    const float f0 = fv[0];
    const float f1 = fv[1];
    const float f2 = fv[2];
    const float f3 = fv[3];
    int j;

    for (j = 0; j < 4; j++)
        fv[j] = xf[j]*f0 + xf[4+j]*f1 + xf[8+j]*f2 + xf[12+j]*f3;
}

/* Copy the 16 coefficients of table[] into the working matrix xf[]. */
static void load_matrix_(float xf[],const float table[])
{
    int i;

    for (i = 0; i < 16; i++)
        xf[i] = table[i];
}

#define ftrv()                  ftrv_(xf,fv)
#define load_matrix(table)      load_matrix_(xf,table)

/* Working storage: the 4-vector and the currently loaded 4x4 matrix. */
#define DEFREG \
    float fv[4],xf[16]

/* Same component names as the SH4 register variables. */
#define fr0     fv[0]
#define fr1     fv[1]
#define fr2     fv[2]
#define fr3     fv[3]

#endif
117 117
/*
 * DESCALE(x, n): scale x down by 2^n.
 * Active variant: float multiply by the reciprocal 1/2^n (no rounding is
 * done by the macro itself). Disabled variant: integer shift with
 * round-to-nearest bias.
 * Both expansions are fully parenthesized so the macro can be used inside
 * larger expressions.
 */
#if 1
#define DESCALE(x,n)    ((x)*(1.0f/(1<<(n))))
#else
#define DESCALE(x,n)    (((int)(x)+(1<<((n)-1)))>>(n))
#endif
123 123
/* This code works worse when built with gcc CVS; gcc 3.2.3 works fine. */
125 125
126 126
127 #if 1 127 #if 1
128 //optimized 128 //optimized
129 129
130 void idct_sh4(DCTELEM *block) 130 void idct_sh4(DCTELEM *block)
131 { 131 {
132 DEFREG; 132 DEFREG;
133 133
134 int i; 134 int i;
135 float tblock[8*8],*fblock; 135 float tblock[8*8],*fblock;
136 int ofs1,ofs2,ofs3; 136 int ofs1,ofs2,ofs3;
137 137
138 #if defined(__SH4__) 138 #if defined(__SH4__)
139 #error "FIXME!! change to single float" 139 #error "FIXME!! change to single float"
140 #endif 140 #endif
141 141
142 /* row */ 142 /* row */
143 143
144 /* even part */ 144 /* even part */
145 load_matrix(even_table); 145 load_matrix(even_table);
146 146
147 fblock = tblock+4; 147 fblock = tblock+4;
148 i = 8; 148 i = 8;
149 do { 149 do {
150 fr0 = block[0]; 150 fr0 = block[0];
151 fr1 = block[2]; 151 fr1 = block[2];
152 fr2 = block[4]; 152 fr2 = block[4];
153 fr3 = block[6]; 153 fr3 = block[6];
154 block+=8; 154 block+=8;
155 ftrv(); 155 ftrv();
156 *--fblock = fr3; 156 *--fblock = fr3;
157 *--fblock = fr2; 157 *--fblock = fr2;
158 *--fblock = fr1; 158 *--fblock = fr1;
159 *--fblock = fr0; 159 *--fblock = fr0;
160 fblock+=8+4; 160 fblock+=8+4;
161 } while(--i); 161 } while(--i);
162 block-=8*8; 162 block-=8*8;
163 fblock-=8*8+4; 163 fblock-=8*8+4;
164 164
165 load_matrix(odd_table); 165 load_matrix(odd_table);
166 166
167 i = 8; 167 i = 8;
168 168
169 // ofs1 = sizeof(float)*1; 169 // ofs1 = sizeof(float)*1;
170 // ofs2 = sizeof(float)*2; 170 // ofs2 = sizeof(float)*2;
171 // ofs3 = sizeof(float)*3; 171 // ofs3 = sizeof(float)*3;
172 172
173 do { 173 do {
174 float t0,t1,t2,t3; 174 float t0,t1,t2,t3;
175 fr0 = block[1]; 175 fr0 = block[1];
176 fr1 = block[3]; 176 fr1 = block[3];
177 fr2 = block[5]; 177 fr2 = block[5];
178 fr3 = block[7]; 178 fr3 = block[7];
179 block+=8; 179 block+=8;
180 ftrv(); 180 ftrv();
181 t0 = *fblock++; 181 t0 = *fblock++;
182 t1 = *fblock++; 182 t1 = *fblock++;
183 t2 = *fblock++; 183 t2 = *fblock++;
184 t3 = *fblock++; 184 t3 = *fblock++;
185 fblock+=4; 185 fblock+=4;
186 *--fblock = t0 - fr0; 186 *--fblock = t0 - fr0;
187 *--fblock = t1 - fr1; 187 *--fblock = t1 - fr1;
188 *--fblock = t2 - fr2; 188 *--fblock = t2 - fr2;
189 *--fblock = t3 - fr3; 189 *--fblock = t3 - fr3;
190 *--fblock = t3 + fr3; 190 *--fblock = t3 + fr3;
191 *--fblock = t2 + fr2; 191 *--fblock = t2 + fr2;
192 *--fblock = t1 + fr1; 192 *--fblock = t1 + fr1;
193 *--fblock = t0 + fr0; 193 *--fblock = t0 + fr0;
194 fblock+=8; 194 fblock+=8;
195 } while(--i); 195 } while(--i);
196 block-=8*8; 196 block-=8*8;
197 fblock-=8*8; 197 fblock-=8*8;
198 198
199 /* col */ 199 /* col */
200 200
201 /* even part */ 201 /* even part */
202 load_matrix(even_table); 202 load_matrix(even_table);
203 203
204 ofs1 = sizeof(float)*2*8; 204 ofs1 = sizeof(float)*2*8;
205 ofs2 = sizeof(float)*4*8; 205 ofs2 = sizeof(float)*4*8;
206 ofs3 = sizeof(float)*6*8; 206 ofs3 = sizeof(float)*6*8;
207 207
208 i = 8; 208 i = 8;
209 209
210 #define OA(fblock,ofs) *(float*)((char*)fblock + ofs) 210 #define OA(fblock,ofs) *(float*)((char*)fblock + ofs)
211 211
212 do { 212 do {
213 fr0 = OA(fblock, 0); 213 fr0 = OA(fblock, 0);
214 fr1 = OA(fblock,ofs1); 214 fr1 = OA(fblock,ofs1);
215 fr2 = OA(fblock,ofs2); 215 fr2 = OA(fblock,ofs2);
216 fr3 = OA(fblock,ofs3); 216 fr3 = OA(fblock,ofs3);
217 ftrv(); 217 ftrv();
218 OA(fblock,0 ) = fr0; 218 OA(fblock,0 ) = fr0;
219 OA(fblock,ofs1) = fr1; 219 OA(fblock,ofs1) = fr1;
220 OA(fblock,ofs2) = fr2; 220 OA(fblock,ofs2) = fr2;
221 OA(fblock,ofs3) = fr3; 221 OA(fblock,ofs3) = fr3;
222 fblock++; 222 fblock++;
223 } while(--i); 223 } while(--i);
224 fblock-=8; 224 fblock-=8;
225 225
226 load_matrix(odd_table); 226 load_matrix(odd_table);
227 227
228 i=8; 228 i=8;
229 do { 229 do {
230 float t0,t1,t2,t3; 230 float t0,t1,t2,t3;
231 t0 = OA(fblock, 0); /* [8*0] */ 231 t0 = OA(fblock, 0); /* [8*0] */
232 t1 = OA(fblock,ofs1); /* [8*2] */ 232 t1 = OA(fblock,ofs1); /* [8*2] */
233 t2 = OA(fblock,ofs2); /* [8*4] */ 233 t2 = OA(fblock,ofs2); /* [8*4] */
234 t3 = OA(fblock,ofs3); /* [8*6] */ 234 t3 = OA(fblock,ofs3); /* [8*6] */
235 fblock+=8; 235 fblock+=8;
236 fr0 = OA(fblock, 0); /* [8*1] */ 236 fr0 = OA(fblock, 0); /* [8*1] */
237 fr1 = OA(fblock,ofs1); /* [8*3] */ 237 fr1 = OA(fblock,ofs1); /* [8*3] */
238 fr2 = OA(fblock,ofs2); /* [8*5] */ 238 fr2 = OA(fblock,ofs2); /* [8*5] */
239 fr3 = OA(fblock,ofs3); /* [8*7] */ 239 fr3 = OA(fblock,ofs3); /* [8*7] */
240 fblock+=-8+1; 240 fblock+=-8+1;
241 ftrv(); 241 ftrv();
242 block[8*0] = DESCALE(t0 + fr0,3); 242 block[8*0] = DESCALE(t0 + fr0,3);
243 block[8*7] = DESCALE(t0 - fr0,3); 243 block[8*7] = DESCALE(t0 - fr0,3);
244 block[8*1] = DESCALE(t1 + fr1,3); 244 block[8*1] = DESCALE(t1 + fr1,3);
245 block[8*6] = DESCALE(t1 - fr1,3); 245 block[8*6] = DESCALE(t1 - fr1,3);
246 block[8*2] = DESCALE(t2 + fr2,3); 246 block[8*2] = DESCALE(t2 + fr2,3);
247 block[8*5] = DESCALE(t2 - fr2,3); 247 block[8*5] = DESCALE(t2 - fr2,3);
248 block[8*3] = DESCALE(t3 + fr3,3); 248 block[8*3] = DESCALE(t3 + fr3,3);
249 block[8*4] = DESCALE(t3 - fr3,3); 249 block[8*4] = DESCALE(t3 - fr3,3);
250 block++; 250 block++;
251 } while(--i); 251 } while(--i);
252 252
253 #if defined(__SH4__) 253 #if defined(__SH4__)
254 #error "FIXME!! change to double" 254 #error "FIXME!! change to double"
255 #endif 255 #endif
256 } 256 }
257 #else 257 #else
258 void idct_sh4(DCTELEM *block) 258 void idct_sh4(DCTELEM *block)
259 { 259 {
260 DEFREG; 260 DEFREG;
261 261
262 int i; 262 int i;
263 float tblock[8*8],*fblock; 263 float tblock[8*8],*fblock;
264 264
265 /* row */ 265 /* row */
266 266
267 /* even part */ 267 /* even part */
268 load_matrix(even_table); 268 load_matrix(even_table);
269 269
270 fblock = tblock; 270 fblock = tblock;
271 i = 8; 271 i = 8;
272 do { 272 do {
273 fr0 = block[0]; 273 fr0 = block[0];
274 fr1 = block[2]; 274 fr1 = block[2];
275 fr2 = block[4]; 275 fr2 = block[4];
276 fr3 = block[6]; 276 fr3 = block[6];
277 block+=8; 277 block+=8;
278 ftrv(); 278 ftrv();
279 fblock[0] = fr0; 279 fblock[0] = fr0;
280 fblock[2] = fr1; 280 fblock[2] = fr1;
281 fblock[4] = fr2; 281 fblock[4] = fr2;
282 fblock[6] = fr3; 282 fblock[6] = fr3;
283 fblock+=8; 283 fblock+=8;
284 } while(--i); 284 } while(--i);
285 block-=8*8; 285 block-=8*8;
286 fblock-=8*8; 286 fblock-=8*8;
287 287
288 load_matrix(odd_table); 288 load_matrix(odd_table);
289 289
290 i = 8; 290 i = 8;
291 291
292 do { 292 do {
293 float t0,t1,t2,t3; 293 float t0,t1,t2,t3;
294 fr0 = block[1]; 294 fr0 = block[1];
295 fr1 = block[3]; 295 fr1 = block[3];
296 fr2 = block[5]; 296 fr2 = block[5];
297 fr3 = block[7]; 297 fr3 = block[7];
298 block+=8; 298 block+=8;
299 ftrv(); 299 ftrv();
300 t0 = fblock[0]; 300 t0 = fblock[0];
301 t1 = fblock[2]; 301 t1 = fblock[2];
302 t2 = fblock[4]; 302 t2 = fblock[4];
303 t3 = fblock[6]; 303 t3 = fblock[6];
304 fblock[0] = t0 + fr0; 304 fblock[0] = t0 + fr0;
305 fblock[7] = t0 - fr0; 305 fblock[7] = t0 - fr0;
306 fblock[1] = t1 + fr1; 306 fblock[1] = t1 + fr1;
307 fblock[6] = t1 - fr1; 307 fblock[6] = t1 - fr1;
308 fblock[2] = t2 + fr2; 308 fblock[2] = t2 + fr2;
309 fblock[5] = t2 - fr2; 309 fblock[5] = t2 - fr2;
310 fblock[3] = t3 + fr3; 310 fblock[3] = t3 + fr3;
311 fblock[4] = t3 - fr3; 311 fblock[4] = t3 - fr3;
312 fblock+=8; 312 fblock+=8;
313 } while(--i); 313 } while(--i);
314 block-=8*8; 314 block-=8*8;
315 fblock-=8*8; 315 fblock-=8*8;
316 316
317 /* col */ 317 /* col */
318 318
319 /* even part */ 319 /* even part */
320 load_matrix(even_table); 320 load_matrix(even_table);
321 321
322 i = 8; 322 i = 8;
323 323
324 do { 324 do {
325 fr0 = fblock[8*0]; 325 fr0 = fblock[8*0];
326 fr1 = fblock[8*2]; 326 fr1 = fblock[8*2];
327 fr2 = fblock[8*4]; 327 fr2 = fblock[8*4];
328 fr3 = fblock[8*6]; 328 fr3 = fblock[8*6];
329 ftrv(); 329 ftrv();
330 fblock[8*0] = fr0; 330 fblock[8*0] = fr0;
331 fblock[8*2] = fr1; 331 fblock[8*2] = fr1;
332 fblock[8*4] = fr2; 332 fblock[8*4] = fr2;
333 fblock[8*6] = fr3; 333 fblock[8*6] = fr3;
334 fblock++; 334 fblock++;
335 } while(--i); 335 } while(--i);
336 fblock-=8; 336 fblock-=8;
337 337
338 load_matrix(odd_table); 338 load_matrix(odd_table);
339 339
340 i=8; 340 i=8;
341 do { 341 do {
342 float t0,t1,t2,t3; 342 float t0,t1,t2,t3;
343 fr0 = fblock[8*1]; 343 fr0 = fblock[8*1];
344 fr1 = fblock[8*3]; 344 fr1 = fblock[8*3];
345 fr2 = fblock[8*5]; 345 fr2 = fblock[8*5];
346 fr3 = fblock[8*7]; 346 fr3 = fblock[8*7];
347 ftrv(); 347 ftrv();
348 t0 = fblock[8*0]; 348 t0 = fblock[8*0];
349 t1 = fblock[8*2]; 349 t1 = fblock[8*2];
350 t2 = fblock[8*4]; 350 t2 = fblock[8*4];
351 t3 = fblock[8*6]; 351 t3 = fblock[8*6];
352 fblock++; 352 fblock++;
353 block[8*0] = DESCALE(t0 + fr0,3); 353 block[8*0] = DESCALE(t0 + fr0,3);
354 block[8*7] = DESCALE(t0 - fr0,3); 354 block[8*7] = DESCALE(t0 - fr0,3);
355 block[8*1] = DESCALE(t1 + fr1,3); 355 block[8*1] = DESCALE(t1 + fr1,3);
356 block[8*6] = DESCALE(t1 - fr1,3); 356 block[8*6] = DESCALE(t1 - fr1,3);
357 block[8*2] = DESCALE(t2 + fr2,3); 357 block[8*2] = DESCALE(t2 + fr2,3);
358 block[8*5] = DESCALE(t2 - fr2,3); 358 block[8*5] = DESCALE(t2 - fr2,3);
359 block[8*3] = DESCALE(t3 + fr3,3); 359 block[8*3] = DESCALE(t3 + fr3,3);
360 block[8*4] = DESCALE(t3 - fr3,3); 360 block[8*4] = DESCALE(t3 - fr3,3);
361 block++; 361 block++;
362 } while(--i); 362 } while(--i);
363 } 363 }
364 #endif 364 #endif