Mercurial > libavcodec.hg
annotate sh4/idct_sh4.c @ 10952:ea8f891d997d libavcodec
H264 DXVA2 implementation
It allows VLD H.264 decoding using DXVA2 (the GPU-assisted decoding API on
Windows Vista and Windows 7).
It is implemented using the AVHWAccel API. It has been tested successfully
for some time in VLC using an NVIDIA card on Windows 7.
To compile it, you need the system header dxva2api.h (either from
Microsoft or from http://downloads.videolan.org/pub/videolan/testing/contrib/dxva2api.h).
The generated libavcodec.dll does not depend directly on any new lib as
the necessary objects are given by the application using FFmpeg.
author | fenrir |
---|---|
date | Wed, 20 Jan 2010 18:54:51 +0000 |
parents | cf5183ff0417 |
children | c166792100a0 |
rev | line source |
---|---|
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
1 /* |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
2 * idct for sh4 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
3 * |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
4 * Copyright (c) 2001-2003 BERO <bero@geocities.co.jp> |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
5 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3556
diff
changeset
|
6 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3556
diff
changeset
|
7 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3556
diff
changeset
|
8 * FFmpeg is free software; you can redistribute it and/or |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
9 * modify it under the terms of the GNU Lesser General Public |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
10 * License as published by the Free Software Foundation; either |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3556
diff
changeset
|
11 * version 2.1 of the License, or (at your option) any later version. |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
12 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3556
diff
changeset
|
13 * FFmpeg is distributed in the hope that it will be useful, |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
16 * Lesser General Public License for more details. |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
17 * |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
18 * You should have received a copy of the GNU Lesser General Public |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3556
diff
changeset
|
19 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2979
diff
changeset
|
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
21 */ |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
22 |
6763 | 23 #include "libavcodec/dsputil.h" |
8187 | 24 #include "sh4.h" |
25 | |
/*
 * Scaled cosine coefficients: IDCT_Ck = sqrt(2) * cos(k * pi / 16).
 * They are only needed while the two tables below are initialised and are
 * undefined again right after.
 */
#define IDCT_C1 1.38703984532214752434
#define IDCT_C2 1.30656296487637657577
#define IDCT_C3 1.17587560241935884520
#define IDCT_C4 1.00000000000000000000
#define IDCT_C5 0.78569495838710234903
#define IDCT_C6 0.54119610014619712324
#define IDCT_C7 0.27589937928294311353

/*
 * Coefficients applied to the even-indexed inputs (0, 2, 4, 6).
 * Sixteen floats; the generic ftrv_() fallback reads entry [4*j + i] as the
 * weight of input j in output i.
 * NOTE(review): the 8-byte alignment is presumably for the paired fmov loads
 * in load_matrix() -- confirm against the SH-4 manual.
 */
static const float even_table[] __attribute__ ((aligned(8))) = {
     IDCT_C4,  IDCT_C4,  IDCT_C4,  IDCT_C4,
     IDCT_C2,  IDCT_C6, -IDCT_C6, -IDCT_C2,
     IDCT_C4, -IDCT_C4, -IDCT_C4,  IDCT_C4,
     IDCT_C6, -IDCT_C2,  IDCT_C2, -IDCT_C6
};

/*
 * Coefficients applied to the odd-indexed inputs (1, 3, 5, 7); same layout
 * as even_table.
 */
static const float odd_table[] __attribute__ ((aligned(8))) = {
     IDCT_C1,  IDCT_C3,  IDCT_C5,  IDCT_C7,
     IDCT_C3, -IDCT_C7, -IDCT_C1, -IDCT_C5,
     IDCT_C5, -IDCT_C1,  IDCT_C7,  IDCT_C3,
     IDCT_C7, -IDCT_C5,  IDCT_C3, -IDCT_C1
};

#undef IDCT_C1
#undef IDCT_C2
#undef IDCT_C3
#undef IDCT_C4
#undef IDCT_C5
#undef IDCT_C6
#undef IDCT_C7
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
55 |
8187 | 56 #if 1 |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
57 |
/*
 * load_matrix(table): load a 16-float coefficient table into the SH-4
 * XMTRX register bank (xd0..xd14) so that a following ftrv() multiplies by it.
 *
 * Per the SH-4 ISA, the first fschg switches fmov to paired (64-bit)
 * transfers, so the eight post-increment loads fetch all sixteen floats; the
 * second fschg switches the transfer size back.
 * The pointer is copied into the local `t` because the asm advances it
 * (read-write "+r" operand), leaving the caller's expression untouched.
 *
 * NOTE(review): the asm statement lists no clobbers and no memory input for
 * the loads through `t`; it appears to rely on `volatile` and on the tables
 * being static const -- confirm this is sufficient for the compilers in use.
 */
#define load_matrix(table) \
    do { \
        const float *t = table; \
        __asm__ volatile( \
        " fschg\n" \
        " fmov @%0+,xd0\n" \
        " fmov @%0+,xd2\n" \
        " fmov @%0+,xd4\n" \
        " fmov @%0+,xd6\n" \
        " fmov @%0+,xd8\n" \
        " fmov @%0+,xd10\n" \
        " fmov @%0+,xd12\n" \
        " fmov @%0+,xd14\n" \
        " fschg\n" \
        : "+r"(t) \
        ); \
    } while (0)
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
75 |
/*
 * ftrv(): issue the SH-4 "ftrv xmtrx,fv0" instruction, which transforms the
 * four-element vector held in fr0..fr3 by the matrix previously placed in
 * XMTRX by load_matrix(). The result replaces fr0..fr3 (all four are
 * read-write "+f" operands).
 * Requires the register variables declared by DEFREG to be in scope.
 *
 * NOTE(review): the expansion already ends with ';', so "ftrv();" produces an
 * extra empty statement; harmless at the current call sites, but it would
 * break an unbraced if/else around the call.
 */
#define ftrv() \
        __asm__ volatile("ftrv xmtrx,fv0" \
        : "+f"(fr0),"+f"(fr1),"+f"(fr2),"+f"(fr3));
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
79 |
/*
 * DEFREG: declare the four float locals fr0..fr3 and pin each one to the
 * hardware register of the same name, so that plain C assignments fill and
 * read the vector that the ftrv() asm operates on.
 * The last declaration has no trailing ';' -- the user writes "DEFREG;".
 */
#define DEFREG \
        register float fr0 __asm__("fr0"); \
        register float fr1 __asm__("fr1"); \
        register float fr2 __asm__("fr2"); \
        register float fr3 __asm__("fr3")
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
85 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
86 #else |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
87 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
88 /* generic C code for check */ |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
89 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
/*
 * Portable C version of the ftrv step: transform the 4-element vector fv in
 * place using the 16 coefficients in xf.
 *
 * xf  coefficient array; entry [4*j + i] is the weight of input j in output i
 * fv  four inputs on entry, four results on return
 *
 * All four inputs are copied out first so that writing fv[] cannot disturb
 * values that are still needed.
 */
static void ftrv_(const float xf[], float fv[])
{
    const float in0 = fv[0];
    const float in1 = fv[1];
    const float in2 = fv[2];
    const float in3 = fv[3];
    int out;

    /* Same left-to-right summation order for every output element. */
    for (out = 0; out < 4; out++) {
        fv[out] = xf[out] * in0 + xf[out + 4] * in1
                + xf[out + 8] * in2 + xf[out + 12] * in3;
    }
}
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
102 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
/*
 * Portable C version of load_matrix: copy the 16 coefficients of `table`
 * into the working matrix `xf`.
 */
static void load_matrix_(float xf[], const float table[])
{
    const float *src = table;
    const float *const stop = table + 16;
    float *dst = xf;

    while (src != stop)
        *dst++ = *src++;
}
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
108 |
/*
 * Generic-C configuration: route ftrv()/load_matrix() to the C helpers
 * ftrv_() and load_matrix_(), which work on the local arrays `xf` (matrix)
 * and `fv` (vector) declared by DEFREG.
 */
#define ftrv()                  ftrv_(xf,fv)
#define load_matrix(table)      load_matrix_(xf,table)

/* Locals required by the macros above; written as "DEFREG;" by the user. */
#define DEFREG \
    float fv[4],xf[16]

/* Let code written in terms of fr0..fr3 address the elements of fv[]. */
#define fr0     fv[0]
#define fr1     fv[1]
#define fr2     fv[2]
#define fr3     fv[3]
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
119 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
120 #endif |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
121 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
/*
 * DESCALE(x, n): scale x down by 2^n.
 *
 * The active (float) variant multiplies by 1/2^n and performs no rounding of
 * its own; when the result is stored into an integer object the usual C
 * float-to-integer conversion applies.
 * The disabled integer variant converts x to int, adds half of 2^n and
 * shifts right by n.
 *
 * Both expansions are fully parenthesised so the macro behaves as a single
 * operand inside a larger expression (e.g. a / DESCALE(x, n)) and accepts an
 * expression for n.
 */
#if 1
#define DESCALE(x,n)    ((x)*(1.0f/(1<<(n))))
#else
#define DESCALE(x,n)    (((int)(x)+(1<<((n)-1)))>>(n))
#endif
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
127 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
128 /* This code works worse with GCC built from CVS; GCC 3.2.3 works fine. */ |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
129 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
130 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
131 #if 1 |
2967 | 132 //optimized |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
133 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
/**
 * In-place 8x8 inverse DCT (pointer-walking variant).
 *
 * The transform runs as a row pass followed by a column pass. Each pass is
 * split into an "even part" (inputs 0, 2, 4, 6 times even_table) and an
 * "odd part" (inputs 1, 3, 5, 7 times odd_table); each part is one
 * load_matrix() followed by eight ftrv() calls. With E[k] and O[k] the
 * even/odd results, output k is E[k] + O[k] and output 7-k is E[k] - O[k].
 *
 * Intermediate values are kept as floats in tblock[]; the final results are
 * passed through DESCALE(..., 3) and stored back into block.
 *
 * @param block 64 coefficients, 8 per row; overwritten with the result.
 *              DCTELEM is declared outside this file (presumably an integer
 *              coefficient type -- confirm in dsputil.h).
 */
void idct_sh4(DCTELEM *block)
{
    DEFREG;

    int i;
    float tblock[8*8],*fblock;  /* float work buffer and a cursor into it */
    int ofs1,ofs2,ofs3;         /* byte offsets of rows 2, 4 and 6 of tblock */
    int fpscr;                  /* handed to fp_single_enter()/fp_single_leave() */

    /* fp_single_enter()/fp_single_leave() come from sh4.h (not shown here);
     * presumably they switch the FPU to single precision and restore the
     * saved FPSCR afterwards -- confirm there. */
    fp_single_enter(fpscr);

    /* row */

    /* even part */
    load_matrix(even_table);

    /* Start 4 floats in so that the four pre-decrement stores below land on
     * elements 3, 2, 1, 0 of the current row. */
    fblock = tblock+4;
    i = 8;
    do {
        fr0 = block[0];
        fr1 = block[2];
        fr2 = block[4];
        fr3 = block[6];
        block+=8;
        ftrv();
        *--fblock = fr3;
        *--fblock = fr2;
        *--fblock = fr1;
        *--fblock = fr0;
        fblock+=8+4;            /* next row, again 4 floats in */
    } while(--i);
    /* Rewind both cursors to the start of their buffers.
     * NOTE(review): before this rewind fblock equals tblock+68, i.e. beyond
     * one-past-the-end of tblock; it is never dereferenced there. */
    block-=8*8;
    fblock-=8*8+4;

    load_matrix(odd_table);

    i = 8;

    do {
        float t0,t1,t2,t3;
        fr0 = block[1];
        fr1 = block[3];
        fr2 = block[5];
        fr3 = block[7];
        block+=8;
        ftrv();
        /* Fetch the even-part results stored in elements 0..3 of this row. */
        t0 = *fblock++;
        t1 = *fblock++;
        t2 = *fblock++;
        t3 = *fblock++;
        fblock+=4;              /* now one past the row; fill it back to front */
        *--fblock = t0 - fr0;   /* element 7 */
        *--fblock = t1 - fr1;   /* element 6 */
        *--fblock = t2 - fr2;   /* element 5 */
        *--fblock = t3 - fr3;   /* element 4 */
        *--fblock = t3 + fr3;   /* element 3 */
        *--fblock = t2 + fr2;   /* element 2 */
        *--fblock = t1 + fr1;   /* element 1 */
        *--fblock = t0 + fr0;   /* element 0 */
        fblock+=8;
    } while(--i);
    block-=8*8;
    fblock-=8*8;

    /* col */

    /* even part */
    load_matrix(even_table);

    ofs1 = sizeof(float)*2*8;
    ofs2 = sizeof(float)*4*8;
    ofs3 = sizeof(float)*6*8;

    i = 8;

/* Access the float located `ofs` bytes after fblock. */
#define OA(fblock,ofs)   *(float*)((char*)fblock + ofs)

    /* Transform rows 0, 2, 4, 6 of each column in place. */
    do {
        fr0 = OA(fblock,   0);
        fr1 = OA(fblock,ofs1);
        fr2 = OA(fblock,ofs2);
        fr3 = OA(fblock,ofs3);
        ftrv();
        OA(fblock,0   ) = fr0;
        OA(fblock,ofs1) = fr1;
        OA(fblock,ofs2) = fr2;
        OA(fblock,ofs3) = fr3;
        fblock++;
    } while(--i);
    fblock-=8;

    load_matrix(odd_table);

    i=8;
    do {
        float t0,t1,t2,t3;
        t0 = OA(fblock,   0); /* [8*0] */
        t1 = OA(fblock,ofs1); /* [8*2] */
        t2 = OA(fblock,ofs2); /* [8*4] */
        t3 = OA(fblock,ofs3); /* [8*6] */
        fblock+=8;            /* one row down: same offsets now hit odd rows */
        fr0 = OA(fblock,   0); /* [8*1] */
        fr1 = OA(fblock,ofs1); /* [8*3] */
        fr2 = OA(fblock,ofs2); /* [8*5] */
        fr3 = OA(fblock,ofs3); /* [8*7] */
        fblock+=-8+1;         /* back up one row and step to the next column */
        ftrv();
        /* Combine even and odd parts, scale, and write the column back. */
        block[8*0] = DESCALE(t0 + fr0,3);
        block[8*7] = DESCALE(t0 - fr0,3);
        block[8*1] = DESCALE(t1 + fr1,3);
        block[8*6] = DESCALE(t1 - fr1,3);
        block[8*2] = DESCALE(t2 + fr2,3);
        block[8*5] = DESCALE(t2 - fr2,3);
        block[8*3] = DESCALE(t3 + fr3,3);
        block[8*4] = DESCALE(t3 - fr3,3);
        block++;
    } while(--i);

    fp_single_leave(fpscr);
}
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
254 #else |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
255 void idct_sh4(DCTELEM *block) |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
256 { |
2979 | 257 DEFREG; |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
258 |
2979 | 259 int i; |
260 float tblock[8*8],*fblock; | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
261 |
2979 | 262 /* row */ |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
263 |
2979 | 264 /* even part */ |
265 load_matrix(even_table); | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
266 |
2979 | 267 fblock = tblock; |
268 i = 8; | |
269 do { | |
270 fr0 = block[0]; | |
271 fr1 = block[2]; | |
272 fr2 = block[4]; | |
273 fr3 = block[6]; | |
274 block+=8; | |
275 ftrv(); | |
276 fblock[0] = fr0; | |
277 fblock[2] = fr1; | |
278 fblock[4] = fr2; | |
279 fblock[6] = fr3; | |
280 fblock+=8; | |
281 } while(--i); | |
282 block-=8*8; | |
283 fblock-=8*8; | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
284 |
2979 | 285 load_matrix(odd_table); |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
286 |
2979 | 287 i = 8; |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
288 |
2979 | 289 do { |
290 float t0,t1,t2,t3; | |
291 fr0 = block[1]; | |
292 fr1 = block[3]; | |
293 fr2 = block[5]; | |
294 fr3 = block[7]; | |
295 block+=8; | |
296 ftrv(); | |
297 t0 = fblock[0]; | |
298 t1 = fblock[2]; | |
299 t2 = fblock[4]; | |
300 t3 = fblock[6]; | |
301 fblock[0] = t0 + fr0; | |
302 fblock[7] = t0 - fr0; | |
303 fblock[1] = t1 + fr1; | |
304 fblock[6] = t1 - fr1; | |
305 fblock[2] = t2 + fr2; | |
306 fblock[5] = t2 - fr2; | |
307 fblock[3] = t3 + fr3; | |
308 fblock[4] = t3 - fr3; | |
309 fblock+=8; | |
310 } while(--i); | |
311 block-=8*8; | |
312 fblock-=8*8; | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
313 |
2979 | 314 /* col */ |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
315 |
2979 | 316 /* even part */ |
317 load_matrix(even_table); | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
318 |
2979 | 319 i = 8; |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
320 |
2979 | 321 do { |
322 fr0 = fblock[8*0]; | |
323 fr1 = fblock[8*2]; | |
324 fr2 = fblock[8*4]; | |
325 fr3 = fblock[8*6]; | |
326 ftrv(); | |
327 fblock[8*0] = fr0; | |
328 fblock[8*2] = fr1; | |
329 fblock[8*4] = fr2; | |
330 fblock[8*6] = fr3; | |
331 fblock++; | |
332 } while(--i); | |
333 fblock-=8; | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
334 |
2979 | 335 load_matrix(odd_table); |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
336 |
2979 | 337 i=8; |
338 do { | |
339 float t0,t1,t2,t3; | |
340 fr0 = fblock[8*1]; | |
341 fr1 = fblock[8*3]; | |
342 fr2 = fblock[8*5]; | |
343 fr3 = fblock[8*7]; | |
344 ftrv(); | |
345 t0 = fblock[8*0]; | |
346 t1 = fblock[8*2]; | |
347 t2 = fblock[8*4]; | |
348 t3 = fblock[8*6]; | |
349 fblock++; | |
350 block[8*0] = DESCALE(t0 + fr0,3); | |
351 block[8*7] = DESCALE(t0 - fr0,3); | |
352 block[8*1] = DESCALE(t1 + fr1,3); | |
353 block[8*6] = DESCALE(t1 - fr1,3); | |
354 block[8*2] = DESCALE(t2 + fr2,3); | |
355 block[8*5] = DESCALE(t2 - fr2,3); | |
356 block[8*3] = DESCALE(t3 + fr3,3); | |
357 block[8*4] = DESCALE(t3 - fr3,3); | |
358 block++; | |
359 } while(--i); | |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
360 } |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
diff
changeset
|
361 #endif |