Mercurial > libavcodec.hg
annotate dct-test.c @ 5311:7742d5411c9d libavcodec
AC-3 decoder, soc revision 48, Aug 16 11:27:49 2006 UTC by cloud9
I realized that the bug was not in the imdct routine but in the
get_transform_coeffs.
Fixed it.
Code now uses ffmpeg's imdct routines.
All the mplayer's ac3 samples are decoded
successfully.
Also improved downmixing.
Now all the downmixing coefficients for channels
are normalized such that the sum of coefficients
used to construct the output for single channel
never exceeds 1.0.
author | jbr |
---|---|
date | Sat, 14 Jul 2007 15:58:42 +0000 |
parents | 4dbe6578f811 |
children | e9f8a337c5ce |
rev | line source |
---|---|
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
1 /* |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
2 * (c) 2001 Fabrice Bellard |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
3 * 2007 Marc Hoffman <marc.hoffman@analog.com> |
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
4 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
5 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
6 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
7 * FFmpeg is free software; you can redistribute it and/or |
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
8 * modify it under the terms of the GNU Lesser General Public |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
9 * License as published by the Free Software Foundation; either |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
10 * version 2.1 of the License, or (at your option) any later version. |
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
11 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
12 * FFmpeg is distributed in the hope that it will be useful, |
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
15 * Lesser General Public License for more details. |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
16 * |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
17 * You should have received a copy of the GNU Lesser General Public |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
18 * License along with FFmpeg; if not, write to the Free Software |
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
20 */ |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
21 |
1106 | 22 /** |
23 * @file dct-test.c | |
2967 | 24 * DCT test. (c) 2001 Fabrice Bellard. |
1106 | 25 * Started from sample code by Juan J. Sierralta P. |
26 */ | |
27 | |
0 | 28 #include <stdlib.h> |
29 #include <stdio.h> | |
30 #include <string.h> | |
31 #include <sys/time.h> | |
32 #include <unistd.h> | |
5118
3b190bc34546
Add some #includes to allow compilation without HAVE_AV_CONFIG_H.
diego
parents:
5110
diff
changeset
|
33 #include <math.h> |
0 | 34 |
35 #include "dsputil.h" | |
36 | |
633 | 37 #include "simple_idct.h" |
1557 | 38 #include "faandct.h" |
33 | 39 |
980 | 40 #ifndef MAX |
41 #define MAX(a, b) (((a) > (b)) ? (a) : (b)) | |
42 #endif | |
43 | |
2872 | 44 #undef printf |
45 | |
/**
 * Local definition of fast_memcpy() that simply forwards to memcpy().
 *
 * @param a destination buffer
 * @param b source buffer
 * @param c number of bytes to copy
 * @return  the value returned by memcpy(), i.e. the destination pointer
 */
void *fast_memcpy(void *a, const void *b, size_t c)
{
    return memcpy(a, b, c);
}
47 | |
33 | 48 /* reference fdct/idct */ |
0 | 49 extern void fdct(DCTELEM *block); |
33 | 50 extern void idct(DCTELEM *block); |
2872 | 51 extern void ff_idct_xvid_mmx(DCTELEM *block); |
52 extern void ff_idct_xvid_mmx2(DCTELEM *block); | |
0 | 53 extern void init_fdct(); |
54 | |
33 | 55 extern void ff_mmx_idct(DCTELEM *data); |
56 extern void ff_mmxext_idct(DCTELEM *data); | |
57 | |
633 | 58 extern void odivx_idct_c (short *block); |
59 | |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
60 // BFIN |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
61 extern void ff_bfin_idct (DCTELEM *block) ; |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
62 extern void ff_bfin_fdct (DCTELEM *block) ; |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
63 |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
64 // ALTIVEC |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
65 extern void fdct_altivec (DCTELEM *block); |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
66 //extern void idct_altivec (DCTELEM *block);?? no routine |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
67 |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
68 |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
69 struct algo { |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
70 char *name; |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
71 enum { FDCT, IDCT } is_idct; |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
72 void (* func) (DCTELEM *block); |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
73 void (* ref) (DCTELEM *block); |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
74 enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM } format; |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
75 }; |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
76 |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
77 #ifndef FAAN_POSTSCALE |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
78 #define FAAN_SCALE SCALE_PERM |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
79 #else |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
80 #define FAAN_SCALE NO_PERM |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
81 #endif |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
82 |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
83 #define DCT_ERROR(name,is_idct,func,ref,form) {name,is_idct,func,ref,form} |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
84 |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
85 |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
86 struct algo algos[] = { |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
87 DCT_ERROR( "REF-DBL", 0, fdct, fdct, NO_PERM), |
5107 | 88 DCT_ERROR("FAAN", 0, ff_faandct, fdct, FAAN_SCALE), |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
89 DCT_ERROR("IJG-AAN-INT", 0, fdct_ifast, fdct, SCALE_PERM), |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
90 DCT_ERROR("IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, NO_PERM), |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
91 DCT_ERROR("REF-DBL", 1, idct, idct, NO_PERM), |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
92 DCT_ERROR("INT", 1, j_rev_dct, idct, MMX_PERM), |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
93 DCT_ERROR("SIMPLE-C", 1, simple_idct, idct, NO_PERM), |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
94 |
5108 | 95 #ifdef HAVE_MMX |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
96 DCT_ERROR("MMX", 0, ff_fdct_mmx, fdct, NO_PERM), |
5110 | 97 #ifdef HAVE_MMX2 |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
98 DCT_ERROR("MMX2", 0, ff_fdct_mmx2, fdct, NO_PERM), |
5110 | 99 #endif |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
100 |
5109 | 101 #ifdef CONFIG_GPL |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
102 DCT_ERROR("LIBMPEG2-MMX", 1, ff_mmx_idct, idct, MMX_PERM), |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
103 DCT_ERROR("LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, MMX_PERM), |
5109 | 104 #endif |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
105 DCT_ERROR("SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, MMX_SIMPLE_PERM), |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
106 DCT_ERROR("XVID-MMX", 1, ff_idct_xvid_mmx, idct, NO_PERM), |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
107 DCT_ERROR("XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, NO_PERM), |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
108 #endif |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
109 |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
110 #ifdef HAVE_ALTIVEC |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
111 DCT_ERROR("altivecfdct", 0, fdct_altivec, fdct, NO_PERM), |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
112 #endif |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
113 |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
114 #ifdef ARCH_BFIN |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
115 DCT_ERROR("BFINfdct", 0, ff_bfin_fdct, fdct, NO_PERM), |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
116 DCT_ERROR("BFINidct", 1, ff_bfin_idct, idct, NO_PERM), |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
117 #endif |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
118 |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
119 { 0 } |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
120 }; |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
121 |
0 | 122 #define AANSCALE_BITS 12 |
123 static const unsigned short aanscales[64] = { | |
124 /* precomputed values scaled up by 14 bits */ | |
125 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, | |
126 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270, | |
127 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906, | |
128 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315, | |
129 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, | |
130 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552, | |
131 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446, | |
132 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247 | |
133 }; | |
134 | |
4197 | 135 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP]; |
633 | 136 |
/**
 * Read the current time of day with gettimeofday().
 *
 * @return seconds * 1000000 + microseconds, as a 64-bit value
 */
int64_t gettime(void)
{
    struct timeval now;
    int64_t usec;

    gettimeofday(&now, NULL);
    usec  = (int64_t)now.tv_sec * 1000000;
    usec += now.tv_usec;
    return usec;
}
143 | |
144 #define NB_ITS 20000 | |
145 #define NB_ITS_SPEED 50000 | |
146 | |
33 | 147 static short idct_mmx_perm[64]; |
148 | |
633 | 149 static short idct_simple_mmx_perm[64]={ |
2979 | 150 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, |
151 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, | |
152 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, | |
153 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, | |
154 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, | |
155 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, | |
156 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, | |
157 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, | |
633 | 158 }; |
159 | |
33 | 160 void idct_mmx_init(void) |
161 { | |
162 int i; | |
163 | |
164 /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */ | |
165 for (i = 0; i < 64; i++) { | |
2979 | 166 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); |
167 // idct_simple_mmx_perm[i] = simple_block_permute_op(i); | |
33 | 168 } |
169 } | |
170 | |
171 static DCTELEM block[64] __attribute__ ((aligned (8))); | |
172 static DCTELEM block1[64] __attribute__ ((aligned (8))); | |
633 | 173 static DCTELEM block_org[64] __attribute__ ((aligned (8))); |
33 | 174 |
175 void dct_error(const char *name, int is_idct, | |
176 void (*fdct_func)(DCTELEM *block), | |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
177 void (*fdct_ref)(DCTELEM *block), int form, int test) |
0 | 178 { |
179 int it, i, scale; | |
180 int err_inf, v; | |
1064 | 181 int64_t err2, ti, ti1, it1; |
182 int64_t sysErr[64], sysErrMax=0; | |
633 | 183 int maxout=0; |
184 int blockSumErrMax=0, blockSumErr; | |
0 | 185 |
186 srandom(0); | |
187 | |
188 err_inf = 0; | |
189 err2 = 0; | |
633 | 190 for(i=0; i<64; i++) sysErr[i]=0; |
0 | 191 for(it=0;it<NB_ITS;it++) { |
633 | 192 for(i=0;i<64;i++) |
193 block1[i] = 0; | |
194 switch(test){ | |
2967 | 195 case 0: |
633 | 196 for(i=0;i<64;i++) |
197 block1[i] = (random() % 512) -256; | |
635 | 198 if (is_idct){ |
633 | 199 fdct(block1); |
635 | 200 |
201 for(i=0;i<64;i++) | |
202 block1[i]>>=3; | |
203 } | |
633 | 204 break; |
205 case 1:{ | |
206 int num= (random()%10)+1; | |
207 for(i=0;i<num;i++) | |
208 block1[random()%64] = (random() % 512) -256; | |
209 }break; | |
210 case 2: | |
211 block1[0]= (random()%4096)-2048; | |
212 block1[63]= (block1[0]&1)^1; | |
213 break; | |
214 } | |
33 | 215 |
633 | 216 #if 0 // simulate mismatch control |
217 { int sum=0; | |
218 for(i=0;i<64;i++) | |
219 sum+=block1[i]; | |
220 | |
2967 | 221 if((sum&1)==0) block1[63]^=1; |
633 | 222 } |
223 #endif | |
224 | |
225 for(i=0; i<64; i++) | |
226 block_org[i]= block1[i]; | |
33 | 227 |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
228 if (form == MMX_PERM) { |
633 | 229 for(i=0;i<64;i++) |
33 | 230 block[idct_mmx_perm[i]] = block1[i]; |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
231 } else if (form == MMX_SIMPLE_PERM) { |
633 | 232 for(i=0;i<64;i++) |
233 block[idct_simple_mmx_perm[i]] = block1[i]; | |
234 | |
2979 | 235 } else { |
633 | 236 for(i=0; i<64; i++) |
237 block[i]= block1[i]; | |
33 | 238 } |
633 | 239 #if 0 // simulate mismatch control for tested IDCT but not the ref |
240 { int sum=0; | |
241 for(i=0;i<64;i++) | |
242 sum+=block[i]; | |
243 | |
2967 | 244 if((sum&1)==0) block[63]^=1; |
633 | 245 } |
246 #endif | |
33 | 247 |
0 | 248 fdct_func(block); |
4752
d49f8b3e8c4a
Replace emms() with emms_c(), patch by Marc Hoffman, mmh pleasantst com.
diego
parents:
4197
diff
changeset
|
249 emms_c(); /* for ff_mmx_idct */ |
33 | 250 |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
251 if (form == SCALE_PERM) { |
0 | 252 for(i=0; i<64; i++) { |
635 | 253 scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i]; |
633 | 254 block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS; |
255 } | |
256 } | |
257 | |
33 | 258 fdct_ref(block1); |
0 | 259 |
633 | 260 blockSumErr=0; |
0 | 261 for(i=0;i<64;i++) { |
262 v = abs(block[i] - block1[i]); | |
263 if (v > err_inf) | |
264 err_inf = v; | |
265 err2 += v * v; | |
2979 | 266 sysErr[i] += block[i] - block1[i]; |
267 blockSumErr += v; | |
268 if( abs(block[i])>maxout) maxout=abs(block[i]); | |
0 | 269 } |
633 | 270 if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr; |
271 #if 0 // print different matrix pairs | |
272 if(blockSumErr){ | |
273 printf("\n"); | |
274 for(i=0; i<64; i++){ | |
275 if((i&7)==0) printf("\n"); | |
276 printf("%4d ", block_org[i]); | |
277 } | |
278 for(i=0; i<64; i++){ | |
279 if((i&7)==0) printf("\n"); | |
280 printf("%4d ", block[i] - block1[i]); | |
281 } | |
282 } | |
283 #endif | |
0 | 284 } |
4001 | 285 for(i=0; i<64; i++) sysErrMax= MAX(sysErrMax, FFABS(sysErr[i])); |
2967 | 286 |
633 | 287 #if 1 // dump systematic errors |
288 for(i=0; i<64; i++){ | |
2979 | 289 if(i%8==0) printf("\n"); |
633 | 290 printf("%5d ", (int)sysErr[i]); |
291 } | |
292 printf("\n"); | |
293 #endif | |
2967 | 294 |
633 | 295 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n", |
33 | 296 is_idct ? "IDCT" : "DCT", |
633 | 297 name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax); |
298 #if 1 //Speed test | |
0 | 299 /* speed test */ |
633 | 300 for(i=0;i<64;i++) |
301 block1[i] = 0; | |
302 switch(test){ | |
2967 | 303 case 0: |
633 | 304 for(i=0;i<64;i++) |
305 block1[i] = (random() % 512) -256; | |
635 | 306 if (is_idct){ |
633 | 307 fdct(block1); |
635 | 308 |
309 for(i=0;i<64;i++) | |
310 block1[i]>>=3; | |
311 } | |
633 | 312 break; |
313 case 1:{ | |
314 case 2: | |
315 block1[0] = (random() % 512) -256; | |
316 block1[1] = (random() % 512) -256; | |
317 block1[2] = (random() % 512) -256; | |
318 block1[3] = (random() % 512) -256; | |
319 }break; | |
320 } | |
0 | 321 |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
322 if (form == MMX_PERM) { |
633 | 323 for(i=0;i<64;i++) |
33 | 324 block[idct_mmx_perm[i]] = block1[i]; |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
325 } else if(form == MMX_SIMPLE_PERM) { |
633 | 326 for(i=0;i<64;i++) |
327 block[idct_simple_mmx_perm[i]] = block1[i]; | |
328 } else { | |
329 for(i=0; i<64; i++) | |
330 block[i]= block1[i]; | |
33 | 331 } |
332 | |
0 | 333 ti = gettime(); |
334 it1 = 0; | |
335 do { | |
336 for(it=0;it<NB_ITS_SPEED;it++) { | |
633 | 337 for(i=0; i<64; i++) |
338 block[i]= block1[i]; | |
339 // memcpy(block, block1, sizeof(DCTELEM) * 64); | |
5127 | 340 // do not memcpy especially not fastmemcpy because it does movntq !!! |
0 | 341 fdct_func(block); |
342 } | |
343 it1 += NB_ITS_SPEED; | |
344 ti1 = gettime() - ti; | |
345 } while (ti1 < 1000000); | |
4752
d49f8b3e8c4a
Replace emms() with emms_c(), patch by Marc Hoffman, mmh pleasantst com.
diego
parents:
4197
diff
changeset
|
346 emms_c(); |
0 | 347 |
633 | 348 printf("%s %s: %0.1f kdct/s\n", |
33 | 349 is_idct ? "IDCT" : "DCT", |
0 | 350 name, (double)it1 * 1000.0 / (double)ti1); |
633 | 351 #endif |
0 | 352 } |
353 | |
1064 | 354 static uint8_t img_dest[64] __attribute__ ((aligned (8))); |
355 static uint8_t img_dest1[64] __attribute__ ((aligned (8))); | |
720 | 356 |
1064 | 357 void idct248_ref(uint8_t *dest, int linesize, int16_t *block) |
720 | 358 { |
359 static int init; | |
360 static double c8[8][8]; | |
361 static double c4[4][4]; | |
362 double block1[64], block2[64], block3[64]; | |
363 double s, sum, v; | |
364 int i, j, k; | |
365 | |
366 if (!init) { | |
367 init = 1; | |
368 | |
369 for(i=0;i<8;i++) { | |
370 sum = 0; | |
371 for(j=0;j<8;j++) { | |
372 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0); | |
373 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0); | |
374 sum += c8[i][j] * c8[i][j]; | |
375 } | |
376 } | |
2967 | 377 |
720 | 378 for(i=0;i<4;i++) { |
379 sum = 0; | |
380 for(j=0;j<4;j++) { | |
381 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0); | |
382 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0); | |
383 sum += c4[i][j] * c4[i][j]; | |
384 } | |
385 } | |
386 } | |
387 | |
388 /* butterfly */ | |
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
389 s = 0.5 * sqrt(2.0); |
720 | 390 for(i=0;i<4;i++) { |
391 for(j=0;j<8;j++) { | |
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
392 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s; |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
393 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s; |
720 | 394 } |
395 } | |
396 | |
397 /* idct8 on lines */ | |
398 for(i=0;i<8;i++) { | |
399 for(j=0;j<8;j++) { | |
400 sum = 0; | |
401 for(k=0;k<8;k++) | |
402 sum += c8[k][j] * block1[8*i+k]; | |
403 block2[8*i+j] = sum; | |
404 } | |
405 } | |
406 | |
407 /* idct4 */ | |
408 for(i=0;i<8;i++) { | |
409 for(j=0;j<4;j++) { | |
410 /* top */ | |
411 sum = 0; | |
412 for(k=0;k<4;k++) | |
413 sum += c4[k][j] * block2[8*(2*k)+i]; | |
414 block3[8*(2*j)+i] = sum; | |
415 | |
416 /* bottom */ | |
417 sum = 0; | |
418 for(k=0;k<4;k++) | |
419 sum += c4[k][j] * block2[8*(2*k+1)+i]; | |
420 block3[8*(2*j+1)+i] = sum; | |
421 } | |
422 } | |
423 | |
424 /* clamp and store the result */ | |
425 for(i=0;i<8;i++) { | |
426 for(j=0;j<8;j++) { | |
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
427 v = block3[8*i+j]; |
720 | 428 if (v < 0) |
429 v = 0; | |
430 else if (v > 255) | |
431 v = 255; | |
432 dest[i * linesize + j] = (int)rint(v); | |
433 } | |
434 } | |
435 } | |
436 | |
2967 | 437 void idct248_error(const char *name, |
1064 | 438 void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block)) |
720 | 439 { |
440 int it, i, it1, ti, ti1, err_max, v; | |
441 | |
442 srandom(0); | |
2967 | 443 |
720 | 444 /* just one test to see if code is correct (precision is less |
445 important here) */ | |
446 err_max = 0; | |
447 for(it=0;it<NB_ITS;it++) { | |
2967 | 448 |
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
449 /* XXX: use forward transform to generate values */ |
720 | 450 for(i=0;i<64;i++) |
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
451 block1[i] = (random() % 256) - 128; |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
452 block1[0] += 1024; |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
453 |
720 | 454 for(i=0; i<64; i++) |
455 block[i]= block1[i]; | |
456 idct248_ref(img_dest1, 8, block); | |
2967 | 457 |
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
458 for(i=0; i<64; i++) |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
459 block[i]= block1[i]; |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
460 idct248_put(img_dest, 8, block); |
2967 | 461 |
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
462 for(i=0;i<64;i++) { |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
463 v = abs((int)img_dest[i] - (int)img_dest1[i]); |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
464 if (v == 255) |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
465 printf("%d %d\n", img_dest[i], img_dest1[i]); |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
466 if (v > err_max) |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
467 err_max = v; |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
468 } |
720 | 469 #if 0 |
470 printf("ref=\n"); | |
471 for(i=0;i<8;i++) { | |
472 int j; | |
473 for(j=0;j<8;j++) { | |
474 printf(" %3d", img_dest1[i*8+j]); | |
475 } | |
476 printf("\n"); | |
477 } | |
2967 | 478 |
720 | 479 printf("out=\n"); |
480 for(i=0;i<8;i++) { | |
481 int j; | |
482 for(j=0;j<8;j++) { | |
483 printf(" %3d", img_dest[i*8+j]); | |
484 } | |
485 printf("\n"); | |
486 } | |
487 #endif | |
488 } | |
489 printf("%s %s: err_inf=%d\n", | |
490 1 ? "IDCT248" : "DCT248", | |
491 name, err_max); | |
492 | |
493 ti = gettime(); | |
494 it1 = 0; | |
495 do { | |
496 for(it=0;it<NB_ITS_SPEED;it++) { | |
497 for(i=0; i<64; i++) | |
498 block[i]= block1[i]; | |
499 // memcpy(block, block1, sizeof(DCTELEM) * 64); | |
5127 | 500 // do not memcpy especially not fastmemcpy because it does movntq !!! |
720 | 501 idct248_put(img_dest, 8, block); |
502 } | |
503 it1 += NB_ITS_SPEED; | |
504 ti1 = gettime() - ti; | |
505 } while (ti1 < 1000000); | |
4752
d49f8b3e8c4a
Replace emms() with emms_c(), patch by Marc Hoffman, mmh pleasantst com.
diego
parents:
4197
diff
changeset
|
506 emms_c(); |
720 | 507 |
508 printf("%s %s: %0.1f kdct/s\n", | |
509 1 ? "IDCT248" : "DCT248", | |
510 name, (double)it1 * 1000.0 / (double)ti1); | |
511 } | |
512 | |
/**
 * Print the command-line usage summary to stdout.
 *
 * The synopsis lists every option accepted by the getopt() string in
 * main() ("ih4").
 */
void help(void)
{
    printf("dct-test [-i] [-4] [-h] [<test-number>]\n"
           "test-number 0 -> test with random matrixes\n"
           "            1 -> test with random sparse matrixes\n"
           "            2 -> do 3. test from mpeg4 std\n"
           "-i          test IDCT implementations\n"
           "-4          test IDCT248 implementations\n"
           "-h          print this help\n");
}
522 | |
0 | 523 int main(int argc, char **argv) |
524 { | |
720 | 525 int test_idct = 0, test_248_dct = 0; |
633 | 526 int c,i; |
527 int test=1; | |
33 | 528 |
0 | 529 init_fdct(); |
33 | 530 idct_mmx_init(); |
0 | 531 |
4197 | 532 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i; |
633 | 533 for(i=0;i<MAX_NEG_CROP;i++) { |
4197 | 534 cropTbl[i] = 0; |
535 cropTbl[i + MAX_NEG_CROP + 256] = 255; | |
633 | 536 } |
2967 | 537 |
33 | 538 for(;;) { |
720 | 539 c = getopt(argc, argv, "ih4"); |
33 | 540 if (c == -1) |
541 break; | |
542 switch(c) { | |
543 case 'i': | |
544 test_idct = 1; | |
545 break; | |
720 | 546 case '4': |
547 test_248_dct = 1; | |
548 break; | |
633 | 549 default : |
33 | 550 case 'h': |
551 help(); | |
4754 | 552 return 0; |
33 | 553 } |
554 } | |
2967 | 555 |
633 | 556 if(optind <argc) test= atoi(argv[optind]); |
2967 | 557 |
33 | 558 printf("ffmpeg DCT/IDCT test\n"); |
559 | |
720 | 560 if (test_248_dct) { |
561 idct248_error("SIMPLE-C", simple_idct248_put); | |
33 | 562 } else { |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
563 for (i=0;algos[i].name;i++) |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
564 if (algos[i].is_idct == test_idct) { |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
565 dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test); |
720 | 566 } |
33 | 567 } |
0 | 568 return 0; |
569 } |