Mercurial > libavcodec.hg
annotate dct-test.c @ 9830:bd0879f752e6 libavcodec
Express the H.264 parser dependency on the golomb code in configure instead of
in the Makefile as it is done for all other parts that depend on golomb.
author | diego |
---|---|
date | Tue, 09 Jun 2009 20:29:52 +0000 |
parents | 54456267c77c |
children | 34a65026fa06 |
rev | line source |
---|---|
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
1 /* |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
2 * (c) 2001 Fabrice Bellard |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
3 * 2007 Marc Hoffman <marc.hoffman@analog.com> |
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
4 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
5 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
6 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
7 * FFmpeg is free software; you can redistribute it and/or |
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
8 * modify it under the terms of the GNU Lesser General Public |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
9 * License as published by the Free Software Foundation; either |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
10 * version 2.1 of the License, or (at your option) any later version. |
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
11 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
12 * FFmpeg is distributed in the hope that it will be useful, |
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
15 * Lesser General Public License for more details. |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
16 * |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
17 * You should have received a copy of the GNU Lesser General Public |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
18 * License along with FFmpeg; if not, write to the Free Software |
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
20 */ |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
21 |
1106 | 22 /** |
8718
e9d9d946f213
Use full internal pathname in doxygen @file directives.
diego
parents:
8693
diff
changeset
|
23 * @file libavcodec/dct-test.c |
8724
2c5662c41129
cosmetics: Remove period after copyright statement non-sentence.
diego
parents:
8718
diff
changeset
|
24 * DCT test (c) 2001 Fabrice Bellard |
1106 | 25 * Started from sample code by Juan J. Sierralta P. |
26 */ | |
27 | |
0 | 28 #include <stdlib.h> |
29 #include <stdio.h> | |
30 #include <string.h> | |
31 #include <sys/time.h> | |
32 #include <unistd.h> | |
5118
3b190bc34546
Add some #includes to allow compilation without HAVE_AV_CONFIG_H.
diego
parents:
5110
diff
changeset
|
33 #include <math.h> |
0 | 34 |
7130
601509a430f7
Replace redundant MAX macro declaration by proper use of FFMAX.
diego
parents:
7125
diff
changeset
|
35 #include "libavutil/common.h" |
9199
ea0e5e9a520f
Replace random() usage in test programs by av_lfg_*().
diego
parents:
9189
diff
changeset
|
36 #include "libavutil/lfg.h" |
0 | 37 |
633 | 38 #include "simple_idct.h" |
8223 | 39 #include "aandcttab.h" |
1557 | 40 #include "faandct.h" |
6407 | 41 #include "faanidct.h" |
8430 | 42 #include "x86/idct_xvid.h" |
33 | 43 |
2872 | 44 #undef printf |
45 | |
/**
 * Plain forwarding wrapper around memcpy().
 *
 * Copies c bytes from b to a and returns a, exactly as memcpy() does.
 * NOTE(review): presumably kept so that objects referencing a fast_memcpy
 * symbol still link into this standalone test program -- confirm.
 */
void *fast_memcpy(void *a, const void *b, size_t c)
{
    return memcpy(a, b, c);
}
47 | |
33 | 48 /* reference fdct/idct */ |
9293 | 49 void ff_ref_fdct(DCTELEM *block); |
50 void ff_ref_idct(DCTELEM *block); | |
51 void ff_ref_dct_init(void); | |
0 | 52 |
8250 | 53 void ff_mmx_idct(DCTELEM *data); |
54 void ff_mmxext_idct(DCTELEM *data); | |
33 | 55 |
8250 | 56 void odivx_idct_c(short *block); |
633 | 57 |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
58 // BFIN |
8250 | 59 void ff_bfin_idct(DCTELEM *block); |
60 void ff_bfin_fdct(DCTELEM *block); | |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
61 |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
62 // ALTIVEC |
8250 | 63 void fdct_altivec(DCTELEM *block); |
64 //void idct_altivec(DCTELEM *block);?? no routine | |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
65 |
8351 | 66 // ARM |
67 void j_rev_dct_ARM(DCTELEM *data); | |
68 void simple_idct_ARM(DCTELEM *data); | |
69 void simple_idct_armv5te(DCTELEM *data); | |
70 void ff_simple_idct_armv6(DCTELEM *data); | |
71 void ff_simple_idct_neon(DCTELEM *data); | |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
72 |
8622 | 73 void ff_simple_idct_axp(DCTELEM *data); |
74 | |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
75 struct algo { |
7125
65e7e714c770
Mark constant structure member as const to avoid some warnings.
diego
parents:
6602
diff
changeset
|
76 const char *name; |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
77 enum { FDCT, IDCT } is_idct; |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
78 void (* func) (DCTELEM *block); |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
79 void (* ref) (DCTELEM *block); |
8350 | 80 enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM, SSE2_PERM, PARTTRANS_PERM } format; |
6543
948d9453432b
check mm_flags for each DCT and skips the ones that can't be run
gpoirier
parents:
6542
diff
changeset
|
81 int mm_support; |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
82 }; |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
83 |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
/* Format used for the FAAN forward DCT entry of algos[]: NO_PERM when
 * FAAN_POSTSCALE is defined, SCALE_PERM otherwise. */
#ifdef FAAN_POSTSCALE
#define FAAN_SCALE NO_PERM
#else
#define FAAN_SCALE SCALE_PERM
#endif
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
89 |
7155 | 90 static int cpu_flags; |
91 | |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
92 struct algo algos[] = { |
9293 | 93 {"REF-DBL", 0, ff_ref_fdct, ff_ref_fdct, NO_PERM}, |
94 {"FAAN", 0, ff_faandct, ff_ref_fdct, FAAN_SCALE}, | |
95 {"FAANI", 1, ff_faanidct, ff_ref_idct, NO_PERM}, | |
96 {"IJG-AAN-INT", 0, fdct_ifast, ff_ref_fdct, SCALE_PERM}, | |
97 {"IJG-LLM-INT", 0, ff_jpeg_fdct_islow, ff_ref_fdct, NO_PERM}, | |
98 {"REF-DBL", 1, ff_ref_idct, ff_ref_idct, NO_PERM}, | |
99 {"INT", 1, j_rev_dct, ff_ref_idct, MMX_PERM}, | |
100 {"SIMPLE-C", 1, ff_simple_idct, ff_ref_idct, NO_PERM}, | |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
101 |
8590 | 102 #if HAVE_MMX |
9293 | 103 {"MMX", 0, ff_fdct_mmx, ff_ref_fdct, NO_PERM, FF_MM_MMX}, |
8590 | 104 #if HAVE_MMX2 |
9406
8d51b340393e
Replace deprecated FF_MM_MMXEXT flag with FF_MM_MMX2.
stefano
parents:
9388
diff
changeset
|
105 {"MMX2", 0, ff_fdct_mmx2, ff_ref_fdct, NO_PERM, FF_MM_MMX2}, |
9293 | 106 {"SSE2", 0, ff_fdct_sse2, ff_ref_fdct, NO_PERM, FF_MM_SSE2}, |
5110 | 107 #endif |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
108 |
8590 | 109 #if CONFIG_GPL |
9293 | 110 {"LIBMPEG2-MMX", 1, ff_mmx_idct, ff_ref_idct, MMX_PERM, FF_MM_MMX}, |
9406
8d51b340393e
Replace deprecated FF_MM_MMXEXT flag with FF_MM_MMX2.
stefano
parents:
9388
diff
changeset
|
111 {"LIBMPEG2-MMX2", 1, ff_mmxext_idct, ff_ref_idct, MMX_PERM, FF_MM_MMX2}, |
5109 | 112 #endif |
9293 | 113 {"SIMPLE-MMX", 1, ff_simple_idct_mmx, ff_ref_idct, MMX_SIMPLE_PERM, FF_MM_MMX}, |
114 {"XVID-MMX", 1, ff_idct_xvid_mmx, ff_ref_idct, NO_PERM, FF_MM_MMX}, | |
9406
8d51b340393e
Replace deprecated FF_MM_MMXEXT flag with FF_MM_MMX2.
stefano
parents:
9388
diff
changeset
|
115 {"XVID-MMX2", 1, ff_idct_xvid_mmx2, ff_ref_idct, NO_PERM, FF_MM_MMX2}, |
9293 | 116 {"XVID-SSE2", 1, ff_idct_xvid_sse2, ff_ref_idct, SSE2_PERM, FF_MM_SSE2}, |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
117 #endif |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
118 |
8590 | 119 #if HAVE_ALTIVEC |
9293 | 120 {"altivecfdct", 0, fdct_altivec, ff_ref_fdct, NO_PERM, FF_MM_ALTIVEC}, |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
121 #endif |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
122 |
8590 | 123 #if ARCH_BFIN |
9293 | 124 {"BFINfdct", 0, ff_bfin_fdct, ff_ref_fdct, NO_PERM}, |
125 {"BFINidct", 1, ff_bfin_idct, ff_ref_idct, NO_PERM}, | |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
126 #endif |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
127 |
8590 | 128 #if ARCH_ARM |
9293 | 129 {"SIMPLE-ARM", 1, simple_idct_ARM, ff_ref_idct, NO_PERM }, |
130 {"INT-ARM", 1, j_rev_dct_ARM, ff_ref_idct, MMX_PERM }, | |
8590 | 131 #if HAVE_ARMV5TE |
9293 | 132 {"SIMPLE-ARMV5TE", 1, simple_idct_armv5te, ff_ref_idct, NO_PERM }, |
8351 | 133 #endif |
8590 | 134 #if HAVE_ARMV6 |
9293 | 135 {"SIMPLE-ARMV6", 1, ff_simple_idct_armv6, ff_ref_idct, MMX_PERM }, |
8351 | 136 #endif |
8590 | 137 #if HAVE_NEON |
9293 | 138 {"SIMPLE-NEON", 1, ff_simple_idct_neon, ff_ref_idct, PARTTRANS_PERM }, |
8351 | 139 #endif |
8359 | 140 #endif /* ARCH_ARM */ |
8351 | 141 |
8622 | 142 #if ARCH_ALPHA |
9293 | 143 {"SIMPLE-ALPHA", 1, ff_simple_idct_axp, ff_ref_idct, NO_PERM }, |
8622 | 144 #endif |
145 | |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
146 { 0 } |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
147 }; |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
148 |
0 | 149 #define AANSCALE_BITS 12 |
150 | |
4197 | 151 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP]; |
633 | 152 |
9295
b225f51903af
Mark non-exported functions in test and example programs as static.
diego
parents:
9293
diff
changeset
|
/**
 * Read the current time with gettimeofday().
 *
 * @return seconds and microseconds of the reading combined into a single
 *         microsecond count
 */
static int64_t gettime(void)
{
    struct timeval now;
    int64_t usec;

    gettimeofday(&now, NULL);
    /* widen before multiplying so the product is computed in 64 bits */
    usec  = (int64_t)now.tv_sec * 1000000;
    usec += now.tv_usec;
    return usec;
}
159 | |
160 #define NB_ITS 20000 | |
161 #define NB_ITS_SPEED 50000 | |
162 | |
33 | 163 static short idct_mmx_perm[64]; |
164 | |
/**
 * Input reordering used for MMX_SIMPLE_PERM entries: dct_error() stores
 * coefficient i at position idct_simple_mmx_perm[i] before calling the
 * transform. The table is only ever read, hence const.
 */
static const short idct_simple_mmx_perm[64]={
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
175 | |
6602 | 176 static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7}; |
177 | |
9295
b225f51903af
Mark non-exported functions in test and example programs as static.
diego
parents:
9293
diff
changeset
|
178 static void idct_mmx_init(void) |
33 | 179 { |
180 int i; | |
181 | |
182 /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */ | |
183 for (i = 0; i < 64; i++) { | |
2979 | 184 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); |
185 // idct_simple_mmx_perm[i] = simple_block_permute_op(i); | |
33 | 186 } |
187 } | |
188 | |
9793
54456267c77c
Replace more uses of __attribute__((aligned)) by DECLARE_ALIGNED.
ramiro
parents:
9406
diff
changeset
|
189 DECLARE_ALIGNED(16, static DCTELEM, block[64]); |
54456267c77c
Replace more uses of __attribute__((aligned)) by DECLARE_ALIGNED.
ramiro
parents:
9406
diff
changeset
|
190 DECLARE_ALIGNED(8, static DCTELEM, block1[64]); |
54456267c77c
Replace more uses of __attribute__((aligned)) by DECLARE_ALIGNED.
ramiro
parents:
9406
diff
changeset
|
191 DECLARE_ALIGNED(8, static DCTELEM, block_org[64]); |
33 | 192 |
/**
 * Execute the x86 "emms" instruction when built with HAVE_MMX and the
 * FF_MM_MMX bit is set in cpu_flags; otherwise do nothing.
 */
static inline void mmx_emms(void)
{
#if HAVE_MMX
    if (!(cpu_flags & FF_MM_MMX))
        return;
    __asm__ volatile ("emms\n\t");
#endif
}
200 | |
9295
b225f51903af
Mark non-exported functions in test and example programs as static.
diego
parents:
9293
diff
changeset
|
201 static void dct_error(const char *name, int is_idct, |
33 | 202 void (*fdct_func)(DCTELEM *block), |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
203 void (*fdct_ref)(DCTELEM *block), int form, int test) |
0 | 204 { |
205 int it, i, scale; | |
206 int err_inf, v; | |
1064 | 207 int64_t err2, ti, ti1, it1; |
208 int64_t sysErr[64], sysErrMax=0; | |
633 | 209 int maxout=0; |
210 int blockSumErrMax=0, blockSumErr; | |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9295
diff
changeset
|
211 AVLFG prng; |
0 | 212 |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9295
diff
changeset
|
213 av_lfg_init(&prng, 1); |
0 | 214 |
215 err_inf = 0; | |
216 err2 = 0; | |
633 | 217 for(i=0; i<64; i++) sysErr[i]=0; |
0 | 218 for(it=0;it<NB_ITS;it++) { |
633 | 219 for(i=0;i<64;i++) |
220 block1[i] = 0; | |
221 switch(test){ | |
2967 | 222 case 0: |
633 | 223 for(i=0;i<64;i++) |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9295
diff
changeset
|
224 block1[i] = (av_lfg_get(&prng) % 512) -256; |
635 | 225 if (is_idct){ |
9293 | 226 ff_ref_fdct(block1); |
635 | 227 |
228 for(i=0;i<64;i++) | |
229 block1[i]>>=3; | |
230 } | |
633 | 231 break; |
232 case 1:{ | |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9295
diff
changeset
|
233 int num = av_lfg_get(&prng) % 10 + 1; |
633 | 234 for(i=0;i<num;i++) |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9295
diff
changeset
|
235 block1[av_lfg_get(&prng) % 64] = av_lfg_get(&prng) % 512 -256; |
633 | 236 }break; |
237 case 2: | |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9295
diff
changeset
|
238 block1[0] = av_lfg_get(&prng) % 4096 - 2048; |
633 | 239 block1[63]= (block1[0]&1)^1; |
240 break; | |
241 } | |
33 | 242 |
633 | 243 #if 0 // simulate mismatch control |
244 { int sum=0; | |
245 for(i=0;i<64;i++) | |
246 sum+=block1[i]; | |
247 | |
2967 | 248 if((sum&1)==0) block1[63]^=1; |
633 | 249 } |
250 #endif | |
251 | |
252 for(i=0; i<64; i++) | |
253 block_org[i]= block1[i]; | |
33 | 254 |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
255 if (form == MMX_PERM) { |
633 | 256 for(i=0;i<64;i++) |
33 | 257 block[idct_mmx_perm[i]] = block1[i]; |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
258 } else if (form == MMX_SIMPLE_PERM) { |
633 | 259 for(i=0;i<64;i++) |
260 block[idct_simple_mmx_perm[i]] = block1[i]; | |
261 | |
6602 | 262 } else if (form == SSE2_PERM) { |
263 for(i=0; i<64; i++) | |
264 block[(i&0x38) | idct_sse2_row_perm[i&7]] = block1[i]; | |
8350 | 265 } else if (form == PARTTRANS_PERM) { |
266 for(i=0; i<64; i++) | |
267 block[(i&0x24) | ((i&3)<<3) | ((i>>3)&3)] = block1[i]; | |
2979 | 268 } else { |
633 | 269 for(i=0; i<64; i++) |
270 block[i]= block1[i]; | |
33 | 271 } |
633 | 272 #if 0 // simulate mismatch control for tested IDCT but not the ref |
273 { int sum=0; | |
274 for(i=0;i<64;i++) | |
275 sum+=block[i]; | |
276 | |
2967 | 277 if((sum&1)==0) block[63]^=1; |
633 | 278 } |
279 #endif | |
33 | 280 |
0 | 281 fdct_func(block); |
7155 | 282 mmx_emms(); |
33 | 283 |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
284 if (form == SCALE_PERM) { |
0 | 285 for(i=0; i<64; i++) { |
8223 | 286 scale = 8*(1 << (AANSCALE_BITS + 11)) / ff_aanscales[i]; |
633 | 287 block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS; |
288 } | |
289 } | |
290 | |
33 | 291 fdct_ref(block1); |
0 | 292 |
633 | 293 blockSumErr=0; |
0 | 294 for(i=0;i<64;i++) { |
295 v = abs(block[i] - block1[i]); | |
296 if (v > err_inf) | |
297 err_inf = v; | |
298 err2 += v * v; | |
2979 | 299 sysErr[i] += block[i] - block1[i]; |
300 blockSumErr += v; | |
301 if( abs(block[i])>maxout) maxout=abs(block[i]); | |
0 | 302 } |
633 | 303 if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr; |
304 #if 0 // print different matrix pairs | |
305 if(blockSumErr){ | |
306 printf("\n"); | |
307 for(i=0; i<64; i++){ | |
308 if((i&7)==0) printf("\n"); | |
309 printf("%4d ", block_org[i]); | |
310 } | |
311 for(i=0; i<64; i++){ | |
312 if((i&7)==0) printf("\n"); | |
313 printf("%4d ", block[i] - block1[i]); | |
314 } | |
315 } | |
316 #endif | |
0 | 317 } |
7130
601509a430f7
Replace redundant MAX macro declaration by proper use of FFMAX.
diego
parents:
7125
diff
changeset
|
318 for(i=0; i<64; i++) sysErrMax= FFMAX(sysErrMax, FFABS(sysErr[i])); |
2967 | 319 |
633 | 320 #if 1 // dump systematic errors |
321 for(i=0; i<64; i++){ | |
2979 | 322 if(i%8==0) printf("\n"); |
9189 | 323 printf("%7d ", (int)sysErr[i]); |
633 | 324 } |
325 printf("\n"); | |
326 #endif | |
2967 | 327 |
633 | 328 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n", |
33 | 329 is_idct ? "IDCT" : "DCT", |
633 | 330 name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax); |
331 #if 1 //Speed test | |
0 | 332 /* speed test */ |
633 | 333 for(i=0;i<64;i++) |
334 block1[i] = 0; | |
335 switch(test){ | |
2967 | 336 case 0: |
633 | 337 for(i=0;i<64;i++) |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9295
diff
changeset
|
338 block1[i] = av_lfg_get(&prng) % 512 -256; |
635 | 339 if (is_idct){ |
9293 | 340 ff_ref_fdct(block1); |
635 | 341 |
342 for(i=0;i<64;i++) | |
343 block1[i]>>=3; | |
344 } | |
633 | 345 break; |
346 case 1:{ | |
347 case 2: | |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9295
diff
changeset
|
348 block1[0] = av_lfg_get(&prng) % 512 -256; |
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9295
diff
changeset
|
349 block1[1] = av_lfg_get(&prng) % 512 -256; |
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9295
diff
changeset
|
350 block1[2] = av_lfg_get(&prng) % 512 -256; |
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9295
diff
changeset
|
351 block1[3] = av_lfg_get(&prng) % 512 -256; |
633 | 352 }break; |
353 } | |
0 | 354 |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
355 if (form == MMX_PERM) { |
633 | 356 for(i=0;i<64;i++) |
33 | 357 block[idct_mmx_perm[i]] = block1[i]; |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
358 } else if(form == MMX_SIMPLE_PERM) { |
633 | 359 for(i=0;i<64;i++) |
360 block[idct_simple_mmx_perm[i]] = block1[i]; | |
361 } else { | |
362 for(i=0; i<64; i++) | |
363 block[i]= block1[i]; | |
33 | 364 } |
365 | |
0 | 366 ti = gettime(); |
367 it1 = 0; | |
368 do { | |
369 for(it=0;it<NB_ITS_SPEED;it++) { | |
633 | 370 for(i=0; i<64; i++) |
371 block[i]= block1[i]; | |
372 // memcpy(block, block1, sizeof(DCTELEM) * 64); | |
5127 | 373 // do not memcpy especially not fastmemcpy because it does movntq !!! |
0 | 374 fdct_func(block); |
375 } | |
376 it1 += NB_ITS_SPEED; | |
377 ti1 = gettime() - ti; | |
378 } while (ti1 < 1000000); | |
7155 | 379 mmx_emms(); |
0 | 380 |
633 | 381 printf("%s %s: %0.1f kdct/s\n", |
33 | 382 is_idct ? "IDCT" : "DCT", |
0 | 383 name, (double)it1 * 1000.0 / (double)ti1); |
633 | 384 #endif |
0 | 385 } |
386 | |
9793
54456267c77c
Replace more uses of __attribute__((aligned)) by DECLARE_ALIGNED.
ramiro
parents:
9406
diff
changeset
|
387 DECLARE_ALIGNED(8, static uint8_t, img_dest[64]); |
54456267c77c
Replace more uses of __attribute__((aligned)) by DECLARE_ALIGNED.
ramiro
parents:
9406
diff
changeset
|
388 DECLARE_ALIGNED(8, static uint8_t, img_dest1[64]); |
720 | 389 |
9295
b225f51903af
Mark non-exported functions in test and example programs as static.
diego
parents:
9293
diff
changeset
|
390 static void idct248_ref(uint8_t *dest, int linesize, int16_t *block) |
720 | 391 { |
392 static int init; | |
393 static double c8[8][8]; | |
394 static double c4[4][4]; | |
395 double block1[64], block2[64], block3[64]; | |
396 double s, sum, v; | |
397 int i, j, k; | |
398 | |
399 if (!init) { | |
400 init = 1; | |
401 | |
402 for(i=0;i<8;i++) { | |
403 sum = 0; | |
404 for(j=0;j<8;j++) { | |
405 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0); | |
406 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0); | |
407 sum += c8[i][j] * c8[i][j]; | |
408 } | |
409 } | |
2967 | 410 |
720 | 411 for(i=0;i<4;i++) { |
412 sum = 0; | |
413 for(j=0;j<4;j++) { | |
414 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0); | |
415 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0); | |
416 sum += c4[i][j] * c4[i][j]; | |
417 } | |
418 } | |
419 } | |
420 | |
421 /* butterfly */ | |
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
422 s = 0.5 * sqrt(2.0); |
720 | 423 for(i=0;i<4;i++) { |
424 for(j=0;j<8;j++) { | |
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
425 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s; |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
426 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s; |
720 | 427 } |
428 } | |
429 | |
430 /* idct8 on lines */ | |
431 for(i=0;i<8;i++) { | |
432 for(j=0;j<8;j++) { | |
433 sum = 0; | |
434 for(k=0;k<8;k++) | |
435 sum += c8[k][j] * block1[8*i+k]; | |
436 block2[8*i+j] = sum; | |
437 } | |
438 } | |
439 | |
440 /* idct4 */ | |
441 for(i=0;i<8;i++) { | |
442 for(j=0;j<4;j++) { | |
443 /* top */ | |
444 sum = 0; | |
445 for(k=0;k<4;k++) | |
446 sum += c4[k][j] * block2[8*(2*k)+i]; | |
447 block3[8*(2*j)+i] = sum; | |
448 | |
449 /* bottom */ | |
450 sum = 0; | |
451 for(k=0;k<4;k++) | |
452 sum += c4[k][j] * block2[8*(2*k+1)+i]; | |
453 block3[8*(2*j+1)+i] = sum; | |
454 } | |
455 } | |
456 | |
457 /* clamp and store the result */ | |
458 for(i=0;i<8;i++) { | |
459 for(j=0;j<8;j++) { | |
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
460 v = block3[8*i+j]; |
720 | 461 if (v < 0) |
462 v = 0; | |
463 else if (v > 255) | |
464 v = 255; | |
465 dest[i * linesize + j] = (int)rint(v); | |
466 } | |
467 } | |
468 } | |
469 | |
9295
b225f51903af
Mark non-exported functions in test and example programs as static.
diego
parents:
9293
diff
changeset
|
470 static void idct248_error(const char *name, |
1064 | 471 void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block)) |
720 | 472 { |
473 int it, i, it1, ti, ti1, err_max, v; | |
474 | |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9295
diff
changeset
|
475 AVLFG prng; |
9199
ea0e5e9a520f
Replace random() usage in test programs by av_lfg_*().
diego
parents:
9189
diff
changeset
|
476 |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9295
diff
changeset
|
477 av_lfg_init(&prng, 1); |
2967 | 478 |
720 | 479 /* just one test to see if code is correct (precision is less |
480 important here) */ | |
481 err_max = 0; | |
482 for(it=0;it<NB_ITS;it++) { | |
2967 | 483 |
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
484 /* XXX: use forward transform to generate values */ |
720 | 485 for(i=0;i<64;i++) |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9295
diff
changeset
|
486 block1[i] = av_lfg_get(&prng) % 256 - 128; |
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
487 block1[0] += 1024; |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
488 |
720 | 489 for(i=0; i<64; i++) |
490 block[i]= block1[i]; | |
491 idct248_ref(img_dest1, 8, block); | |
2967 | 492 |
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
493 for(i=0; i<64; i++) |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
494 block[i]= block1[i]; |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
495 idct248_put(img_dest, 8, block); |
2967 | 496 |
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
497 for(i=0;i<64;i++) { |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
498 v = abs((int)img_dest[i] - (int)img_dest1[i]); |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
499 if (v == 255) |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
500 printf("%d %d\n", img_dest[i], img_dest1[i]); |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
501 if (v > err_max) |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
502 err_max = v; |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
503 } |
720 | 504 #if 0 |
505 printf("ref=\n"); | |
506 for(i=0;i<8;i++) { | |
507 int j; | |
508 for(j=0;j<8;j++) { | |
509 printf(" %3d", img_dest1[i*8+j]); | |
510 } | |
511 printf("\n"); | |
512 } | |
2967 | 513 |
720 | 514 printf("out=\n"); |
515 for(i=0;i<8;i++) { | |
516 int j; | |
517 for(j=0;j<8;j++) { | |
518 printf(" %3d", img_dest[i*8+j]); | |
519 } | |
520 printf("\n"); | |
521 } | |
522 #endif | |
523 } | |
524 printf("%s %s: err_inf=%d\n", | |
525 1 ? "IDCT248" : "DCT248", | |
526 name, err_max); | |
527 | |
528 ti = gettime(); | |
529 it1 = 0; | |
530 do { | |
531 for(it=0;it<NB_ITS_SPEED;it++) { | |
532 for(i=0; i<64; i++) | |
533 block[i]= block1[i]; | |
534 // memcpy(block, block1, sizeof(DCTELEM) * 64); | |
5127 | 535 // do not memcpy especially not fastmemcpy because it does movntq !!! |
720 | 536 idct248_put(img_dest, 8, block); |
537 } | |
538 it1 += NB_ITS_SPEED; | |
539 ti1 = gettime() - ti; | |
540 } while (ti1 < 1000000); | |
7155 | 541 mmx_emms(); |
720 | 542 |
543 printf("%s %s: %0.1f kdct/s\n", | |
544 1 ? "IDCT248" : "DCT248", | |
545 name, (double)it1 * 1000.0 / (double)ti1); | |
546 } | |
547 | |
9295
b225f51903af
Mark non-exported functions in test and example programs as static.
diego
parents:
9293
diff
changeset
|
/** Print the command line synopsis and the meaning of each option to stdout. */
static void help(void)
{
    static const char usage[] =
        "dct-test [-i] [<test-number>]\n"
        "test-number 0 -> test with random matrixes\n"
        " 1 -> test with random sparse matrixes\n"
        " 2 -> do 3. test from mpeg4 std\n"
        "-i test IDCT implementations\n"
        "-4 test IDCT248 implementations\n";

    printf("%s", usage);
}
557 | |
0 | 558 int main(int argc, char **argv) |
559 { | |
720 | 560 int test_idct = 0, test_248_dct = 0; |
633 | 561 int c,i; |
562 int test=1; | |
7155 | 563 cpu_flags = mm_support(); |
33 | 564 |
9293 | 565 ff_ref_dct_init(); |
33 | 566 idct_mmx_init(); |
7135
6bd6a2da306e
Define mm_flags/support to be 0 on architectures where they don't exist.
astrange
parents:
7130
diff
changeset
|
567 |
4197 | 568 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i; |
633 | 569 for(i=0;i<MAX_NEG_CROP;i++) { |
4197 | 570 cropTbl[i] = 0; |
571 cropTbl[i + MAX_NEG_CROP + 256] = 255; | |
633 | 572 } |
2967 | 573 |
33 | 574 for(;;) { |
720 | 575 c = getopt(argc, argv, "ih4"); |
33 | 576 if (c == -1) |
577 break; | |
578 switch(c) { | |
579 case 'i': | |
580 test_idct = 1; | |
581 break; | |
720 | 582 case '4': |
583 test_248_dct = 1; | |
584 break; | |
633 | 585 default : |
33 | 586 case 'h': |
587 help(); | |
4754 | 588 return 0; |
33 | 589 } |
590 } | |
2967 | 591 |
633 | 592 if(optind <argc) test= atoi(argv[optind]); |
2967 | 593 |
33 | 594 printf("ffmpeg DCT/IDCT test\n"); |
595 | |
720 | 596 if (test_248_dct) { |
6001 | 597 idct248_error("SIMPLE-C", ff_simple_idct248_put); |
33 | 598 } else { |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
599 for (i=0;algos[i].name;i++) |
7140
654f063dc099
Use a local variable instead of mm_flags in dct-test.
astrange
parents:
7135
diff
changeset
|
600 if (algos[i].is_idct == test_idct && !(~cpu_flags & algos[i].mm_support)) { |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
601 dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test); |
720 | 602 } |
33 | 603 } |
0 | 604 return 0; |
605 } |