Mercurial > libavcodec.hg
annotate dct-test.c @ 12266:48d6738904a9 libavcodec
Fix SPLATB_REG mess. Used to be a if/elseif/elseif/elseif spaghetti, so this
splits it into small optimization-specific macros which are selected for each
DSP function. The advantage of this approach is that the sse4 functions now
use the ssse3 codepath also without needing an explicit sse4 codepath.
author | rbultje |
---|---|
date | Sat, 24 Jul 2010 19:33:05 +0000 |
parents | 7dd2a45249a9 |
children | a5ddb39627fd |
rev | line source |
---|---|
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
1 /* |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
2 * (c) 2001 Fabrice Bellard |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
3 * 2007 Marc Hoffman <marc.hoffman@analog.com> |
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
4 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
5 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
6 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
7 * FFmpeg is free software; you can redistribute it and/or |
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
8 * modify it under the terms of the GNU Lesser General Public |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
9 * License as published by the Free Software Foundation; either |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
10 * version 2.1 of the License, or (at your option) any later version. |
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
11 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
12 * FFmpeg is distributed in the hope that it will be useful, |
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
15 * Lesser General Public License for more details. |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
16 * |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
17 * You should have received a copy of the GNU Lesser General Public |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
18 * License along with FFmpeg; if not, write to the Free Software |
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
20 */ |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
21 |
1106 | 22 /** |
11644
7dd2a45249a9
Remove explicit filename from Doxygen @file commands.
diego
parents:
11414
diff
changeset
|
23 * @file |
8724
2c5662c41129
cosmetics: Remove period after copyright statement non-sentence.
diego
parents:
8718
diff
changeset
|
24 * DCT test (c) 2001 Fabrice Bellard |
1106 | 25 * Started from sample code by Juan J. Sierralta P. |
26 */ | |
27 | |
0 | 28 #include <stdlib.h> |
29 #include <stdio.h> | |
30 #include <string.h> | |
31 #include <sys/time.h> | |
32 #include <unistd.h> | |
5118
3b190bc34546
Add some #includes to allow compilation without HAVE_AV_CONFIG_H.
diego
parents:
5110
diff
changeset
|
33 #include <math.h> |
0 | 34 |
7130
601509a430f7
Replace redundant MAX macro declaration by proper use of FFMAX.
diego
parents:
7125
diff
changeset
|
35 #include "libavutil/common.h" |
9199
ea0e5e9a520f
Replace random() usage in test programs by av_lfg_*().
diego
parents:
9189
diff
changeset
|
36 #include "libavutil/lfg.h" |
0 | 37 |
633 | 38 #include "simple_idct.h" |
8223 | 39 #include "aandcttab.h" |
1557 | 40 #include "faandct.h" |
6407 | 41 #include "faanidct.h" |
8430 | 42 #include "x86/idct_xvid.h" |
11408 | 43 #include "dctref.h" |
33 | 44 |
2872 | 45 #undef printf |
46 | |
8250 | 47 void ff_mmx_idct(DCTELEM *data); |
48 void ff_mmxext_idct(DCTELEM *data); | |
33 | 49 |
8250 | 50 void odivx_idct_c(short *block); |
633 | 51 |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
52 // BFIN |
8250 | 53 void ff_bfin_idct(DCTELEM *block); |
54 void ff_bfin_fdct(DCTELEM *block); | |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
55 |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
56 // ALTIVEC |
8250 | 57 void fdct_altivec(DCTELEM *block); |
58 //void idct_altivec(DCTELEM *block);?? no routine | |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
59 |
8351 | 60 // ARM |
11414 | 61 void ff_j_rev_dct_arm(DCTELEM *data); |
62 void ff_simple_idct_arm(DCTELEM *data); | |
63 void ff_simple_idct_armv5te(DCTELEM *data); | |
8351 | 64 void ff_simple_idct_armv6(DCTELEM *data); |
65 void ff_simple_idct_neon(DCTELEM *data); | |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
66 |
8622 | 67 void ff_simple_idct_axp(DCTELEM *data); |
68 | |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
69 struct algo { |
7125
65e7e714c770
Mark constant structure member as const to avoid some warnings.
diego
parents:
6602
diff
changeset
|
70 const char *name; |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
71 enum { FDCT, IDCT } is_idct; |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
72 void (* func) (DCTELEM *block); |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
73 void (* ref) (DCTELEM *block); |
8350 | 74 enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM, SSE2_PERM, PARTTRANS_PERM } format; |
6543
948d9453432b
check mm_flags for each DCT and skips the ones that can't be run
gpoirier
parents:
6542
diff
changeset
|
75 int mm_support; |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
76 }; |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
77 |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
/* Format tag used for the "FAAN" entry of algos[]: its output goes through
 * the SCALE_PERM rescaling in dct_error() unless FAAN_POSTSCALE is defined. */
#ifndef FAAN_POSTSCALE
#define FAAN_SCALE         SCALE_PERM
#else
#define FAAN_SCALE         NO_PERM
#endif

/* CPU feature flags; set once in main() from mm_support(). */
static int cpu_flags;
85 | |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
86 struct algo algos[] = { |
9293 | 87 {"REF-DBL", 0, ff_ref_fdct, ff_ref_fdct, NO_PERM}, |
88 {"FAAN", 0, ff_faandct, ff_ref_fdct, FAAN_SCALE}, | |
89 {"FAANI", 1, ff_faanidct, ff_ref_idct, NO_PERM}, | |
90 {"IJG-AAN-INT", 0, fdct_ifast, ff_ref_fdct, SCALE_PERM}, | |
91 {"IJG-LLM-INT", 0, ff_jpeg_fdct_islow, ff_ref_fdct, NO_PERM}, | |
92 {"REF-DBL", 1, ff_ref_idct, ff_ref_idct, NO_PERM}, | |
93 {"INT", 1, j_rev_dct, ff_ref_idct, MMX_PERM}, | |
94 {"SIMPLE-C", 1, ff_simple_idct, ff_ref_idct, NO_PERM}, | |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
95 |
8590 | 96 #if HAVE_MMX |
9293 | 97 {"MMX", 0, ff_fdct_mmx, ff_ref_fdct, NO_PERM, FF_MM_MMX}, |
8590 | 98 #if HAVE_MMX2 |
9406
8d51b340393e
Replace deprecated FF_MM_MMXEXT flag with FF_MM_MMX2.
stefano
parents:
9388
diff
changeset
|
99 {"MMX2", 0, ff_fdct_mmx2, ff_ref_fdct, NO_PERM, FF_MM_MMX2}, |
9293 | 100 {"SSE2", 0, ff_fdct_sse2, ff_ref_fdct, NO_PERM, FF_MM_SSE2}, |
5110 | 101 #endif |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
102 |
8590 | 103 #if CONFIG_GPL |
9293 | 104 {"LIBMPEG2-MMX", 1, ff_mmx_idct, ff_ref_idct, MMX_PERM, FF_MM_MMX}, |
9406
8d51b340393e
Replace deprecated FF_MM_MMXEXT flag with FF_MM_MMX2.
stefano
parents:
9388
diff
changeset
|
105 {"LIBMPEG2-MMX2", 1, ff_mmxext_idct, ff_ref_idct, MMX_PERM, FF_MM_MMX2}, |
5109 | 106 #endif |
9293 | 107 {"SIMPLE-MMX", 1, ff_simple_idct_mmx, ff_ref_idct, MMX_SIMPLE_PERM, FF_MM_MMX}, |
108 {"XVID-MMX", 1, ff_idct_xvid_mmx, ff_ref_idct, NO_PERM, FF_MM_MMX}, | |
9406
8d51b340393e
Replace deprecated FF_MM_MMXEXT flag with FF_MM_MMX2.
stefano
parents:
9388
diff
changeset
|
109 {"XVID-MMX2", 1, ff_idct_xvid_mmx2, ff_ref_idct, NO_PERM, FF_MM_MMX2}, |
9293 | 110 {"XVID-SSE2", 1, ff_idct_xvid_sse2, ff_ref_idct, SSE2_PERM, FF_MM_SSE2}, |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
111 #endif |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
112 |
8590 | 113 #if HAVE_ALTIVEC |
9293 | 114 {"altivecfdct", 0, fdct_altivec, ff_ref_fdct, NO_PERM, FF_MM_ALTIVEC}, |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
115 #endif |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
116 |
8590 | 117 #if ARCH_BFIN |
9293 | 118 {"BFINfdct", 0, ff_bfin_fdct, ff_ref_fdct, NO_PERM}, |
119 {"BFINidct", 1, ff_bfin_idct, ff_ref_idct, NO_PERM}, | |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
120 #endif |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
121 |
8590 | 122 #if ARCH_ARM |
11414 | 123 {"SIMPLE-ARM", 1, ff_simple_idct_arm, ff_ref_idct, NO_PERM }, |
124 {"INT-ARM", 1, ff_j_rev_dct_arm, ff_ref_idct, MMX_PERM }, | |
8590 | 125 #if HAVE_ARMV5TE |
11414 | 126 {"SIMPLE-ARMV5TE", 1, ff_simple_idct_armv5te, ff_ref_idct, NO_PERM }, |
8351 | 127 #endif |
8590 | 128 #if HAVE_ARMV6 |
9293 | 129 {"SIMPLE-ARMV6", 1, ff_simple_idct_armv6, ff_ref_idct, MMX_PERM }, |
8351 | 130 #endif |
8590 | 131 #if HAVE_NEON |
9293 | 132 {"SIMPLE-NEON", 1, ff_simple_idct_neon, ff_ref_idct, PARTTRANS_PERM }, |
8351 | 133 #endif |
8359 | 134 #endif /* ARCH_ARM */ |
8351 | 135 |
8622 | 136 #if ARCH_ALPHA |
9293 | 137 {"SIMPLE-ALPHA", 1, ff_simple_idct_axp, ff_ref_idct, NO_PERM }, |
8622 | 138 #endif |
139 | |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
140 { 0 } |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
141 }; |
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
142 |
0 | 143 #define AANSCALE_BITS 12 |
144 | |
4197 | 145 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP]; |
633 | 146 |
9295
b225f51903af
Mark non-exported functions in test and example programs as static.
diego
parents:
9293
diff
changeset
|
/**
 * Return the current time of day, as reported by gettimeofday(),
 * in microseconds.
 */
static int64_t gettime(void)
{
    struct timeval tv;

    gettimeofday(&tv, NULL);
    /* widen before multiplying so the product is computed in 64 bits */
    return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
}
153 | |
/* Number of blocks used for the accuracy measurement, and number of
 * transform calls per timing batch in the speed measurement. */
#define NB_ITS 20000
#define NB_ITS_SPEED 50000

/* Coefficient order for MMX_PERM transforms; computed by idct_mmx_init(). */
static short idct_mmx_perm[64];

/* Coefficient order for MMX_SIMPLE_PERM transforms: source coefficient i is
 * stored at index idct_simple_mmx_perm[i] of the block handed to the IDCT. */
static short idct_simple_mmx_perm[64]={
        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

/* Column order within each row of eight for SSE2_PERM transforms. */
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
171 | |
9295
b225f51903af
Mark non-exported functions in test and example programs as static.
diego
parents:
9293
diff
changeset
|
172 static void idct_mmx_init(void) |
33 | 173 { |
174 int i; | |
175 | |
176 /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */ | |
177 for (i = 0; i < 64; i++) { | |
2979 | 178 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); |
179 // idct_simple_mmx_perm[i] = simple_block_permute_op(i); | |
33 | 180 } |
181 } | |
182 | |
10961
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
9793
diff
changeset
|
183 DECLARE_ALIGNED(16, static DCTELEM, block)[64]; |
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
9793
diff
changeset
|
184 DECLARE_ALIGNED(8, static DCTELEM, block1)[64]; |
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
9793
diff
changeset
|
185 DECLARE_ALIGNED(8, static DCTELEM, block_org)[64]; |
33 | 186 |
/**
 * Execute the x86 "emms" instruction when the build has MMX support and
 * cpu_flags reports an MMX-capable CPU; otherwise do nothing.
 * Called after running a transform that may have used MMX registers.
 */
static inline void mmx_emms(void)
{
#if HAVE_MMX
    if (cpu_flags & FF_MM_MMX)
        __asm__ volatile ("emms\n\t");
#endif
}
194 | |
9295
b225f51903af
Mark non-exported functions in test and example programs as static.
diego
parents:
9293
diff
changeset
|
195 static void dct_error(const char *name, int is_idct, |
33 | 196 void (*fdct_func)(DCTELEM *block), |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
197 void (*fdct_ref)(DCTELEM *block), int form, int test) |
0 | 198 { |
199 int it, i, scale; | |
200 int err_inf, v; | |
1064 | 201 int64_t err2, ti, ti1, it1; |
202 int64_t sysErr[64], sysErrMax=0; | |
633 | 203 int maxout=0; |
204 int blockSumErrMax=0, blockSumErr; | |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9295
diff
changeset
|
205 AVLFG prng; |
0 | 206 |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9295
diff
changeset
|
207 av_lfg_init(&prng, 1); |
0 | 208 |
209 err_inf = 0; | |
210 err2 = 0; | |
633 | 211 for(i=0; i<64; i++) sysErr[i]=0; |
0 | 212 for(it=0;it<NB_ITS;it++) { |
633 | 213 for(i=0;i<64;i++) |
214 block1[i] = 0; | |
215 switch(test){ | |
2967 | 216 case 0: |
633 | 217 for(i=0;i<64;i++) |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9295
diff
changeset
|
218 block1[i] = (av_lfg_get(&prng) % 512) -256; |
635 | 219 if (is_idct){ |
9293 | 220 ff_ref_fdct(block1); |
635 | 221 |
222 for(i=0;i<64;i++) | |
223 block1[i]>>=3; | |
224 } | |
633 | 225 break; |
226 case 1:{ | |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9295
diff
changeset
|
227 int num = av_lfg_get(&prng) % 10 + 1; |
633 | 228 for(i=0;i<num;i++) |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9295
diff
changeset
|
229 block1[av_lfg_get(&prng) % 64] = av_lfg_get(&prng) % 512 -256; |
633 | 230 }break; |
231 case 2: | |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9295
diff
changeset
|
232 block1[0] = av_lfg_get(&prng) % 4096 - 2048; |
633 | 233 block1[63]= (block1[0]&1)^1; |
234 break; | |
235 } | |
33 | 236 |
633 | 237 #if 0 // simulate mismatch control |
238 { int sum=0; | |
239 for(i=0;i<64;i++) | |
240 sum+=block1[i]; | |
241 | |
2967 | 242 if((sum&1)==0) block1[63]^=1; |
633 | 243 } |
244 #endif | |
245 | |
246 for(i=0; i<64; i++) | |
247 block_org[i]= block1[i]; | |
33 | 248 |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
249 if (form == MMX_PERM) { |
633 | 250 for(i=0;i<64;i++) |
33 | 251 block[idct_mmx_perm[i]] = block1[i]; |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
252 } else if (form == MMX_SIMPLE_PERM) { |
633 | 253 for(i=0;i<64;i++) |
254 block[idct_simple_mmx_perm[i]] = block1[i]; | |
255 | |
6602 | 256 } else if (form == SSE2_PERM) { |
257 for(i=0; i<64; i++) | |
258 block[(i&0x38) | idct_sse2_row_perm[i&7]] = block1[i]; | |
8350 | 259 } else if (form == PARTTRANS_PERM) { |
260 for(i=0; i<64; i++) | |
261 block[(i&0x24) | ((i&3)<<3) | ((i>>3)&3)] = block1[i]; | |
2979 | 262 } else { |
633 | 263 for(i=0; i<64; i++) |
264 block[i]= block1[i]; | |
33 | 265 } |
633 | 266 #if 0 // simulate mismatch control for tested IDCT but not the ref |
267 { int sum=0; | |
268 for(i=0;i<64;i++) | |
269 sum+=block[i]; | |
270 | |
2967 | 271 if((sum&1)==0) block[63]^=1; |
633 | 272 } |
273 #endif | |
33 | 274 |
0 | 275 fdct_func(block); |
7155 | 276 mmx_emms(); |
33 | 277 |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
278 if (form == SCALE_PERM) { |
0 | 279 for(i=0; i<64; i++) { |
8223 | 280 scale = 8*(1 << (AANSCALE_BITS + 11)) / ff_aanscales[i]; |
633 | 281 block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS; |
282 } | |
283 } | |
284 | |
33 | 285 fdct_ref(block1); |
0 | 286 |
633 | 287 blockSumErr=0; |
0 | 288 for(i=0;i<64;i++) { |
289 v = abs(block[i] - block1[i]); | |
290 if (v > err_inf) | |
291 err_inf = v; | |
292 err2 += v * v; | |
2979 | 293 sysErr[i] += block[i] - block1[i]; |
294 blockSumErr += v; | |
295 if( abs(block[i])>maxout) maxout=abs(block[i]); | |
0 | 296 } |
633 | 297 if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr; |
298 #if 0 // print different matrix pairs | |
299 if(blockSumErr){ | |
300 printf("\n"); | |
301 for(i=0; i<64; i++){ | |
302 if((i&7)==0) printf("\n"); | |
303 printf("%4d ", block_org[i]); | |
304 } | |
305 for(i=0; i<64; i++){ | |
306 if((i&7)==0) printf("\n"); | |
307 printf("%4d ", block[i] - block1[i]); | |
308 } | |
309 } | |
310 #endif | |
0 | 311 } |
7130
601509a430f7
Replace redundant MAX macro declaration by proper use of FFMAX.
diego
parents:
7125
diff
changeset
|
312 for(i=0; i<64; i++) sysErrMax= FFMAX(sysErrMax, FFABS(sysErr[i])); |
2967 | 313 |
633 | 314 #if 1 // dump systematic errors |
315 for(i=0; i<64; i++){ | |
2979 | 316 if(i%8==0) printf("\n"); |
9189 | 317 printf("%7d ", (int)sysErr[i]); |
633 | 318 } |
319 printf("\n"); | |
320 #endif | |
2967 | 321 |
633 | 322 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n", |
33 | 323 is_idct ? "IDCT" : "DCT", |
633 | 324 name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax); |
325 #if 1 //Speed test | |
0 | 326 /* speed test */ |
633 | 327 for(i=0;i<64;i++) |
328 block1[i] = 0; | |
329 switch(test){ | |
2967 | 330 case 0: |
633 | 331 for(i=0;i<64;i++) |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9295
diff
changeset
|
332 block1[i] = av_lfg_get(&prng) % 512 -256; |
635 | 333 if (is_idct){ |
9293 | 334 ff_ref_fdct(block1); |
635 | 335 |
336 for(i=0;i<64;i++) | |
337 block1[i]>>=3; | |
338 } | |
633 | 339 break; |
340 case 1:{ | |
341 case 2: | |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9295
diff
changeset
|
342 block1[0] = av_lfg_get(&prng) % 512 -256; |
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9295
diff
changeset
|
343 block1[1] = av_lfg_get(&prng) % 512 -256; |
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9295
diff
changeset
|
344 block1[2] = av_lfg_get(&prng) % 512 -256; |
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9295
diff
changeset
|
345 block1[3] = av_lfg_get(&prng) % 512 -256; |
633 | 346 }break; |
347 } | |
0 | 348 |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
349 if (form == MMX_PERM) { |
633 | 350 for(i=0;i<64;i++) |
33 | 351 block[idct_mmx_perm[i]] = block1[i]; |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
352 } else if(form == MMX_SIMPLE_PERM) { |
633 | 353 for(i=0;i<64;i++) |
354 block[idct_simple_mmx_perm[i]] = block1[i]; | |
355 } else { | |
356 for(i=0; i<64; i++) | |
357 block[i]= block1[i]; | |
33 | 358 } |
359 | |
0 | 360 ti = gettime(); |
361 it1 = 0; | |
362 do { | |
363 for(it=0;it<NB_ITS_SPEED;it++) { | |
633 | 364 for(i=0; i<64; i++) |
365 block[i]= block1[i]; | |
366 // memcpy(block, block1, sizeof(DCTELEM) * 64); | |
5127 | 367 // do not memcpy especially not fastmemcpy because it does movntq !!! |
0 | 368 fdct_func(block); |
369 } | |
370 it1 += NB_ITS_SPEED; | |
371 ti1 = gettime() - ti; | |
372 } while (ti1 < 1000000); | |
7155 | 373 mmx_emms(); |
0 | 374 |
633 | 375 printf("%s %s: %0.1f kdct/s\n", |
33 | 376 is_idct ? "IDCT" : "DCT", |
0 | 377 name, (double)it1 * 1000.0 / (double)ti1); |
633 | 378 #endif |
0 | 379 } |
380 | |
10961
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
9793
diff
changeset
|
381 DECLARE_ALIGNED(8, static uint8_t, img_dest)[64]; |
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
9793
diff
changeset
|
382 DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64]; |
720 | 383 |
9295
b225f51903af
Mark non-exported functions in test and example programs as static.
diego
parents:
9293
diff
changeset
|
384 static void idct248_ref(uint8_t *dest, int linesize, int16_t *block) |
720 | 385 { |
386 static int init; | |
387 static double c8[8][8]; | |
388 static double c4[4][4]; | |
389 double block1[64], block2[64], block3[64]; | |
390 double s, sum, v; | |
391 int i, j, k; | |
392 | |
393 if (!init) { | |
394 init = 1; | |
395 | |
396 for(i=0;i<8;i++) { | |
397 sum = 0; | |
398 for(j=0;j<8;j++) { | |
399 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0); | |
400 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0); | |
401 sum += c8[i][j] * c8[i][j]; | |
402 } | |
403 } | |
2967 | 404 |
720 | 405 for(i=0;i<4;i++) { |
406 sum = 0; | |
407 for(j=0;j<4;j++) { | |
408 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0); | |
409 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0); | |
410 sum += c4[i][j] * c4[i][j]; | |
411 } | |
412 } | |
413 } | |
414 | |
415 /* butterfly */ | |
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
416 s = 0.5 * sqrt(2.0); |
720 | 417 for(i=0;i<4;i++) { |
418 for(j=0;j<8;j++) { | |
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
419 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s; |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
420 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s; |
720 | 421 } |
422 } | |
423 | |
424 /* idct8 on lines */ | |
425 for(i=0;i<8;i++) { | |
426 for(j=0;j<8;j++) { | |
427 sum = 0; | |
428 for(k=0;k<8;k++) | |
429 sum += c8[k][j] * block1[8*i+k]; | |
430 block2[8*i+j] = sum; | |
431 } | |
432 } | |
433 | |
434 /* idct4 */ | |
435 for(i=0;i<8;i++) { | |
436 for(j=0;j<4;j++) { | |
437 /* top */ | |
438 sum = 0; | |
439 for(k=0;k<4;k++) | |
440 sum += c4[k][j] * block2[8*(2*k)+i]; | |
441 block3[8*(2*j)+i] = sum; | |
442 | |
443 /* bottom */ | |
444 sum = 0; | |
445 for(k=0;k<4;k++) | |
446 sum += c4[k][j] * block2[8*(2*k+1)+i]; | |
447 block3[8*(2*j+1)+i] = sum; | |
448 } | |
449 } | |
450 | |
451 /* clamp and store the result */ | |
452 for(i=0;i<8;i++) { | |
453 for(j=0;j<8;j++) { | |
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
454 v = block3[8*i+j]; |
720 | 455 if (v < 0) |
456 v = 0; | |
457 else if (v > 255) | |
458 v = 255; | |
459 dest[i * linesize + j] = (int)rint(v); | |
460 } | |
461 } | |
462 } | |
463 | |
9295
b225f51903af
Mark non-exported functions in test and example programs as static.
diego
parents:
9293
diff
changeset
|
464 static void idct248_error(const char *name, |
1064 | 465 void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block)) |
720 | 466 { |
467 int it, i, it1, ti, ti1, err_max, v; | |
468 | |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9295
diff
changeset
|
469 AVLFG prng; |
9199
ea0e5e9a520f
Replace random() usage in test programs by av_lfg_*().
diego
parents:
9189
diff
changeset
|
470 |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9295
diff
changeset
|
471 av_lfg_init(&prng, 1); |
2967 | 472 |
720 | 473 /* just one test to see if code is correct (precision is less |
474 important here) */ | |
475 err_max = 0; | |
476 for(it=0;it<NB_ITS;it++) { | |
2967 | 477 |
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
478 /* XXX: use forward transform to generate values */ |
720 | 479 for(i=0;i<64;i++) |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9295
diff
changeset
|
480 block1[i] = av_lfg_get(&prng) % 256 - 128; |
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
481 block1[0] += 1024; |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
482 |
720 | 483 for(i=0; i<64; i++) |
484 block[i]= block1[i]; | |
485 idct248_ref(img_dest1, 8, block); | |
2967 | 486 |
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
487 for(i=0; i<64; i++) |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
488 block[i]= block1[i]; |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
489 idct248_put(img_dest, 8, block); |
2967 | 490 |
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
491 for(i=0;i<64;i++) { |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
492 v = abs((int)img_dest[i] - (int)img_dest1[i]); |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
493 if (v == 255) |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
494 printf("%d %d\n", img_dest[i], img_dest1[i]); |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
495 if (v > err_max) |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
496 err_max = v; |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
497 } |
720 | 498 #if 0 |
499 printf("ref=\n"); | |
500 for(i=0;i<8;i++) { | |
501 int j; | |
502 for(j=0;j<8;j++) { | |
503 printf(" %3d", img_dest1[i*8+j]); | |
504 } | |
505 printf("\n"); | |
506 } | |
2967 | 507 |
720 | 508 printf("out=\n"); |
509 for(i=0;i<8;i++) { | |
510 int j; | |
511 for(j=0;j<8;j++) { | |
512 printf(" %3d", img_dest[i*8+j]); | |
513 } | |
514 printf("\n"); | |
515 } | |
516 #endif | |
517 } | |
518 printf("%s %s: err_inf=%d\n", | |
519 1 ? "IDCT248" : "DCT248", | |
520 name, err_max); | |
521 | |
522 ti = gettime(); | |
523 it1 = 0; | |
524 do { | |
525 for(it=0;it<NB_ITS_SPEED;it++) { | |
526 for(i=0; i<64; i++) | |
527 block[i]= block1[i]; | |
528 // memcpy(block, block1, sizeof(DCTELEM) * 64); | |
5127 | 529 // do not memcpy especially not fastmemcpy because it does movntq !!! |
720 | 530 idct248_put(img_dest, 8, block); |
531 } | |
532 it1 += NB_ITS_SPEED; | |
533 ti1 = gettime() - ti; | |
534 } while (ti1 < 1000000); | |
7155 | 535 mmx_emms(); |
720 | 536 |
537 printf("%s %s: %0.1f kdct/s\n", | |
538 1 ? "IDCT248" : "DCT248", | |
539 name, (double)it1 * 1000.0 / (double)ti1); | |
540 } | |
541 | |
9295
b225f51903af
Mark non-exported functions in test and example programs as static.
diego
parents:
9293
diff
changeset
|
/**
 * Print the command line usage summary to standard output.
 */
static void help(void)
{
    printf("dct-test [-i] [<test-number>]\n"
           "test-number 0 -> test with random matrixes\n"
           "            1 -> test with random sparse matrixes\n"
           "            2 -> do 3. test from mpeg4 std\n"
           "-i          test IDCT implementations\n"
           "-4          test IDCT248 implementations\n");
}
551 | |
0 | 552 int main(int argc, char **argv) |
553 { | |
720 | 554 int test_idct = 0, test_248_dct = 0; |
633 | 555 int c,i; |
556 int test=1; | |
7155 | 557 cpu_flags = mm_support(); |
33 | 558 |
9293 | 559 ff_ref_dct_init(); |
33 | 560 idct_mmx_init(); |
7135
6bd6a2da306e
Define mm_flags/support to be 0 on architectures where they don't exist.
astrange
parents:
7130
diff
changeset
|
561 |
4197 | 562 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i; |
633 | 563 for(i=0;i<MAX_NEG_CROP;i++) { |
4197 | 564 cropTbl[i] = 0; |
565 cropTbl[i + MAX_NEG_CROP + 256] = 255; | |
633 | 566 } |
2967 | 567 |
33 | 568 for(;;) { |
720 | 569 c = getopt(argc, argv, "ih4"); |
33 | 570 if (c == -1) |
571 break; | |
572 switch(c) { | |
573 case 'i': | |
574 test_idct = 1; | |
575 break; | |
720 | 576 case '4': |
577 test_248_dct = 1; | |
578 break; | |
633 | 579 default : |
33 | 580 case 'h': |
581 help(); | |
4754 | 582 return 0; |
33 | 583 } |
584 } | |
2967 | 585 |
633 | 586 if(optind <argc) test= atoi(argv[optind]); |
2967 | 587 |
33 | 588 printf("ffmpeg DCT/IDCT test\n"); |
589 | |
720 | 590 if (test_248_dct) { |
6001 | 591 idct248_error("SIMPLE-C", ff_simple_idct248_put); |
33 | 592 } else { |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
593 for (i=0;algos[i].name;i++) |
7140
654f063dc099
Use a local variable instead of mm_flags in dct-test.
astrange
parents:
7135
diff
changeset
|
594 if (algos[i].is_idct == test_idct && !(~cpu_flags & algos[i].mm_support)) { |
4755
b7c27288e509
platform-independent restructuring and code simplification
diego
parents:
4754
diff
changeset
|
595 dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test); |
720 | 596 } |
33 | 597 } |
0 | 598 return 0; |
599 } |