comparison ppc/dsputil_ppc.c @ 7333:a8a79f5385f6 libavcodec

cosmetics: Reformat PPC code in libavcodec according to style guidelines. This includes indentation changes, comment reformatting, consistent brace placement and some prettyprinting.
author diego
date Sun, 20 Jul 2008 18:58:30 +0000
parents f7cbb7733146
children 3a93377e8b76
comparison
legend: equal | deleted | inserted | replaced
7332:b1003e468c3d 7333:a8a79f5385f6
58 58
59 #ifdef CONFIG_POWERPC_PERF 59 #ifdef CONFIG_POWERPC_PERF
60 unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total]; 60 unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total];
61 /* list below must match enum in dsputil_ppc.h */ 61 /* list below must match enum in dsputil_ppc.h */
62 static unsigned char* perfname[] = { 62 static unsigned char* perfname[] = {
63 "ff_fft_calc_altivec", 63 "ff_fft_calc_altivec",
64 "gmc1_altivec", 64 "gmc1_altivec",
65 "dct_unquantize_h263_altivec", 65 "dct_unquantize_h263_altivec",
66 "fdct_altivec", 66 "fdct_altivec",
67 "idct_add_altivec", 67 "idct_add_altivec",
68 "idct_put_altivec", 68 "idct_put_altivec",
69 "put_pixels16_altivec", 69 "put_pixels16_altivec",
70 "avg_pixels16_altivec", 70 "avg_pixels16_altivec",
71 "avg_pixels8_altivec", 71 "avg_pixels8_altivec",
72 "put_pixels8_xy2_altivec", 72 "put_pixels8_xy2_altivec",
73 "put_no_rnd_pixels8_xy2_altivec", 73 "put_no_rnd_pixels8_xy2_altivec",
74 "put_pixels16_xy2_altivec", 74 "put_pixels16_xy2_altivec",
75 "put_no_rnd_pixels16_xy2_altivec", 75 "put_no_rnd_pixels16_xy2_altivec",
76 "hadamard8_diff8x8_altivec", 76 "hadamard8_diff8x8_altivec",
77 "hadamard8_diff16_altivec", 77 "hadamard8_diff16_altivec",
78 "avg_pixels8_xy2_altivec", 78 "avg_pixels8_xy2_altivec",
79 "clear_blocks_dcbz32_ppc", 79 "clear_blocks_dcbz32_ppc",
80 "clear_blocks_dcbz128_ppc", 80 "clear_blocks_dcbz128_ppc",
81 "put_h264_chroma_mc8_altivec", 81 "put_h264_chroma_mc8_altivec",
82 "avg_h264_chroma_mc8_altivec", 82 "avg_h264_chroma_mc8_altivec",
83 "put_h264_qpel16_h_lowpass_altivec", 83 "put_h264_qpel16_h_lowpass_altivec",
84 "avg_h264_qpel16_h_lowpass_altivec", 84 "avg_h264_qpel16_h_lowpass_altivec",
85 "put_h264_qpel16_v_lowpass_altivec", 85 "put_h264_qpel16_v_lowpass_altivec",
86 "avg_h264_qpel16_v_lowpass_altivec", 86 "avg_h264_qpel16_v_lowpass_altivec",
87 "put_h264_qpel16_hv_lowpass_altivec", 87 "put_h264_qpel16_hv_lowpass_altivec",
88 "avg_h264_qpel16_hv_lowpass_altivec", 88 "avg_h264_qpel16_hv_lowpass_altivec",
89 "" 89 ""
90 }; 90 };
91 #include <stdio.h> 91 #include <stdio.h>
92 #endif 92 #endif
93 93
94 #ifdef CONFIG_POWERPC_PERF 94 #ifdef CONFIG_POWERPC_PERF
95 void powerpc_display_perf_report(void) 95 void powerpc_display_perf_report(void)
96 { 96 {
97 int i, j; 97 int i, j;
98 av_log(NULL, AV_LOG_INFO, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n"); 98 av_log(NULL, AV_LOG_INFO, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n");
99 for(i = 0 ; i < powerpc_perf_total ; i++) 99 for(i = 0 ; i < powerpc_perf_total ; i++) {
100 { 100 for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) {
101 for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) 101 if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0)
102 { 102 av_log(NULL, AV_LOG_INFO,
103 if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0) 103 " Function \"%s\" (pmc%d):\n\tmin: %"PRIu64"\n\tmax: %"PRIu64"\n\tavg: %1.2lf (%"PRIu64")\n",
104 av_log(NULL, AV_LOG_INFO, 104 perfname[i],
105 " Function \"%s\" (pmc%d):\n\tmin: %"PRIu64"\n\tmax: %"PRIu64"\n\tavg: %1.2lf (%"PRIu64")\n", 105 j+1,
106 perfname[i], 106 perfdata[j][i][powerpc_data_min],
107 j+1, 107 perfdata[j][i][powerpc_data_max],
108 perfdata[j][i][powerpc_data_min], 108 (double)perfdata[j][i][powerpc_data_sum] /
109 perfdata[j][i][powerpc_data_max], 109 (double)perfdata[j][i][powerpc_data_num],
110 (double)perfdata[j][i][powerpc_data_sum] / 110 perfdata[j][i][powerpc_data_num]);
111 (double)perfdata[j][i][powerpc_data_num], 111 }
112 perfdata[j][i][powerpc_data_num]); 112 }
113 }
114 }
115 } 113 }
116 #endif /* CONFIG_POWERPC_PERF */ 114 #endif /* CONFIG_POWERPC_PERF */
117 115
118 /* ***** WARNING ***** WARNING ***** WARNING ***** */ 116 /* ***** WARNING ***** WARNING ***** WARNING ***** */
119 /* 117 /*
120 clear_blocks_dcbz32_ppc will not work properly 118 clear_blocks_dcbz32_ppc will not work properly on PowerPC processors with a
121 on PowerPC processors with a cache line size 119 cache line size not equal to 32 bytes.
122 not equal to 32 bytes. 120 Fortunately all processor used by Apple up to at least the 7450 (aka second
123 Fortunately all processor used by Apple up to 121 generation G4) use 32 bytes cache line.
124 at least the 7450 (aka second generation G4) 122 This is due to the use of the 'dcbz' instruction. It simply clear to zero a
125 use 32 bytes cache line. 123 single cache line, so you need to know the cache line size to use it !
126 This is due to the use of the 'dcbz' instruction. 124 It's absurd, but it's fast...
127 It simply clear to zero a single cache line, 125
128 so you need to know the cache line size to use it ! 126 update 24/06/2003 : Apple released yesterday the G5, with a PPC970. cache line
129 It's absurd, but it's fast... 127 size: 128 bytes. Oups.
130 128 The semantic of dcbz was changed, it always clear 32 bytes. so the function
131 update 24/06/2003 : Apple released yesterday the G5, 129 below will work, but will be slow. So I fixed check_dcbz_effect to use dcbzl,
132 with a PPC970. cache line size : 128 bytes. Oups. 130 which is defined to clear a cache line (as dcbz before). So we still can
133 The semantic of dcbz was changed, it always clear 131 distinguish, and use dcbz (32 bytes) or dcbzl (one cache line) as required.
134 32 bytes. so the function below will work, but will 132
135 be slow. So I fixed check_dcbz_effect to use dcbzl, 133 see <http://developer.apple.com/technotes/tn/tn2087.html>
136 which is defined to clear a cache line (as dcbz before). 134 and <http://developer.apple.com/technotes/tn/tn2086.html>
137 So we still can distinguish, and use dcbz (32 bytes)
138 or dcbzl (one cache line) as required.
139
140 see <http://developer.apple.com/technotes/tn/tn2087.html>
141 and <http://developer.apple.com/technotes/tn/tn2086.html>
142 */ 135 */
143 void clear_blocks_dcbz32_ppc(DCTELEM *blocks) 136 void clear_blocks_dcbz32_ppc(DCTELEM *blocks)
144 { 137 {
145 POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz32, 1); 138 POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz32, 1);
146 register int misal = ((unsigned long)blocks & 0x00000010); 139 register int misal = ((unsigned long)blocks & 0x00000010);
147 register int i = 0; 140 register int i = 0;
148 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1); 141 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1);
149 #if 1 142 #if 1
150 if (misal) { 143 if (misal) {
151 ((unsigned long*)blocks)[0] = 0L; 144 ((unsigned long*)blocks)[0] = 0L;
152 ((unsigned long*)blocks)[1] = 0L; 145 ((unsigned long*)blocks)[1] = 0L;
153 ((unsigned long*)blocks)[2] = 0L; 146 ((unsigned long*)blocks)[2] = 0L;
154 ((unsigned long*)blocks)[3] = 0L; 147 ((unsigned long*)blocks)[3] = 0L;
155 i += 16; 148 i += 16;
156 } 149 }
157 for ( ; i < sizeof(DCTELEM)*6*64-31 ; i += 32) { 150 for ( ; i < sizeof(DCTELEM)*6*64-31 ; i += 32) {
158 asm volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory"); 151 asm volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory");
159 } 152 }
160 if (misal) { 153 if (misal) {
161 ((unsigned long*)blocks)[188] = 0L; 154 ((unsigned long*)blocks)[188] = 0L;
162 ((unsigned long*)blocks)[189] = 0L; 155 ((unsigned long*)blocks)[189] = 0L;
163 ((unsigned long*)blocks)[190] = 0L; 156 ((unsigned long*)blocks)[190] = 0L;
164 ((unsigned long*)blocks)[191] = 0L; 157 ((unsigned long*)blocks)[191] = 0L;
165 i += 16; 158 i += 16;
166 } 159 }
167 #else 160 #else
168 memset(blocks, 0, sizeof(DCTELEM)*6*64); 161 memset(blocks, 0, sizeof(DCTELEM)*6*64);
169 #endif 162 #endif
170 POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1); 163 POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1);
178 POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz128, 1); 171 POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz128, 1);
179 register int misal = ((unsigned long)blocks & 0x0000007f); 172 register int misal = ((unsigned long)blocks & 0x0000007f);
180 register int i = 0; 173 register int i = 0;
181 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1); 174 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1);
182 #if 1 175 #if 1
183 if (misal) { 176 if (misal) {
184 // we could probably also optimize this case, 177 // we could probably also optimize this case,
185 // but there's not much point as the machines 178 // but there's not much point as the machines
186 // aren't available yet (2003-06-26) 179 // aren't available yet (2003-06-26)
187 memset(blocks, 0, sizeof(DCTELEM)*6*64); 180 memset(blocks, 0, sizeof(DCTELEM)*6*64);
188 } 181 }
189 else 182 else
190 for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) { 183 for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) {
191 asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory"); 184 asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
192 } 185 }
193 #else 186 #else
194 memset(blocks, 0, sizeof(DCTELEM)*6*64); 187 memset(blocks, 0, sizeof(DCTELEM)*6*64);
195 #endif 188 #endif
196 POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz128, 1); 189 POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz128, 1);
197 } 190 }
198 #else 191 #else
199 void clear_blocks_dcbz128_ppc(DCTELEM *blocks) 192 void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
200 { 193 {
201 memset(blocks, 0, sizeof(DCTELEM)*6*64); 194 memset(blocks, 0, sizeof(DCTELEM)*6*64);
202 } 195 }
203 #endif 196 #endif
204 197
205 #ifdef HAVE_DCBZL 198 #ifdef HAVE_DCBZL
206 /* check dcbz report how many bytes are set to 0 by dcbz */ 199 /* check dcbz report how many bytes are set to 0 by dcbz */
208 the intended effect (Apple "fixed" dcbz) 201 the intended effect (Apple "fixed" dcbz)
209 unfortunately this cannot be used unless the assembler 202 unfortunately this cannot be used unless the assembler
210 knows about dcbzl ... */ 203 knows about dcbzl ... */
211 long check_dcbzl_effect(void) 204 long check_dcbzl_effect(void)
212 { 205 {
213 register char *fakedata = av_malloc(1024); 206 register char *fakedata = av_malloc(1024);
214 register char *fakedata_middle; 207 register char *fakedata_middle;
215 register long zero = 0; 208 register long zero = 0;
216 register long i = 0; 209 register long i = 0;
217 long count = 0; 210 long count = 0;
218 211
219 if (!fakedata) 212 if (!fakedata) {
220 { 213 return 0L;
221 return 0L; 214 }
222 } 215
223 216 fakedata_middle = (fakedata + 512);
224 fakedata_middle = (fakedata + 512); 217
225 218 memset(fakedata, 0xFF, 1024);
226 memset(fakedata, 0xFF, 1024); 219
227 220 /* below the constraint "b" seems to mean "Address base register"
228 /* below the constraint "b" seems to mean "Address base register" 221 in gcc-3.3 / RS/6000 speaks. seems to avoid using r0, so.... */
229 in gcc-3.3 / RS/6000 speaks. seems to avoid using r0, so.... */ 222 asm volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero));
230 asm volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero)); 223
231 224 for (i = 0; i < 1024 ; i ++) {
232 for (i = 0; i < 1024 ; i ++) 225 if (fakedata[i] == (char)0)
233 { 226 count++;
234 if (fakedata[i] == (char)0) 227 }
235 count++; 228
236 } 229 av_free(fakedata);
237 230
238 av_free(fakedata); 231 return count;
239
240 return count;
241 } 232 }
242 #else 233 #else
243 long check_dcbzl_effect(void) 234 long check_dcbzl_effect(void)
244 { 235 {
245 return 0; 236 return 0;
284 int_init_altivec(c, avctx); 275 int_init_altivec(c, avctx);
285 c->gmc1 = gmc1_altivec; 276 c->gmc1 = gmc1_altivec;
286 277
287 #ifdef CONFIG_ENCODERS 278 #ifdef CONFIG_ENCODERS
288 if (avctx->dct_algo == FF_DCT_AUTO || 279 if (avctx->dct_algo == FF_DCT_AUTO ||
289 avctx->dct_algo == FF_DCT_ALTIVEC) 280 avctx->dct_algo == FF_DCT_ALTIVEC) {
290 {
291 c->fdct = fdct_altivec; 281 c->fdct = fdct_altivec;
292 } 282 }
293 #endif //CONFIG_ENCODERS 283 #endif //CONFIG_ENCODERS
294 284
295 if (avctx->lowres==0) 285 if (avctx->lowres==0) {
296 { 286 if ((avctx->idct_algo == FF_IDCT_AUTO) ||
297 if ((avctx->idct_algo == FF_IDCT_AUTO) || 287 (avctx->idct_algo == FF_IDCT_ALTIVEC)) {
298 (avctx->idct_algo == FF_IDCT_ALTIVEC)) 288 c->idct_put = idct_put_altivec;
299 { 289 c->idct_add = idct_add_altivec;
300 c->idct_put = idct_put_altivec; 290 c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
301 c->idct_add = idct_add_altivec; 291 }
302 c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
303 }
304 } 292 }
305 293
306 #ifdef CONFIG_POWERPC_PERF 294 #ifdef CONFIG_POWERPC_PERF
307 { 295 {
308 int i, j; 296 int i, j;
309 for (i = 0 ; i < powerpc_perf_total ; i++) 297 for (i = 0 ; i < powerpc_perf_total ; i++) {
310 { 298 for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) {
311 for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) 299 perfdata[j][i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL;
312 { 300 perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL;
313 perfdata[j][i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL; 301 perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL;
314 perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL; 302 perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL;
315 perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL; 303 }
316 perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL;
317 } 304 }
318 }
319 } 305 }
320 #endif /* CONFIG_POWERPC_PERF */ 306 #endif /* CONFIG_POWERPC_PERF */
321 } 307 }
322 #endif /* HAVE_ALTIVEC */ 308 #endif /* HAVE_ALTIVEC */
323 } 309 }