Mercurial > libavcodec.hg
comparison ppc/dsputil_ppc.c @ 7333:a8a79f5385f6 libavcodec
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
This includes indentation changes, comment reformatting, consistent brace
placement and some prettyprinting.
author | diego |
---|---|
date | Sun, 20 Jul 2008 18:58:30 +0000 |
parents | f7cbb7733146 |
children | 3a93377e8b76 |
comparison
equal
deleted
inserted
replaced
7332:b1003e468c3d | 7333:a8a79f5385f6 |
---|---|
58 | 58 |
59 #ifdef CONFIG_POWERPC_PERF | 59 #ifdef CONFIG_POWERPC_PERF |
60 unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total]; | 60 unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total]; |
61 /* list below must match enum in dsputil_ppc.h */ | 61 /* list below must match enum in dsputil_ppc.h */ |
62 static unsigned char* perfname[] = { | 62 static unsigned char* perfname[] = { |
63 "ff_fft_calc_altivec", | 63 "ff_fft_calc_altivec", |
64 "gmc1_altivec", | 64 "gmc1_altivec", |
65 "dct_unquantize_h263_altivec", | 65 "dct_unquantize_h263_altivec", |
66 "fdct_altivec", | 66 "fdct_altivec", |
67 "idct_add_altivec", | 67 "idct_add_altivec", |
68 "idct_put_altivec", | 68 "idct_put_altivec", |
69 "put_pixels16_altivec", | 69 "put_pixels16_altivec", |
70 "avg_pixels16_altivec", | 70 "avg_pixels16_altivec", |
71 "avg_pixels8_altivec", | 71 "avg_pixels8_altivec", |
72 "put_pixels8_xy2_altivec", | 72 "put_pixels8_xy2_altivec", |
73 "put_no_rnd_pixels8_xy2_altivec", | 73 "put_no_rnd_pixels8_xy2_altivec", |
74 "put_pixels16_xy2_altivec", | 74 "put_pixels16_xy2_altivec", |
75 "put_no_rnd_pixels16_xy2_altivec", | 75 "put_no_rnd_pixels16_xy2_altivec", |
76 "hadamard8_diff8x8_altivec", | 76 "hadamard8_diff8x8_altivec", |
77 "hadamard8_diff16_altivec", | 77 "hadamard8_diff16_altivec", |
78 "avg_pixels8_xy2_altivec", | 78 "avg_pixels8_xy2_altivec", |
79 "clear_blocks_dcbz32_ppc", | 79 "clear_blocks_dcbz32_ppc", |
80 "clear_blocks_dcbz128_ppc", | 80 "clear_blocks_dcbz128_ppc", |
81 "put_h264_chroma_mc8_altivec", | 81 "put_h264_chroma_mc8_altivec", |
82 "avg_h264_chroma_mc8_altivec", | 82 "avg_h264_chroma_mc8_altivec", |
83 "put_h264_qpel16_h_lowpass_altivec", | 83 "put_h264_qpel16_h_lowpass_altivec", |
84 "avg_h264_qpel16_h_lowpass_altivec", | 84 "avg_h264_qpel16_h_lowpass_altivec", |
85 "put_h264_qpel16_v_lowpass_altivec", | 85 "put_h264_qpel16_v_lowpass_altivec", |
86 "avg_h264_qpel16_v_lowpass_altivec", | 86 "avg_h264_qpel16_v_lowpass_altivec", |
87 "put_h264_qpel16_hv_lowpass_altivec", | 87 "put_h264_qpel16_hv_lowpass_altivec", |
88 "avg_h264_qpel16_hv_lowpass_altivec", | 88 "avg_h264_qpel16_hv_lowpass_altivec", |
89 "" | 89 "" |
90 }; | 90 }; |
91 #include <stdio.h> | 91 #include <stdio.h> |
92 #endif | 92 #endif |
93 | 93 |
94 #ifdef CONFIG_POWERPC_PERF | 94 #ifdef CONFIG_POWERPC_PERF |
95 void powerpc_display_perf_report(void) | 95 void powerpc_display_perf_report(void) |
96 { | 96 { |
97 int i, j; | 97 int i, j; |
98 av_log(NULL, AV_LOG_INFO, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n"); | 98 av_log(NULL, AV_LOG_INFO, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n"); |
99 for(i = 0 ; i < powerpc_perf_total ; i++) | 99 for(i = 0 ; i < powerpc_perf_total ; i++) { |
100 { | 100 for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) { |
101 for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) | 101 if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0) |
102 { | 102 av_log(NULL, AV_LOG_INFO, |
103 if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0) | 103 " Function \"%s\" (pmc%d):\n\tmin: %"PRIu64"\n\tmax: %"PRIu64"\n\tavg: %1.2lf (%"PRIu64")\n", |
104 av_log(NULL, AV_LOG_INFO, | 104 perfname[i], |
105 " Function \"%s\" (pmc%d):\n\tmin: %"PRIu64"\n\tmax: %"PRIu64"\n\tavg: %1.2lf (%"PRIu64")\n", | 105 j+1, |
106 perfname[i], | 106 perfdata[j][i][powerpc_data_min], |
107 j+1, | 107 perfdata[j][i][powerpc_data_max], |
108 perfdata[j][i][powerpc_data_min], | 108 (double)perfdata[j][i][powerpc_data_sum] / |
109 perfdata[j][i][powerpc_data_max], | 109 (double)perfdata[j][i][powerpc_data_num], |
110 (double)perfdata[j][i][powerpc_data_sum] / | 110 perfdata[j][i][powerpc_data_num]); |
111 (double)perfdata[j][i][powerpc_data_num], | 111 } |
112 perfdata[j][i][powerpc_data_num]); | 112 } |
113 } | |
114 } | |
115 } | 113 } |
116 #endif /* CONFIG_POWERPC_PERF */ | 114 #endif /* CONFIG_POWERPC_PERF */ |
117 | 115 |
118 /* ***** WARNING ***** WARNING ***** WARNING ***** */ | 116 /* ***** WARNING ***** WARNING ***** WARNING ***** */ |
119 /* | 117 /* |
120 clear_blocks_dcbz32_ppc will not work properly | 118 clear_blocks_dcbz32_ppc will not work properly on PowerPC processors with a |
121 on PowerPC processors with a cache line size | 119 cache line size not equal to 32 bytes. |
122 not equal to 32 bytes. | 120 Fortunately all processor used by Apple up to at least the 7450 (aka second |
123 Fortunately all processor used by Apple up to | 121 generation G4) use 32 bytes cache line. |
124 at least the 7450 (aka second generation G4) | 122 This is due to the use of the 'dcbz' instruction. It simply clear to zero a |
125 use 32 bytes cache line. | 123 single cache line, so you need to know the cache line size to use it ! |
126 This is due to the use of the 'dcbz' instruction. | 124 It's absurd, but it's fast... |
127 It simply clear to zero a single cache line, | 125 |
128 so you need to know the cache line size to use it ! | 126 update 24/06/2003 : Apple released yesterday the G5, with a PPC970. cache line |
129 It's absurd, but it's fast... | 127 size: 128 bytes. Oups. |
130 | 128 The semantic of dcbz was changed, it always clear 32 bytes. so the function |
131 update 24/06/2003 : Apple released yesterday the G5, | 129 below will work, but will be slow. So I fixed check_dcbz_effect to use dcbzl, |
132 with a PPC970. cache line size : 128 bytes. Oups. | 130 which is defined to clear a cache line (as dcbz before). So we still can |
133 The semantic of dcbz was changed, it always clear | 131 distinguish, and use dcbz (32 bytes) or dcbzl (one cache line) as required. |
134 32 bytes. so the function below will work, but will | 132 |
135 be slow. So I fixed check_dcbz_effect to use dcbzl, | 133 see <http://developer.apple.com/technotes/tn/tn2087.html> |
136 which is defined to clear a cache line (as dcbz before). | 134 and <http://developer.apple.com/technotes/tn/tn2086.html> |
137 So we still can distinguish, and use dcbz (32 bytes) | |
138 or dcbzl (one cache line) as required. | |
139 | |
140 see <http://developer.apple.com/technotes/tn/tn2087.html> | |
141 and <http://developer.apple.com/technotes/tn/tn2086.html> | |
142 */ | 135 */ |
143 void clear_blocks_dcbz32_ppc(DCTELEM *blocks) | 136 void clear_blocks_dcbz32_ppc(DCTELEM *blocks) |
144 { | 137 { |
145 POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz32, 1); | 138 POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz32, 1); |
146 register int misal = ((unsigned long)blocks & 0x00000010); | 139 register int misal = ((unsigned long)blocks & 0x00000010); |
147 register int i = 0; | 140 register int i = 0; |
148 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1); | 141 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1); |
149 #if 1 | 142 #if 1 |
150 if (misal) { | 143 if (misal) { |
151 ((unsigned long*)blocks)[0] = 0L; | 144 ((unsigned long*)blocks)[0] = 0L; |
152 ((unsigned long*)blocks)[1] = 0L; | 145 ((unsigned long*)blocks)[1] = 0L; |
153 ((unsigned long*)blocks)[2] = 0L; | 146 ((unsigned long*)blocks)[2] = 0L; |
154 ((unsigned long*)blocks)[3] = 0L; | 147 ((unsigned long*)blocks)[3] = 0L; |
155 i += 16; | 148 i += 16; |
156 } | 149 } |
157 for ( ; i < sizeof(DCTELEM)*6*64-31 ; i += 32) { | 150 for ( ; i < sizeof(DCTELEM)*6*64-31 ; i += 32) { |
158 asm volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory"); | 151 asm volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory"); |
159 } | 152 } |
160 if (misal) { | 153 if (misal) { |
161 ((unsigned long*)blocks)[188] = 0L; | 154 ((unsigned long*)blocks)[188] = 0L; |
162 ((unsigned long*)blocks)[189] = 0L; | 155 ((unsigned long*)blocks)[189] = 0L; |
163 ((unsigned long*)blocks)[190] = 0L; | 156 ((unsigned long*)blocks)[190] = 0L; |
164 ((unsigned long*)blocks)[191] = 0L; | 157 ((unsigned long*)blocks)[191] = 0L; |
165 i += 16; | 158 i += 16; |
166 } | 159 } |
167 #else | 160 #else |
168 memset(blocks, 0, sizeof(DCTELEM)*6*64); | 161 memset(blocks, 0, sizeof(DCTELEM)*6*64); |
169 #endif | 162 #endif |
170 POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1); | 163 POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1); |
178 POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz128, 1); | 171 POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz128, 1); |
179 register int misal = ((unsigned long)blocks & 0x0000007f); | 172 register int misal = ((unsigned long)blocks & 0x0000007f); |
180 register int i = 0; | 173 register int i = 0; |
181 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1); | 174 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1); |
182 #if 1 | 175 #if 1 |
183 if (misal) { | 176 if (misal) { |
184 // we could probably also optimize this case, | 177 // we could probably also optimize this case, |
185 // but there's not much point as the machines | 178 // but there's not much point as the machines |
186 // aren't available yet (2003-06-26) | 179 // aren't available yet (2003-06-26) |
187 memset(blocks, 0, sizeof(DCTELEM)*6*64); | 180 memset(blocks, 0, sizeof(DCTELEM)*6*64); |
188 } | 181 } |
189 else | 182 else |
190 for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) { | 183 for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) { |
191 asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory"); | 184 asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory"); |
192 } | 185 } |
193 #else | 186 #else |
194 memset(blocks, 0, sizeof(DCTELEM)*6*64); | 187 memset(blocks, 0, sizeof(DCTELEM)*6*64); |
195 #endif | 188 #endif |
196 POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz128, 1); | 189 POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz128, 1); |
197 } | 190 } |
198 #else | 191 #else |
199 void clear_blocks_dcbz128_ppc(DCTELEM *blocks) | 192 void clear_blocks_dcbz128_ppc(DCTELEM *blocks) |
200 { | 193 { |
201 memset(blocks, 0, sizeof(DCTELEM)*6*64); | 194 memset(blocks, 0, sizeof(DCTELEM)*6*64); |
202 } | 195 } |
203 #endif | 196 #endif |
204 | 197 |
205 #ifdef HAVE_DCBZL | 198 #ifdef HAVE_DCBZL |
206 /* check dcbz report how many bytes are set to 0 by dcbz */ | 199 /* check dcbz report how many bytes are set to 0 by dcbz */ |
208 the intended effect (Apple "fixed" dcbz) | 201 the intended effect (Apple "fixed" dcbz) |
209 unfortunately this cannot be used unless the assembler | 202 unfortunately this cannot be used unless the assembler |
210 knows about dcbzl ... */ | 203 knows about dcbzl ... */ |
211 long check_dcbzl_effect(void) | 204 long check_dcbzl_effect(void) |
212 { | 205 { |
213 register char *fakedata = av_malloc(1024); | 206 register char *fakedata = av_malloc(1024); |
214 register char *fakedata_middle; | 207 register char *fakedata_middle; |
215 register long zero = 0; | 208 register long zero = 0; |
216 register long i = 0; | 209 register long i = 0; |
217 long count = 0; | 210 long count = 0; |
218 | 211 |
219 if (!fakedata) | 212 if (!fakedata) { |
220 { | 213 return 0L; |
221 return 0L; | 214 } |
222 } | 215 |
223 | 216 fakedata_middle = (fakedata + 512); |
224 fakedata_middle = (fakedata + 512); | 217 |
225 | 218 memset(fakedata, 0xFF, 1024); |
226 memset(fakedata, 0xFF, 1024); | 219 |
227 | 220 /* below the constraint "b" seems to mean "Address base register" |
228 /* below the constraint "b" seems to mean "Address base register" | 221 in gcc-3.3 / RS/6000 speaks. seems to avoid using r0, so.... */ |
229 in gcc-3.3 / RS/6000 speaks. seems to avoid using r0, so.... */ | 222 asm volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero)); |
230 asm volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero)); | 223 |
231 | 224 for (i = 0; i < 1024 ; i ++) { |
232 for (i = 0; i < 1024 ; i ++) | 225 if (fakedata[i] == (char)0) |
233 { | 226 count++; |
234 if (fakedata[i] == (char)0) | 227 } |
235 count++; | 228 |
236 } | 229 av_free(fakedata); |
237 | 230 |
238 av_free(fakedata); | 231 return count; |
239 | |
240 return count; | |
241 } | 232 } |
242 #else | 233 #else |
243 long check_dcbzl_effect(void) | 234 long check_dcbzl_effect(void) |
244 { | 235 { |
245 return 0; | 236 return 0; |
284 int_init_altivec(c, avctx); | 275 int_init_altivec(c, avctx); |
285 c->gmc1 = gmc1_altivec; | 276 c->gmc1 = gmc1_altivec; |
286 | 277 |
287 #ifdef CONFIG_ENCODERS | 278 #ifdef CONFIG_ENCODERS |
288 if (avctx->dct_algo == FF_DCT_AUTO || | 279 if (avctx->dct_algo == FF_DCT_AUTO || |
289 avctx->dct_algo == FF_DCT_ALTIVEC) | 280 avctx->dct_algo == FF_DCT_ALTIVEC) { |
290 { | |
291 c->fdct = fdct_altivec; | 281 c->fdct = fdct_altivec; |
292 } | 282 } |
293 #endif //CONFIG_ENCODERS | 283 #endif //CONFIG_ENCODERS |
294 | 284 |
295 if (avctx->lowres==0) | 285 if (avctx->lowres==0) { |
296 { | 286 if ((avctx->idct_algo == FF_IDCT_AUTO) || |
297 if ((avctx->idct_algo == FF_IDCT_AUTO) || | 287 (avctx->idct_algo == FF_IDCT_ALTIVEC)) { |
298 (avctx->idct_algo == FF_IDCT_ALTIVEC)) | 288 c->idct_put = idct_put_altivec; |
299 { | 289 c->idct_add = idct_add_altivec; |
300 c->idct_put = idct_put_altivec; | 290 c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; |
301 c->idct_add = idct_add_altivec; | 291 } |
302 c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; | |
303 } | |
304 } | 292 } |
305 | 293 |
306 #ifdef CONFIG_POWERPC_PERF | 294 #ifdef CONFIG_POWERPC_PERF |
307 { | 295 { |
308 int i, j; | 296 int i, j; |
309 for (i = 0 ; i < powerpc_perf_total ; i++) | 297 for (i = 0 ; i < powerpc_perf_total ; i++) { |
310 { | 298 for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) { |
311 for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) | 299 perfdata[j][i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL; |
312 { | 300 perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL; |
313 perfdata[j][i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL; | 301 perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL; |
314 perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL; | 302 perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL; |
315 perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL; | 303 } |
316 perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL; | |
317 } | 304 } |
318 } | |
319 } | 305 } |
320 #endif /* CONFIG_POWERPC_PERF */ | 306 #endif /* CONFIG_POWERPC_PERF */ |
321 } | 307 } |
322 #endif /* HAVE_ALTIVEC */ | 308 #endif /* HAVE_ALTIVEC */ |
323 } | 309 } |