Mercurial > libavcodec.hg
comparison ppc/dsputil_ppc.c @ 1015:35cf2f4a0f8c libavcodec
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
author | michaelni |
---|---|
date | Sun, 19 Jan 2003 19:00:45 +0000 |
parents | 3b7cc8e4b83f |
children | 9cc1031e1864 |
comparison
equal
deleted
inserted
replaced
1014:48349e11c9b2 | 1015:35cf2f4a0f8c |
---|---|
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 */ | 18 */ |
19 | 19 |
20 #include "../dsputil.h" | 20 #include "../dsputil.h" |
21 | 21 |
22 #include "dsputil_ppc.h" | |
23 | |
22 #ifdef HAVE_ALTIVEC | 24 #ifdef HAVE_ALTIVEC |
23 #include "dsputil_altivec.h" | 25 #include "dsputil_altivec.h" |
24 #endif | 26 #endif |
25 | 27 |
26 int mm_flags = 0; | 28 int mm_flags = 0; |
34 } | 36 } |
35 #endif /* result */ | 37 #endif /* result */ |
36 return result; | 38 return result; |
37 } | 39 } |
38 | 40 |
#ifdef POWERPC_TBL_PERFORMANCE_REPORT
/* Per-function counters, indexed [function][statistic]; they are reset in
   dsputil_init_ppc() and printed by powerpc_display_perf_report(). */
unsigned long long perfdata[powerpc_perf_total][powerpc_data_total];
/* Human-readable names for the rows of perfdata, used only by the report.
   The list below must match the performance-index enum (the original note
   names dsputil_altivec.h).
   NOTE(review): the powerpc_* identifiers suggest the enum may now live in
   dsputil_ppc.h - confirm. */
/* const char (not unsigned char): the entries are string literals that are
   only ever passed to fprintf("%s"). */
static const char *const perfname[] = {
  "fft_calc_altivec",
  "gmc1_altivec",
  "dct_unquantize_h263_altivec",
  "idct_add_altivec",
  "idct_put_altivec",
  "put_pixels_clamped_altivec",
  "put_pixels16_altivec",
  "avg_pixels16_altivec",
  "avg_pixels8_altivec",
  "put_pixels8_xy2_altivec",
  "clear_blocks_dcbz32_ppc"
};
#ifdef POWERPC_PERF_USE_PMC
/* Second set of counters, reported under the "pmc2" label. */
unsigned long long perfdata_miss[powerpc_perf_total][powerpc_data_total];
#endif
#include <stdio.h>
#endif
62 | |
#ifdef POWERPC_TBL_PERFORMANCE_REPORT
/*
 * Write the collected performance statistics to stderr.
 *
 * After a one-line banner saying which counter source was compiled in,
 * every function whose call count (powerpc_data_num) is non-zero gets its
 * minimum, maximum, average (sum / count) and call count printed.
 * With POWERPC_PERF_USE_PMC the same is done for perfdata_miss under the
 * "pmc2" label.
 */
void powerpc_display_perf_report(void)
{
  int fn;

#ifdef POWERPC_PERF_USE_PMC
  fprintf(stderr, "AltiVec performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n");
#else /* !POWERPC_PERF_USE_PMC */
  fprintf(stderr, "AltiVec performance report\n Values are from the Time Base register, and represent 4 bus cycles.\n");
#endif /* POWERPC_PERF_USE_PMC */

  for (fn = 0; fn < powerpc_perf_total; fn++) {
    const unsigned long long *stats = perfdata[fn];
#ifdef POWERPC_PERF_USE_PMC
    const unsigned long long *miss = perfdata_miss[fn];
#endif

    /* Functions that were never called are left out of the report. */
    if (stats[powerpc_data_num] != 0) {
      double mean = (double)stats[powerpc_data_sum] /
                    (double)stats[powerpc_data_num];

      fprintf(stderr, " Function \"%s\" (pmc1):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
              perfname[fn],
              stats[powerpc_data_min],
              stats[powerpc_data_max],
              mean,
              stats[powerpc_data_num]);
    }
#ifdef POWERPC_PERF_USE_PMC
    if (miss[powerpc_data_num] != 0) {
      double mean_miss = (double)miss[powerpc_data_sum] /
                         (double)miss[powerpc_data_num];

      fprintf(stderr, " Function \"%s\" (pmc2):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
              perfname[fn],
              miss[powerpc_data_min],
              miss[powerpc_data_max],
              mean_miss,
              miss[powerpc_data_num]);
    }
#endif
  }
}
#endif /* POWERPC_TBL_PERFORMANCE_REPORT */
95 | |
96 /* ***** WARNING ***** WARNING ***** WARNING ***** */ | |
97 /* | |
98 clear_blocks_dcbz32_ppc will not work properly | |
99 on PowerPC processors with a cache line size | |
100 not equal to 32 bytes. | |
101 Fortunately all processor used by Apple up to | |
102 at least the 7450 (aka second generation G4) | |
103 use 32 bytes cache line. | |
104 This is due to the use of the 'dcbz' instruction. | |
105 It simply clear to zero a single cache line, | |
106 so you need to know the cache line size to use it ! | |
107 It's absurd, but it's fast... | |
108 */ | |
109 void clear_blocks_dcbz32_ppc(DCTELEM *blocks) | |
110 { | |
111 POWERPC_TBL_DECLARE(powerpc_clear_blocks_dcbz32, 1); | |
112 register int misal = ((unsigned long)blocks & 0x00000010); | |
113 register int i = 0; | |
114 POWERPC_TBL_START_COUNT(powerpc_clear_blocks_dcbz32, 1); | |
115 #if 1 | |
116 if (misal) { | |
117 ((unsigned long*)blocks)[0] = 0L; | |
118 ((unsigned long*)blocks)[1] = 0L; | |
119 ((unsigned long*)blocks)[2] = 0L; | |
120 ((unsigned long*)blocks)[3] = 0L; | |
121 vec_st((vector short)(0), 0, blocks); | |
122 i += 16; | |
123 } | |
124 for ( ; i < sizeof(DCTELEM)*6*64 ; i += 32) { | |
125 asm volatile("dcbz %0,%1" : : "r" (blocks), "r" (i) : "memory"); | |
126 } | |
127 if (misal) { | |
128 ((unsigned long*)blocks)[188] = 0L; | |
129 ((unsigned long*)blocks)[189] = 0L; | |
130 ((unsigned long*)blocks)[190] = 0L; | |
131 ((unsigned long*)blocks)[191] = 0L; | |
132 i += 16; | |
133 } | |
134 #else | |
135 memset(blocks, 0, sizeof(DCTELEM)*6*64); | |
136 #endif | |
137 POWERPC_TBL_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1); | |
138 } | |
139 | |
/*
  check_dcbz_effect reports how many bytes a single dcbz sets to 0.

  A 1024-byte scratch buffer is filled with 0xFF, one dcbz is issued on
  the address in its middle, and the bytes that read back as zero are
  counted.
  Returns that count, or 0 if the scratch buffer cannot be allocated.
*/
long check_dcbz_effect(void)
{
    unsigned char *fakedata = (unsigned char *)malloc(1024);
    unsigned char *fakedata_middle;
    long zero = 0;
    long i;
    long count = 0;

    if (fakedata == NULL)
        return 0L;

    /* Aim at the middle so that the cleared line, wherever it starts
       relative to this address, has 512 bytes of room on either side. */
    fakedata_middle = fakedata + 512;

    memset(fakedata, 0xFF, 1024);

    /* "b" keeps the base address out of r0 (dcbz reads r0 as the constant
       0 in that slot); the "memory" clobber tells the compiler that the
       buffer contents changed, so the loop below really re-reads them. */
    __asm__ volatile("dcbz %0, %1" : : "b" (fakedata_middle), "r" (zero) : "memory");

    for (i = 0; i < 1024; i++) {
        if (fakedata[i] == 0)
            count++;
    }

    free(fakedata);

    return count;
}
171 | |
39 void dsputil_init_ppc(DSPContext* c, unsigned mask) | 172 void dsputil_init_ppc(DSPContext* c, unsigned mask) |
40 { | 173 { |
41 // Common optimisations whether Altivec or not | 174 // Common optimisations whether Altivec or not |
42 | 175 |
43 // ... pending ... | 176 switch (check_dcbz_effect()) { |
44 | 177 case 32: |
178 c->clear_blocks = clear_blocks_dcbz32_ppc; | |
179 break; | |
180 default: | |
181 break; | |
182 } | |
183 | |
45 #if HAVE_ALTIVEC | 184 #if HAVE_ALTIVEC |
46 if (has_altivec()) { | 185 if (has_altivec()) { |
47 mm_flags |= MM_ALTIVEC; | 186 mm_flags |= MM_ALTIVEC; |
48 | 187 |
49 // Altivec specific optimisations | 188 // Altivec specific optimisations |
65 c->add_bytes= add_bytes_altivec; | 204 c->add_bytes= add_bytes_altivec; |
66 c->put_pixels_clamped = put_pixels_clamped_altivec; | 205 c->put_pixels_clamped = put_pixels_clamped_altivec; |
67 #endif | 206 #endif |
68 c->put_pixels_tab[0][0] = put_pixels16_altivec; | 207 c->put_pixels_tab[0][0] = put_pixels16_altivec; |
69 c->avg_pixels_tab[0][0] = avg_pixels16_altivec; | 208 c->avg_pixels_tab[0][0] = avg_pixels16_altivec; |
209 // next one disabled as it's untested. | |
210 #if 0 | |
211 c->avg_pixels_tab[1][0] = avg_pixels8_altivec; | |
212 #endif | |
213 c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec; | |
214 | |
70 c->gmc1 = gmc1_altivec; | 215 c->gmc1 = gmc1_altivec; |
71 | 216 |
72 #ifdef ALTIVEC_TBL_PERFORMANCE_REPORT | 217 #ifdef POWERPC_TBL_PERFORMANCE_REPORT |
73 { | 218 { |
74 int i; | 219 int i; |
75 for (i = 0 ; i < altivec_perf_total ; i++) | 220 for (i = 0 ; i < powerpc_perf_total ; i++) |
76 { | 221 { |
77 perfdata[i][altivec_data_min] = 0xFFFFFFFFFFFFFFFF; | 222 perfdata[i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFF; |
78 perfdata[i][altivec_data_max] = 0x0000000000000000; | 223 perfdata[i][powerpc_data_max] = 0x0000000000000000; |
79 perfdata[i][altivec_data_sum] = 0x0000000000000000; | 224 perfdata[i][powerpc_data_sum] = 0x0000000000000000; |
80 perfdata[i][altivec_data_num] = 0x0000000000000000; | 225 perfdata[i][powerpc_data_num] = 0x0000000000000000; |
226 #ifdef POWERPC_PERF_USE_PMC | |
227 perfdata_miss[i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFF; | |
228 perfdata_miss[i][powerpc_data_max] = 0x0000000000000000; | |
229 perfdata_miss[i][powerpc_data_sum] = 0x0000000000000000; | |
230 perfdata_miss[i][powerpc_data_num] = 0x0000000000000000; | |
231 #endif | |
81 } | 232 } |
82 } | 233 } |
83 #endif | 234 #endif |
84 } else | 235 } else |
85 #endif | 236 #endif |