Mercurial > libavcodec.hg
comparison ppc/dsputil_ppc.h @ 1352:e8ff4783f188 libavcodec
1) remove TBL (time base register) support from the PPC performance reporting. It's much more useful to use the
PMCs (performance monitor counters), and with Apple's CHUD tools it's fairly easy too. There is no reason to keep useless
code around.
2) make the PPC perf stuff a configure option
3) make put_pixels16_altivec a bit faster by unrolling the loop by 4
patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
author | michaelni |
---|---|
date | Wed, 09 Jul 2003 20:18:13 +0000 |
parents | 09b8fe0f0139 |
children | 6a4cfc5f9f96 |
comparison
equal
deleted
inserted
replaced
1351:0fc1a6f8ca94 | 1352:e8ff4783f188 |
---|---|
28 #else /* CONFIG_DARWIN */ | 28 #else /* CONFIG_DARWIN */ |
29 /* I don't think any non-Apple assembler knows about DCBZL */ | 29 /* I don't think any non-Apple assembler knows about DCBZL */ |
30 #define NO_DCBZL | 30 #define NO_DCBZL |
31 #endif /* CONFIG_DARWIN */ | 31 #endif /* CONFIG_DARWIN */ |
32 | 32 |
33 #ifdef POWERPC_TBL_PERFORMANCE_REPORT | 33 #ifdef POWERPC_PERFORMANCE_REPORT |
34 void powerpc_display_perf_report(void); | 34 void powerpc_display_perf_report(void); |
35 /* the 604* have 2, the G3* have 4, the G4s have 6 */ | |
36 #define POWERPC_NUM_PMC_ENABLED 4 | |
35 /* if you add to the enum below, also add to the perfname array | 37 /* if you add to the enum below, also add to the perfname array |
36 in dsputil_ppc.c */ | 38 in dsputil_ppc.c */ |
37 enum powerpc_perf_index { | 39 enum powerpc_perf_index { |
38 altivec_fft_num = 0, | 40 altivec_fft_num = 0, |
39 altivec_gmc1_num, | 41 altivec_gmc1_num, |
56 powerpc_data_max, | 58 powerpc_data_max, |
57 powerpc_data_sum, | 59 powerpc_data_sum, |
58 powerpc_data_num, | 60 powerpc_data_num, |
59 powerpc_data_total | 61 powerpc_data_total |
60 }; | 62 }; |
61 extern unsigned long long perfdata[powerpc_perf_total][powerpc_data_total]; | 63 extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total]; |
62 #ifdef POWERPC_PERF_USE_PMC | 64 |
63 extern unsigned long long perfdata_pmc2[powerpc_perf_total][powerpc_data_total]; | 65 #define POWERPC_GET_PMC1(a) asm volatile("mfspr %0, 937" : "=r" (a)) |
64 extern unsigned long long perfdata_pmc3[powerpc_perf_total][powerpc_data_total]; | 66 #define POWERPC_GET_PMC2(a) asm volatile("mfspr %0, 938" : "=r" (a)) |
67 #if (POWERPC_NUM_PMC_ENABLED > 2) | |
68 #define POWERPC_GET_PMC3(a) asm volatile("mfspr %0, 941" : "=r" (a)) | |
69 #define POWERPC_GET_PMC4(a) asm volatile("mfspr %0, 942" : "=r" (a)) | |
70 #else | |
71 #define POWERPC_GET_PMC3(a) do {} while (0) | |
72 #define POWERPC_GET_PMC4(a) do {} while (0) | |
65 #endif | 73 #endif |
66 | 74 #if (POWERPC_NUM_PMC_ENABLED > 4) |
67 #ifndef POWERPC_PERF_USE_PMC | 75 #define POWERPC_GET_PMC5(a) asm volatile("mfspr %0, 929" : "=r" (a)) |
68 #define POWERPC_GET_CYCLES(a) asm volatile("mftb %0" : "=r" (a)) | 76 #define POWERPC_GET_PMC6(a) asm volatile("mfspr %0, 930" : "=r" (a)) |
69 #define POWERPC_TBL_DECLARE(a, cond) register unsigned long tbl_start, tbl_stop | 77 #else |
70 #define POWERPC_TBL_START_COUNT(a, cond) do { POWERPC_GET_CYCLES(tbl_start); } while (0) | 78 #define POWERPC_GET_PMC5(a) do {} while (0) |
71 #define POWERPC_TBL_STOP_COUNT(a, cond) do { \ | 79 #define POWERPC_GET_PMC6(a) do {} while (0) |
72 POWERPC_GET_CYCLES(tbl_stop); \ | 80 #endif |
73 if (tbl_stop > tbl_start) \ | 81 #define POWERPC_PERF_DECLARE(a, cond) unsigned long pmc_start[POWERPC_NUM_PMC_ENABLED], pmc_stop[POWERPC_NUM_PMC_ENABLED], pmc_loop_index; |
74 { \ | 82 #define POWERPC_PERF_START_COUNT(a, cond) do { \ |
75 unsigned long diff = tbl_stop - tbl_start; \ | 83 POWERPC_GET_PMC6(pmc_start[5]); \ |
76 if (cond) \ | 84 POWERPC_GET_PMC5(pmc_start[4]); \ |
77 { \ | 85 POWERPC_GET_PMC4(pmc_start[3]); \ |
78 if (diff < perfdata[a][powerpc_data_min]) \ | 86 POWERPC_GET_PMC3(pmc_start[2]); \ |
79 perfdata[a][powerpc_data_min] = diff; \ | 87 POWERPC_GET_PMC2(pmc_start[1]); \ |
80 if (diff > perfdata[a][powerpc_data_max]) \ | 88 POWERPC_GET_PMC1(pmc_start[0]); \ |
81 perfdata[a][powerpc_data_max] = diff; \ | 89 } while (0) |
82 perfdata[a][powerpc_data_sum] += diff; \ | 90 #define POWERPC_PERF_STOP_COUNT(a, cond) do { \ |
83 perfdata[a][powerpc_data_num] ++; \ | 91 POWERPC_GET_PMC1(pmc_stop[0]); \ |
84 } \ | 92 POWERPC_GET_PMC2(pmc_stop[1]); \ |
85 } \ | 93 POWERPC_GET_PMC3(pmc_stop[2]); \ |
94 POWERPC_GET_PMC4(pmc_stop[3]); \ | |
95 POWERPC_GET_PMC5(pmc_stop[4]); \ | |
96 POWERPC_GET_PMC6(pmc_stop[5]); \ | |
97 if (cond) \ | |
98 { \ | |
99 for(pmc_loop_index = 0; \ | |
100 pmc_loop_index < POWERPC_NUM_PMC_ENABLED; \ | |
101 pmc_loop_index++) \ | |
102 { \ | |
103 if (pmc_stop[pmc_loop_index] >= pmc_start[pmc_loop_index]) \ | |
104 { \ | |
105 unsigned long diff = \ | |
106 pmc_stop[pmc_loop_index] - pmc_start[pmc_loop_index]; \ | |
107 if (diff < perfdata[pmc_loop_index][a][powerpc_data_min]) \ | |
108 perfdata[pmc_loop_index][a][powerpc_data_min] = diff; \ | |
109 if (diff > perfdata[pmc_loop_index][a][powerpc_data_max]) \ | |
110 perfdata[pmc_loop_index][a][powerpc_data_max] = diff; \ | |
111 perfdata[pmc_loop_index][a][powerpc_data_sum] += diff; \ | |
112 perfdata[pmc_loop_index][a][powerpc_data_num] ++; \ | |
113 } \ | |
114 } \ | |
115 } \ | |
86 } while (0) | 116 } while (0) |
87 | 117 #else /* POWERPC_PERFORMANCE_REPORT */ |
88 #else /* POWERPC_PERF_USE_PMC */ | |
89 #define POWERPC_GET_CYCLES(a) asm volatile("mfspr %0, 937" : "=r" (a)) | |
90 #define POWERPC_GET_PMC2(a) asm volatile("mfspr %0, 938" : "=r" (a)) | |
91 #define POWERPC_GET_PMC3(a) asm volatile("mfspr %0, 941" : "=r" (a)) | |
92 #define POWERPC_TBL_DECLARE(a, cond) register unsigned long cycles_start, cycles_stop, pmc2_start, pmc2_stop, pmc3_start, pmc3_stop | |
93 #define POWERPC_TBL_START_COUNT(a, cond) do { \ | |
94 POWERPC_GET_PMC3(pmc3_start); \ | |
95 POWERPC_GET_PMC2(pmc2_start); \ | |
96 POWERPC_GET_CYCLES(cycles_start); } while (0) | |
97 #define POWERPC_TBL_STOP_COUNT(a, cond) do { \ | |
98 POWERPC_GET_CYCLES(cycles_stop); \ | |
99 POWERPC_GET_PMC2(pmc2_stop); \ | |
100 POWERPC_GET_PMC3(pmc3_stop); \ | |
101 if (cycles_stop >= cycles_start) \ | |
102 { \ | |
103 unsigned long diff = \ | |
104 cycles_stop - cycles_start; \ | |
105 if (cond) \ | |
106 { \ | |
107 if (diff < perfdata[a][powerpc_data_min]) \ | |
108 perfdata[a][powerpc_data_min] = diff; \ | |
109 if (diff > perfdata[a][powerpc_data_max]) \ | |
110 perfdata[a][powerpc_data_max] = diff; \ | |
111 perfdata[a][powerpc_data_sum] += diff; \ | |
112 perfdata[a][powerpc_data_num] ++; \ | |
113 } \ | |
114 } \ | |
115 if (pmc2_stop >= pmc2_start) \ | |
116 { \ | |
117 unsigned long diff = \ | |
118 pmc2_stop - pmc2_start; \ | |
119 if (cond) \ | |
120 { \ | |
121 if (diff < perfdata_pmc2[a][powerpc_data_min]) \ | |
122 perfdata_pmc2[a][powerpc_data_min] = diff; \ | |
123 if (diff > perfdata_pmc2[a][powerpc_data_max]) \ | |
124 perfdata_pmc2[a][powerpc_data_max] = diff; \ | |
125 perfdata_pmc2[a][powerpc_data_sum] += diff; \ | |
126 perfdata_pmc2[a][powerpc_data_num] ++; \ | |
127 } \ | |
128 } \ | |
129 if (pmc3_stop >= pmc3_start) \ | |
130 { \ | |
131 unsigned long diff = \ | |
132 pmc3_stop - pmc3_start; \ | |
133 if (cond) \ | |
134 { \ | |
135 if (diff < perfdata_pmc3[a][powerpc_data_min]) \ | |
136 perfdata_pmc3[a][powerpc_data_min] = diff; \ | |
137 if (diff > perfdata_pmc3[a][powerpc_data_max]) \ | |
138 perfdata_pmc3[a][powerpc_data_max] = diff; \ | |
139 perfdata_pmc3[a][powerpc_data_sum] += diff; \ | |
140 perfdata_pmc3[a][powerpc_data_num] ++; \ | |
141 } \ | |
142 } \ | |
143 } while (0) | |
144 | |
145 #endif /* POWERPC_PERF_USE_PMC */ | |
146 | |
147 | |
148 #else /* POWERPC_TBL_PERFORMANCE_REPORT */ | |
149 // those are needed to avoid empty statements. | 118 // those are needed to avoid empty statements. |
150 #define POWERPC_TBL_DECLARE(a, cond) int altivec_placeholder __attribute__ ((unused)) | 119 #define POWERPC_PERF_DECLARE(a, cond) int altivec_placeholder __attribute__ ((unused)) |
151 #define POWERPC_TBL_START_COUNT(a, cond) do {} while (0) | 120 #define POWERPC_PERF_START_COUNT(a, cond) do {} while (0) |
152 #define POWERPC_TBL_STOP_COUNT(a, cond) do {} while (0) | 121 #define POWERPC_PERF_STOP_COUNT(a, cond) do {} while (0) |
153 #endif /* POWERPC_TBL_PERFORMANCE_REPORT */ | 122 #endif /* POWERPC_PERFORMANCE_REPORT */ |
154 | 123 |
155 #endif /* _DSPUTIL_PPC_ */ | 124 #endif /* _DSPUTIL_PPC_ */ |