comparison ppc/dsputil_ppc.h @ 1352:e8ff4783f188 libavcodec

1) Remove TBL support from the PPC performance reporting: it is much more useful to use the PMCs, and with Apple's CHUD it is fairly easy too, so there is no reason to keep useless code around. 2) Make the PPC perf stuff a configure option. 3) Make put_pixels16_altivec a bit faster by unrolling the loop by 4. Patch by Romain Dolbeau <dolbeau at irisa dot fr>.
author michaelni
date Wed, 09 Jul 2003 20:18:13 +0000
parents 09b8fe0f0139
children 6a4cfc5f9f96
comparison
equal deleted inserted replaced
1351:0fc1a6f8ca94 1352:e8ff4783f188
28 #else /* CONFIG_DARWIN */ 28 #else /* CONFIG_DARWIN */
29 /* I don't think any non-Apple assembler knows about DCBZL */ 29 /* I don't think any non-Apple assembler knows about DCBZL */
30 #define NO_DCBZL 30 #define NO_DCBZL
31 #endif /* CONFIG_DARWIN */ 31 #endif /* CONFIG_DARWIN */
32 32
33 #ifdef POWERPC_TBL_PERFORMANCE_REPORT 33 #ifdef POWERPC_PERFORMANCE_REPORT
34 void powerpc_display_perf_report(void); 34 void powerpc_display_perf_report(void);
35 /* the 604* have 2, the G3* have 4, the G4s have 6 */
36 #define POWERPC_NUM_PMC_ENABLED 4
35 /* if you add to the enum below, also add to the perfname array 37 /* if you add to the enum below, also add to the perfname array
36 in dsputil_ppc.c */ 38 in dsputil_ppc.c */
37 enum powerpc_perf_index { 39 enum powerpc_perf_index {
38 altivec_fft_num = 0, 40 altivec_fft_num = 0,
39 altivec_gmc1_num, 41 altivec_gmc1_num,
56 powerpc_data_max, 58 powerpc_data_max,
57 powerpc_data_sum, 59 powerpc_data_sum,
58 powerpc_data_num, 60 powerpc_data_num,
59 powerpc_data_total 61 powerpc_data_total
60 }; 62 };
61 extern unsigned long long perfdata[powerpc_perf_total][powerpc_data_total]; 63 extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total];
62 #ifdef POWERPC_PERF_USE_PMC 64
63 extern unsigned long long perfdata_pmc2[powerpc_perf_total][powerpc_data_total]; 65 #define POWERPC_GET_PMC1(a) asm volatile("mfspr %0, 937" : "=r" (a))
64 extern unsigned long long perfdata_pmc3[powerpc_perf_total][powerpc_data_total]; 66 #define POWERPC_GET_PMC2(a) asm volatile("mfspr %0, 938" : "=r" (a))
67 #if (POWERPC_NUM_PMC_ENABLED > 2)
68 #define POWERPC_GET_PMC3(a) asm volatile("mfspr %0, 941" : "=r" (a))
69 #define POWERPC_GET_PMC4(a) asm volatile("mfspr %0, 942" : "=r" (a))
70 #else
71 #define POWERPC_GET_PMC3(a) do {} while (0)
72 #define POWERPC_GET_PMC4(a) do {} while (0)
65 #endif 73 #endif
66 74 #if (POWERPC_NUM_PMC_ENABLED > 4)
67 #ifndef POWERPC_PERF_USE_PMC 75 #define POWERPC_GET_PMC5(a) asm volatile("mfspr %0, 929" : "=r" (a))
68 #define POWERPC_GET_CYCLES(a) asm volatile("mftb %0" : "=r" (a)) 76 #define POWERPC_GET_PMC6(a) asm volatile("mfspr %0, 930" : "=r" (a))
69 #define POWERPC_TBL_DECLARE(a, cond) register unsigned long tbl_start, tbl_stop 77 #else
70 #define POWERPC_TBL_START_COUNT(a, cond) do { POWERPC_GET_CYCLES(tbl_start); } while (0) 78 #define POWERPC_GET_PMC5(a) do {} while (0)
71 #define POWERPC_TBL_STOP_COUNT(a, cond) do { \ 79 #define POWERPC_GET_PMC6(a) do {} while (0)
72 POWERPC_GET_CYCLES(tbl_stop); \ 80 #endif
73 if (tbl_stop > tbl_start) \ 81 #define POWERPC_PERF_DECLARE(a, cond) unsigned long pmc_start[POWERPC_NUM_PMC_ENABLED], pmc_stop[POWERPC_NUM_PMC_ENABLED], pmc_loop_index;
74 { \ 82 #define POWERPC_PERF_START_COUNT(a, cond) do { \
75 unsigned long diff = tbl_stop - tbl_start; \ 83 POWERPC_GET_PMC6(pmc_start[5]); \
76 if (cond) \ 84 POWERPC_GET_PMC5(pmc_start[4]); \
77 { \ 85 POWERPC_GET_PMC4(pmc_start[3]); \
78 if (diff < perfdata[a][powerpc_data_min]) \ 86 POWERPC_GET_PMC3(pmc_start[2]); \
79 perfdata[a][powerpc_data_min] = diff; \ 87 POWERPC_GET_PMC2(pmc_start[1]); \
80 if (diff > perfdata[a][powerpc_data_max]) \ 88 POWERPC_GET_PMC1(pmc_start[0]); \
81 perfdata[a][powerpc_data_max] = diff; \ 89 } while (0)
82 perfdata[a][powerpc_data_sum] += diff; \ 90 #define POWERPC_PERF_STOP_COUNT(a, cond) do { \
83 perfdata[a][powerpc_data_num] ++; \ 91 POWERPC_GET_PMC1(pmc_stop[0]); \
84 } \ 92 POWERPC_GET_PMC2(pmc_stop[1]); \
85 } \ 93 POWERPC_GET_PMC3(pmc_stop[2]); \
94 POWERPC_GET_PMC4(pmc_stop[3]); \
95 POWERPC_GET_PMC5(pmc_stop[4]); \
96 POWERPC_GET_PMC6(pmc_stop[5]); \
97 if (cond) \
98 { \
99 for(pmc_loop_index = 0; \
100 pmc_loop_index < POWERPC_NUM_PMC_ENABLED; \
101 pmc_loop_index++) \
102 { \
103 if (pmc_stop[pmc_loop_index] >= pmc_start[pmc_loop_index]) \
104 { \
105 unsigned long diff = \
106 pmc_stop[pmc_loop_index] - pmc_start[pmc_loop_index]; \
107 if (diff < perfdata[pmc_loop_index][a][powerpc_data_min]) \
108 perfdata[pmc_loop_index][a][powerpc_data_min] = diff; \
109 if (diff > perfdata[pmc_loop_index][a][powerpc_data_max]) \
110 perfdata[pmc_loop_index][a][powerpc_data_max] = diff; \
111 perfdata[pmc_loop_index][a][powerpc_data_sum] += diff; \
112 perfdata[pmc_loop_index][a][powerpc_data_num] ++; \
113 } \
114 } \
115 } \
86 } while (0) 116 } while (0)
87 117 #else /* POWERPC_PERFORMANCE_REPORT */
88 #else /* POWERPC_PERF_USE_PMC */
89 #define POWERPC_GET_CYCLES(a) asm volatile("mfspr %0, 937" : "=r" (a))
90 #define POWERPC_GET_PMC2(a) asm volatile("mfspr %0, 938" : "=r" (a))
91 #define POWERPC_GET_PMC3(a) asm volatile("mfspr %0, 941" : "=r" (a))
92 #define POWERPC_TBL_DECLARE(a, cond) register unsigned long cycles_start, cycles_stop, pmc2_start, pmc2_stop, pmc3_start, pmc3_stop
93 #define POWERPC_TBL_START_COUNT(a, cond) do { \
94 POWERPC_GET_PMC3(pmc3_start); \
95 POWERPC_GET_PMC2(pmc2_start); \
96 POWERPC_GET_CYCLES(cycles_start); } while (0)
97 #define POWERPC_TBL_STOP_COUNT(a, cond) do { \
98 POWERPC_GET_CYCLES(cycles_stop); \
99 POWERPC_GET_PMC2(pmc2_stop); \
100 POWERPC_GET_PMC3(pmc3_stop); \
101 if (cycles_stop >= cycles_start) \
102 { \
103 unsigned long diff = \
104 cycles_stop - cycles_start; \
105 if (cond) \
106 { \
107 if (diff < perfdata[a][powerpc_data_min]) \
108 perfdata[a][powerpc_data_min] = diff; \
109 if (diff > perfdata[a][powerpc_data_max]) \
110 perfdata[a][powerpc_data_max] = diff; \
111 perfdata[a][powerpc_data_sum] += diff; \
112 perfdata[a][powerpc_data_num] ++; \
113 } \
114 } \
115 if (pmc2_stop >= pmc2_start) \
116 { \
117 unsigned long diff = \
118 pmc2_stop - pmc2_start; \
119 if (cond) \
120 { \
121 if (diff < perfdata_pmc2[a][powerpc_data_min]) \
122 perfdata_pmc2[a][powerpc_data_min] = diff; \
123 if (diff > perfdata_pmc2[a][powerpc_data_max]) \
124 perfdata_pmc2[a][powerpc_data_max] = diff; \
125 perfdata_pmc2[a][powerpc_data_sum] += diff; \
126 perfdata_pmc2[a][powerpc_data_num] ++; \
127 } \
128 } \
129 if (pmc3_stop >= pmc3_start) \
130 { \
131 unsigned long diff = \
132 pmc3_stop - pmc3_start; \
133 if (cond) \
134 { \
135 if (diff < perfdata_pmc3[a][powerpc_data_min]) \
136 perfdata_pmc3[a][powerpc_data_min] = diff; \
137 if (diff > perfdata_pmc3[a][powerpc_data_max]) \
138 perfdata_pmc3[a][powerpc_data_max] = diff; \
139 perfdata_pmc3[a][powerpc_data_sum] += diff; \
140 perfdata_pmc3[a][powerpc_data_num] ++; \
141 } \
142 } \
143 } while (0)
144
145 #endif /* POWERPC_PERF_USE_PMC */
146
147
148 #else /* POWERPC_TBL_PERFORMANCE_REPORT */
149 // those are needed to avoid empty statements. 118 // those are needed to avoid empty statements.
150 #define POWERPC_TBL_DECLARE(a, cond) int altivec_placeholder __attribute__ ((unused)) 119 #define POWERPC_PERF_DECLARE(a, cond) int altivec_placeholder __attribute__ ((unused))
151 #define POWERPC_TBL_START_COUNT(a, cond) do {} while (0) 120 #define POWERPC_PERF_START_COUNT(a, cond) do {} while (0)
152 #define POWERPC_TBL_STOP_COUNT(a, cond) do {} while (0) 121 #define POWERPC_PERF_STOP_COUNT(a, cond) do {} while (0)
153 #endif /* POWERPC_TBL_PERFORMANCE_REPORT */ 122 #endif /* POWERPC_PERFORMANCE_REPORT */
154 123
155 #endif /* _DSPUTIL_PPC_ */ 124 #endif /* _DSPUTIL_PPC_ */