Mercurial > libavcodec.hg
annotate ppc/dsputil_ppc.h @ 3980:5afe4253a220 libavcodec
replace a few and/sub/... by cmov
this is faster on P3, should be faster on AMD, and should be slower on P4
it's disabled by default (benchmarks welcome so we know when to enable it)
author | michael |
---|---|
date | Tue, 10 Oct 2006 01:08:39 +0000 |
parents | 6020c2f3b16b |
children | 891590781d9e |
rev | line source |
---|---|
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
1 /* |
1949
66215baae7b9
hadamard8_diff8x8 in AltiVec, the 16bits edition by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1578
diff
changeset
|
2 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org> |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
3 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
4 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
5 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
6 * FFmpeg is free software; you can redistribute it and/or |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
7 * modify it under the terms of the GNU Lesser General Public |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
8 * License as published by the Free Software Foundation; either |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
9 * version 2.1 of the License, or (at your option) any later version. |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
10 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
11 * FFmpeg is distributed in the hope that it will be useful, |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
14 * Lesser General Public License for more details. |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
15 * |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
16 * You should have received a copy of the GNU Lesser General Public |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
17 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2979
diff
changeset
|
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
19 */ |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
20 |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
21 #ifndef _DSPUTIL_PPC_ |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
22 #define _DSPUTIL_PPC_ |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
23 |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
24 #ifdef POWERPC_PERFORMANCE_REPORT |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
25 void powerpc_display_perf_report(void); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
2057
diff
changeset
|
26 /* the 604* have 2 PMCs, the G3* have 4, the G4s have 6, |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
2057
diff
changeset
|
27 and the G5 are completely different (they MUST use |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
2057
diff
changeset
|
28 POWERPC_MODE_64BITS, and let's hope all future 64-bit PPC |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
2057
diff
changeset
|
29 will use the same PMCs... */ |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
2057
diff
changeset
|
30 #define POWERPC_NUM_PMC_ENABLED 6 |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
31 /* if you add to the enum below, also add to the perfname array |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
32 in dsputil_ppc.c */ |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
33 enum powerpc_perf_index { |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
34 altivec_fft_num = 0, |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
35 altivec_gmc1_num, |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
36 altivec_dct_unquantize_h263_num, |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
1352
diff
changeset
|
37 altivec_fdct, |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
38 altivec_idct_add_num, |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
39 altivec_idct_put_num, |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
40 altivec_put_pixels16_num, |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
41 altivec_avg_pixels16_num, |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
42 altivec_avg_pixels8_num, |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
43 altivec_put_pixels8_xy2_num, |
1024
9cc1031e1864
More AltiVec MC functions patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1015
diff
changeset
|
44 altivec_put_no_rnd_pixels8_xy2_num, |
9cc1031e1864
More AltiVec MC functions patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1015
diff
changeset
|
45 altivec_put_pixels16_xy2_num, |
9cc1031e1864
More AltiVec MC functions patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1015
diff
changeset
|
46 altivec_put_no_rnd_pixels16_xy2_num, |
1949
66215baae7b9
hadamard8_diff8x8 in AltiVec, the 16bits edition by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1578
diff
changeset
|
47 altivec_hadamard8_diff8x8_num, |
1951
2599b8444831
better hadamard8_diff16 in AltiVec, and more patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1949
diff
changeset
|
48 altivec_hadamard8_diff16_num, |
2057
4c663228e020
avg_pixels8_xy2_altivec in AltiVec, enabling avg_pixels8_altivec, hadamard fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1951
diff
changeset
|
49 altivec_avg_pixels8_xy2_num, |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
50 powerpc_clear_blocks_dcbz32, |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1045
diff
changeset
|
51 powerpc_clear_blocks_dcbz128, |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
2057
diff
changeset
|
52 altivec_put_h264_chroma_mc8_num, |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
2057
diff
changeset
|
53 altivec_avg_h264_chroma_mc8_num, |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
2057
diff
changeset
|
54 altivec_put_h264_qpel16_h_lowpass_num, |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
2057
diff
changeset
|
55 altivec_avg_h264_qpel16_h_lowpass_num, |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
2057
diff
changeset
|
56 altivec_put_h264_qpel16_v_lowpass_num, |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
2057
diff
changeset
|
57 altivec_avg_h264_qpel16_v_lowpass_num, |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
2057
diff
changeset
|
58 altivec_put_h264_qpel16_hv_lowpass_num, |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
2057
diff
changeset
|
59 altivec_avg_h264_qpel16_hv_lowpass_num, |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
60 powerpc_perf_total |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
61 }; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
62 enum powerpc_data_index { |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
63 powerpc_data_min = 0, |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
64 powerpc_data_max, |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
65 powerpc_data_sum, |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
66 powerpc_data_num, |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
67 powerpc_data_total |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
68 }; |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
69 extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total]; |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
70 |
1951
2599b8444831
better hadamard8_diff16 in AltiVec, and more patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1949
diff
changeset
|
71 #ifndef POWERPC_MODE_64BITS |
2599b8444831
better hadamard8_diff16 in AltiVec, and more patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1949
diff
changeset
|
72 #define POWERP_PMC_DATATYPE unsigned long |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
73 #define POWERPC_GET_PMC1(a) asm volatile("mfspr %0, 937" : "=r" (a)) |
1340
09b8fe0f0139
PPC fixes & clean-up patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1334
diff
changeset
|
74 #define POWERPC_GET_PMC2(a) asm volatile("mfspr %0, 938" : "=r" (a)) |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
75 #if (POWERPC_NUM_PMC_ENABLED > 2) |
1340
09b8fe0f0139
PPC fixes & clean-up patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1334
diff
changeset
|
76 #define POWERPC_GET_PMC3(a) asm volatile("mfspr %0, 941" : "=r" (a)) |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
77 #define POWERPC_GET_PMC4(a) asm volatile("mfspr %0, 942" : "=r" (a)) |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
78 #else |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
79 #define POWERPC_GET_PMC3(a) do {} while (0) |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
80 #define POWERPC_GET_PMC4(a) do {} while (0) |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
81 #endif |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
82 #if (POWERPC_NUM_PMC_ENABLED > 4) |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
83 #define POWERPC_GET_PMC5(a) asm volatile("mfspr %0, 929" : "=r" (a)) |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
84 #define POWERPC_GET_PMC6(a) asm volatile("mfspr %0, 930" : "=r" (a)) |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
85 #else |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
86 #define POWERPC_GET_PMC5(a) do {} while (0) |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
87 #define POWERPC_GET_PMC6(a) do {} while (0) |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
88 #endif |
1951
2599b8444831
better hadamard8_diff16 in AltiVec, and more patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1949
diff
changeset
|
89 #else /* POWERPC_MODE_64BITS */ |
2599b8444831
better hadamard8_diff16 in AltiVec, and more patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1949
diff
changeset
|
90 #define POWERP_PMC_DATATYPE unsigned long long |
2599b8444831
better hadamard8_diff16 in AltiVec, and more patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1949
diff
changeset
|
91 #define POWERPC_GET_PMC1(a) asm volatile("mfspr %0, 771" : "=r" (a)) |
2599b8444831
better hadamard8_diff16 in AltiVec, and more patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1949
diff
changeset
|
92 #define POWERPC_GET_PMC2(a) asm volatile("mfspr %0, 772" : "=r" (a)) |
2599b8444831
better hadamard8_diff16 in AltiVec, and more patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1949
diff
changeset
|
93 #if (POWERPC_NUM_PMC_ENABLED > 2) |
2599b8444831
better hadamard8_diff16 in AltiVec, and more patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1949
diff
changeset
|
94 #define POWERPC_GET_PMC3(a) asm volatile("mfspr %0, 773" : "=r" (a)) |
2599b8444831
better hadamard8_diff16 in AltiVec, and more patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1949
diff
changeset
|
95 #define POWERPC_GET_PMC4(a) asm volatile("mfspr %0, 774" : "=r" (a)) |
2599b8444831
better hadamard8_diff16 in AltiVec, and more patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1949
diff
changeset
|
96 #else |
2599b8444831
better hadamard8_diff16 in AltiVec, and more patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1949
diff
changeset
|
97 #define POWERPC_GET_PMC3(a) do {} while (0) |
2599b8444831
better hadamard8_diff16 in AltiVec, and more patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1949
diff
changeset
|
98 #define POWERPC_GET_PMC4(a) do {} while (0) |
2599b8444831
better hadamard8_diff16 in AltiVec, and more patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1949
diff
changeset
|
99 #endif |
2599b8444831
better hadamard8_diff16 in AltiVec, and more patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1949
diff
changeset
|
100 #if (POWERPC_NUM_PMC_ENABLED > 4) |
2599b8444831
better hadamard8_diff16 in AltiVec, and more patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1949
diff
changeset
|
101 #define POWERPC_GET_PMC5(a) asm volatile("mfspr %0, 775" : "=r" (a)) |
2599b8444831
better hadamard8_diff16 in AltiVec, and more patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1949
diff
changeset
|
102 #define POWERPC_GET_PMC6(a) asm volatile("mfspr %0, 776" : "=r" (a)) |
2599b8444831
better hadamard8_diff16 in AltiVec, and more patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1949
diff
changeset
|
103 #else |
2599b8444831
better hadamard8_diff16 in AltiVec, and more patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1949
diff
changeset
|
104 #define POWERPC_GET_PMC5(a) do {} while (0) |
2599b8444831
better hadamard8_diff16 in AltiVec, and more patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1949
diff
changeset
|
105 #define POWERPC_GET_PMC6(a) do {} while (0) |
2599b8444831
better hadamard8_diff16 in AltiVec, and more patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1949
diff
changeset
|
106 #endif |
2599b8444831
better hadamard8_diff16 in AltiVec, and more patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1949
diff
changeset
|
107 #endif /* POWERPC_MODE_64BITS */ |
2979 | 108 #define POWERPC_PERF_DECLARE(a, cond) \ |
109 POWERP_PMC_DATATYPE \ | |
110 pmc_start[POWERPC_NUM_PMC_ENABLED], \ | |
111 pmc_stop[POWERPC_NUM_PMC_ENABLED], \ | |
1951
2599b8444831
better hadamard8_diff16 in AltiVec, and more patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1949
diff
changeset
|
112 pmc_loop_index; |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
113 #define POWERPC_PERF_START_COUNT(a, cond) do { \ |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
114 POWERPC_GET_PMC6(pmc_start[5]); \ |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
115 POWERPC_GET_PMC5(pmc_start[4]); \ |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
116 POWERPC_GET_PMC4(pmc_start[3]); \ |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
117 POWERPC_GET_PMC3(pmc_start[2]); \ |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
118 POWERPC_GET_PMC2(pmc_start[1]); \ |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
119 POWERPC_GET_PMC1(pmc_start[0]); \ |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
120 } while (0) |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
121 #define POWERPC_PERF_STOP_COUNT(a, cond) do { \ |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
122 POWERPC_GET_PMC1(pmc_stop[0]); \ |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
123 POWERPC_GET_PMC2(pmc_stop[1]); \ |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
124 POWERPC_GET_PMC3(pmc_stop[2]); \ |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
125 POWERPC_GET_PMC4(pmc_stop[3]); \ |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
126 POWERPC_GET_PMC5(pmc_stop[4]); \ |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
127 POWERPC_GET_PMC6(pmc_stop[5]); \ |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
128 if (cond) \ |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
129 { \ |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
130 for(pmc_loop_index = 0; \ |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
131 pmc_loop_index < POWERPC_NUM_PMC_ENABLED; \ |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
132 pmc_loop_index++) \ |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
133 { \ |
1951
2599b8444831
better hadamard8_diff16 in AltiVec, and more patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1949
diff
changeset
|
134 if (pmc_stop[pmc_loop_index] >= pmc_start[pmc_loop_index]) \ |
2979 | 135 { \ |
136 POWERP_PMC_DATATYPE diff = \ | |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
137 pmc_stop[pmc_loop_index] - pmc_start[pmc_loop_index]; \ |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
138 if (diff < perfdata[pmc_loop_index][a][powerpc_data_min]) \ |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
139 perfdata[pmc_loop_index][a][powerpc_data_min] = diff; \ |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
140 if (diff > perfdata[pmc_loop_index][a][powerpc_data_max]) \ |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
141 perfdata[pmc_loop_index][a][powerpc_data_max] = diff; \ |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
142 perfdata[pmc_loop_index][a][powerpc_data_sum] += diff; \ |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
143 perfdata[pmc_loop_index][a][powerpc_data_num] ++; \ |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
144 } \ |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
145 } \ |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
146 } \ |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
147 } while (0) |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
148 #else /* POWERPC_PERFORMANCE_REPORT */ |
1045
c8b438ecf8f4
fix for ppc compile broken by (Romain Dolbeau <dolbeau at irisa dot fr> and Colin Leroy <colin at colino dot net>)
michaelni
parents:
1024
diff
changeset
|
149 // those are needed to avoid empty statements. |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
150 #define POWERPC_PERF_DECLARE(a, cond) int altivec_placeholder __attribute__ ((unused)) |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
151 #define POWERPC_PERF_START_COUNT(a, cond) do {} while (0) |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
152 #define POWERPC_PERF_STOP_COUNT(a, cond) do {} while (0) |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
153 #endif /* POWERPC_PERFORMANCE_REPORT */ |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
154 |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
diff
changeset
|
155 #endif /* _DSPUTIL_PPC_ */ |