Mercurial > libavcodec.hg
annotate ppc/dsputil_ppc.c @ 1949:66215baae7b9 libavcodec
hadamard8_diff8x8 in AltiVec, the 16bits edition by (Romain Dolbeau <dolbeau at irisa dot fr>)
author | michael |
---|---|
date | Tue, 20 Apr 2004 17:05:12 +0000 |
parents | dd63cb7e5080 |
children | 2599b8444831 |
rev | line source |
---|---|
828
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
1 /* |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
2 * Copyright (c) 2002 Brian Foley |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
3 * Copyright (c) 2002 Dieter Shirley |
1949
66215baae7b9
hadamard8_diff8x8 in AltiVec, the 16bits edition by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1879
diff
changeset
|
4 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org> |
828
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
5 * |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
6 * This library is free software; you can redistribute it and/or |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
7 * modify it under the terms of the GNU Lesser General Public |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
8 * License as published by the Free Software Foundation; either |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
9 * version 2 of the License, or (at your option) any later version. |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
10 * |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
11 * This library is distributed in the hope that it will be useful, |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
14 * Lesser General Public License for more details. |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
15 * |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
16 * You should have received a copy of the GNU Lesser General Public |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
17 * License along with this library; if not, write to the Free Software |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
19 */ |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
20 |
638
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
21 #include "../dsputil.h" |
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
22 |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
23 #include "dsputil_ppc.h" |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
24 |
638
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
25 #ifdef HAVE_ALTIVEC |
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
26 #include "dsputil_altivec.h" |
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
27 #endif |
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
28 |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
1511
diff
changeset
|
29 extern void fdct_altivec(int16_t *block); |
1092 | 30 extern void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block); |
31 extern void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block); | |
32 | |
894
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
884
diff
changeset
|
33 int mm_flags = 0; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
884
diff
changeset
|
34 |
995
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
35 int mm_support(void) |
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
36 { |
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
37 int result = 0; |
1511
587258262aa5
recommit (of patch, as cvslog msg didnt apply cleanly)
michael
parents:
1352
diff
changeset
|
38 #ifdef HAVE_ALTIVEC |
995
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
39 if (has_altivec()) { |
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
40 result |= MM_ALTIVEC; |
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
41 } |
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
42 #endif /* result */ |
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
43 return result; |
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
44 } |
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
45 |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
46 #ifdef POWERPC_PERFORMANCE_REPORT |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
47 unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total]; |
1024
9cc1031e1864
More AltiVec MC functions patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1015
diff
changeset
|
48 /* list below must match enum in dsputil_ppc.h */ |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
49 static unsigned char* perfname[] = { |
1879
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1839
diff
changeset
|
50 "ff_fft_calc_altivec", |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
51 "gmc1_altivec", |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
52 "dct_unquantize_h263_altivec", |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
1511
diff
changeset
|
53 "fdct_altivec", |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
54 "idct_add_altivec", |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
55 "idct_put_altivec", |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
56 "put_pixels16_altivec", |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
57 "avg_pixels16_altivec", |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
58 "avg_pixels8_altivec", |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
59 "put_pixels8_xy2_altivec", |
1024
9cc1031e1864
More AltiVec MC functions patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1015
diff
changeset
|
60 "put_no_rnd_pixels8_xy2_altivec", |
9cc1031e1864
More AltiVec MC functions patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1015
diff
changeset
|
61 "put_pixels16_xy2_altivec", |
9cc1031e1864
More AltiVec MC functions patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1015
diff
changeset
|
62 "put_no_rnd_pixels16_xy2_altivec", |
1949
66215baae7b9
hadamard8_diff8x8 in AltiVec, the 16bits edition by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1879
diff
changeset
|
63 "hadamard8_diff8x8_altivec", |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
64 "clear_blocks_dcbz32_ppc", |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
65 "clear_blocks_dcbz128_ppc" |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
66 }; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
67 #include <stdio.h> |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
68 #endif |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
69 |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
70 #ifdef POWERPC_PERFORMANCE_REPORT |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
71 void powerpc_display_perf_report(void) |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
72 { |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
73 int i, j; |
1839
b370288f004d
Metrowerks CodeWarrior patches by (John Dalgliesh <johnd at defyne dot org>)
michael
parents:
1708
diff
changeset
|
74 av_log(NULL, AV_LOG_INFO, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n"); |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
75 for(i = 0 ; i < powerpc_perf_total ; i++) |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
76 { |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
77 for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
78 { |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
79 if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0) |
1839
b370288f004d
Metrowerks CodeWarrior patches by (John Dalgliesh <johnd at defyne dot org>)
michael
parents:
1708
diff
changeset
|
80 av_log(NULL, AV_LOG_INFO, |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
81 " Function \"%s\" (pmc%d):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n", |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
82 perfname[i], |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
83 j+1, |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
84 perfdata[j][i][powerpc_data_min], |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
85 perfdata[j][i][powerpc_data_max], |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
86 (double)perfdata[j][i][powerpc_data_sum] / |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
87 (double)perfdata[j][i][powerpc_data_num], |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
88 perfdata[j][i][powerpc_data_num]); |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
89 } |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
90 } |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
91 } |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
92 #endif /* POWERPC_PERFORMANCE_REPORT */ |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
93 |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
94 /* ***** WARNING ***** WARNING ***** WARNING ***** */ |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
95 /* |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
96 clear_blocks_dcbz32_ppc will not work properly |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
97 on PowerPC processors with a cache line size |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
98 not equal to 32 bytes. |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
99 Fortunately all processor used by Apple up to |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
100 at least the 7450 (aka second generation G4) |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
101 use 32 bytes cache line. |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
102 This is due to the use of the 'dcbz' instruction. |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
103 It simply clear to zero a single cache line, |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
104 so you need to know the cache line size to use it ! |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
105 It's absurd, but it's fast... |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
106 |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
107 update 24/06/2003 : Apple released yesterday the G5, |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
108 with a PPC970. cache line size : 128 bytes. Oups. |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
109 The semantic of dcbz was changed, it always clear |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
110 32 bytes. so the function below will work, but will |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
111 be slow. So I fixed check_dcbz_effect to use dcbzl, |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
112 which is defined to clear a cache line (as dcbz before). |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
113 So we still can distinguish, and use dcbz (32 bytes) |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
114 or dcbzl (one cache line) as required. |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
115 |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
116 see <http://developer.apple.com/technotes/tn/tn2087.html> |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
117 and <http://developer.apple.com/technotes/tn/tn2086.html> |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
118 */ |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
119 void clear_blocks_dcbz32_ppc(DCTELEM *blocks) |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
120 { |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
121 POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz32, 1); |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
122 register int misal = ((unsigned long)blocks & 0x00000010); |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
123 register int i = 0; |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
124 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1); |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
125 #if 1 |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
126 if (misal) { |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
127 ((unsigned long*)blocks)[0] = 0L; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
128 ((unsigned long*)blocks)[1] = 0L; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
129 ((unsigned long*)blocks)[2] = 0L; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
130 ((unsigned long*)blocks)[3] = 0L; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
131 i += 16; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
132 } |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
133 for ( ; i < sizeof(DCTELEM)*6*64 ; i += 32) { |
1839
b370288f004d
Metrowerks CodeWarrior patches by (John Dalgliesh <johnd at defyne dot org>)
michael
parents:
1708
diff
changeset
|
134 #ifndef __MWERKS__ |
1340
09b8fe0f0139
PPC fixes & clean-up patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1334
diff
changeset
|
135 asm volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory"); |
1839
b370288f004d
Metrowerks CodeWarrior patches by (John Dalgliesh <johnd at defyne dot org>)
michael
parents:
1708
diff
changeset
|
136 #else |
b370288f004d
Metrowerks CodeWarrior patches by (John Dalgliesh <johnd at defyne dot org>)
michael
parents:
1708
diff
changeset
|
137 __dcbz( blocks, i ); |
b370288f004d
Metrowerks CodeWarrior patches by (John Dalgliesh <johnd at defyne dot org>)
michael
parents:
1708
diff
changeset
|
138 #endif |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
139 } |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
140 if (misal) { |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
141 ((unsigned long*)blocks)[188] = 0L; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
142 ((unsigned long*)blocks)[189] = 0L; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
143 ((unsigned long*)blocks)[190] = 0L; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
144 ((unsigned long*)blocks)[191] = 0L; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
145 i += 16; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
146 } |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
147 #else |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
148 memset(blocks, 0, sizeof(DCTELEM)*6*64); |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
149 #endif |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
150 POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1); |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
151 } |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
152 |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
153 /* same as above, when dcbzl clear a whole 128B cache line |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
154 i.e. the PPC970 aka G5 */ |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
155 #ifndef NO_DCBZL |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
156 void clear_blocks_dcbz128_ppc(DCTELEM *blocks) |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
157 { |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
158 POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz128, 1); |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
159 register int misal = ((unsigned long)blocks & 0x0000007f); |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
160 register int i = 0; |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
161 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1); |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
162 #if 1 |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
163 if (misal) { |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
164 // we could probably also optimize this case, |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
165 // but there's not much point as the machines |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
166 // aren't available yet (2003-06-26) |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
167 memset(blocks, 0, sizeof(DCTELEM)*6*64); |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
168 } |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
169 else |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
170 for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) { |
1340
09b8fe0f0139
PPC fixes & clean-up patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1334
diff
changeset
|
171 asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory"); |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
172 } |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
173 #else |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
174 memset(blocks, 0, sizeof(DCTELEM)*6*64); |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
175 #endif |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
176 POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz128, 1); |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
177 } |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
178 #else |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
179 void clear_blocks_dcbz128_ppc(DCTELEM *blocks) |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
180 { |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
181 memset(blocks, 0, sizeof(DCTELEM)*6*64); |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
182 } |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
183 #endif |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
184 |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
185 #ifndef NO_DCBZL |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
186 /* check dcbz report how many bytes are set to 0 by dcbz */ |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
187 /* update 24/06/2003 : replace dcbz by dcbzl to get |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
188 the intended effect (Apple "fixed" dcbz) |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
189 unfortunately this cannot be used unless the assembler |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
190 knows about dcbzl ... */ |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
191 long check_dcbzl_effect(void) |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
192 { |
1033
b4172ff70d27
Altivec on non darwin systems patch by Romain Dolbeau
bellard
parents:
1024
diff
changeset
|
193 register char *fakedata = (char*)av_malloc(1024); |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
194 register char *fakedata_middle; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
195 register long zero = 0; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
196 register long i = 0; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
197 long count = 0; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
198 |
1033
b4172ff70d27
Altivec on non darwin systems patch by Romain Dolbeau
bellard
parents:
1024
diff
changeset
|
199 if (!fakedata) |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
200 { |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
201 return 0L; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
202 } |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
203 |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
204 fakedata_middle = (fakedata + 512); |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
205 |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
206 memset(fakedata, 0xFF, 1024); |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
207 |
1340
09b8fe0f0139
PPC fixes & clean-up patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1334
diff
changeset
|
208 /* below the constraint "b" seems to mean "Address base register" |
09b8fe0f0139
PPC fixes & clean-up patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1334
diff
changeset
|
209 in gcc-3.3 / RS/6000 speaks. seems to avoid using r0, so.... */ |
09b8fe0f0139
PPC fixes & clean-up patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1334
diff
changeset
|
210 asm volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero)); |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
211 |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
212 for (i = 0; i < 1024 ; i ++) |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
213 { |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
214 if (fakedata[i] == (char)0) |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
215 count++; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
216 } |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
217 |
1033
b4172ff70d27
Altivec on non darwin systems patch by Romain Dolbeau
bellard
parents:
1024
diff
changeset
|
218 av_free(fakedata); |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
219 |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
220 return count; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
221 } |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
222 #else |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
223 long check_dcbzl_effect(void) |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
224 { |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
225 return 0; |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
226 } |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
227 #endif |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
228 |
1949
66215baae7b9
hadamard8_diff8x8 in AltiVec, the 16bits edition by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1879
diff
changeset
|
229 #ifdef HAVE_ALTIVEC |
66215baae7b9
hadamard8_diff8x8 in AltiVec, the 16bits edition by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1879
diff
changeset
|
230 // can't put that in dsputil_altivec.c, |
66215baae7b9
hadamard8_diff8x8 in AltiVec, the 16bits edition by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1879
diff
changeset
|
231 // has WARPER8_16_SQ declare the function "static" ... |
66215baae7b9
hadamard8_diff8x8 in AltiVec, the 16bits edition by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1879
diff
changeset
|
232 WARPER8_16_SQ(hadamard8_diff8x8_altivec, hadamard8_diff16_altivec) |
66215baae7b9
hadamard8_diff8x8 in AltiVec, the 16bits edition by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1879
diff
changeset
|
233 #endif |
66215baae7b9
hadamard8_diff8x8 in AltiVec, the 16bits edition by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1879
diff
changeset
|
234 |
1092 | 235 void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx) |
638
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
236 { |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
237 // Common optimizations whether Altivec is available or not |
828
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
238 |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
239 switch (check_dcbzl_effect()) { |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
240 case 32: |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
241 c->clear_blocks = clear_blocks_dcbz32_ppc; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
242 break; |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
243 case 128: |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
244 c->clear_blocks = clear_blocks_dcbz128_ppc; |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
245 break; |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
246 default: |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
247 break; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
248 } |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
249 |
1511
587258262aa5
recommit (of patch, as cvslog msg didnt apply cleanly)
michael
parents:
1352
diff
changeset
|
250 #ifdef HAVE_ALTIVEC |
638
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
251 if (has_altivec()) { |
894
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
884
diff
changeset
|
252 mm_flags |= MM_ALTIVEC; |
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
884
diff
changeset
|
253 |
828
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
254 // Altivec specific optimisations |
1708 | 255 c->pix_abs[0][1] = sad16_x2_altivec; |
256 c->pix_abs[0][2] = sad16_y2_altivec; | |
257 c->pix_abs[0][3] = sad16_xy2_altivec; | |
258 c->pix_abs[0][0] = sad16_altivec; | |
259 c->pix_abs[1][0] = sad8_altivec; | |
260 c->sad[0]= sad16_altivec; | |
261 c->sad[1]= sad8_altivec; | |
878
6ea69518e5f7
altivec optimizations patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
856
diff
changeset
|
262 c->pix_norm1 = pix_norm1_altivec; |
981 | 263 c->sse[1]= sse8_altivec; |
264 c->sse[0]= sse16_altivec; | |
856
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
828
diff
changeset
|
265 c->pix_sum = pix_sum_altivec; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
828
diff
changeset
|
266 c->diff_pixels = diff_pixels_altivec; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
828
diff
changeset
|
267 c->get_pixels = get_pixels_altivec; |
1024
9cc1031e1864
More AltiVec MC functions patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1015
diff
changeset
|
268 // next one disabled as it's untested. |
995
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
269 #if 0 |
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
270 c->add_bytes= add_bytes_altivec; |
1024
9cc1031e1864
More AltiVec MC functions patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1015
diff
changeset
|
271 #endif /* 0 */ |
1009
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
272 c->put_pixels_tab[0][0] = put_pixels16_altivec; |
1949
66215baae7b9
hadamard8_diff8x8 in AltiVec, the 16bits edition by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1879
diff
changeset
|
273 /* the two functions do the same thing, so use the same code */ |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
274 c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec; |
1009
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
275 c->avg_pixels_tab[0][0] = avg_pixels16_altivec; |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
276 // next one disabled as it's untested. |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
277 #if 0 |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
278 c->avg_pixels_tab[1][0] = avg_pixels8_altivec; |
1024
9cc1031e1864
More AltiVec MC functions patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1015
diff
changeset
|
279 #endif /* 0 */ |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
280 c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec; |
1024
9cc1031e1864
More AltiVec MC functions patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1015
diff
changeset
|
281 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec; |
9cc1031e1864
More AltiVec MC functions patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1015
diff
changeset
|
282 c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec; |
9cc1031e1864
More AltiVec MC functions patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1015
diff
changeset
|
283 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec; |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
284 |
995
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
285 c->gmc1 = gmc1_altivec; |
1092 | 286 |
1949
66215baae7b9
hadamard8_diff8x8 in AltiVec, the 16bits edition by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1879
diff
changeset
|
287 c->hadamard8_diff[0] = hadamard8_diff16_altivec; |
66215baae7b9
hadamard8_diff8x8 in AltiVec, the 16bits edition by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1879
diff
changeset
|
288 c->hadamard8_diff[1] = hadamard8_diff8x8_altivec; |
66215baae7b9
hadamard8_diff8x8 in AltiVec, the 16bits edition by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1879
diff
changeset
|
289 |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
1511
diff
changeset
|
290 #ifdef CONFIG_ENCODERS |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
1511
diff
changeset
|
291 if (avctx->dct_algo == FF_DCT_AUTO || |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
1511
diff
changeset
|
292 avctx->dct_algo == FF_DCT_ALTIVEC) |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
1511
diff
changeset
|
293 { |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
1511
diff
changeset
|
294 c->fdct = fdct_altivec; |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
1511
diff
changeset
|
295 } |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
1511
diff
changeset
|
296 #endif //CONFIG_ENCODERS |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
1511
diff
changeset
|
297 |
1092 | 298 if ((avctx->idct_algo == FF_IDCT_AUTO) || |
299 (avctx->idct_algo == FF_IDCT_ALTIVEC)) | |
300 { | |
301 c->idct_put = idct_put_altivec; | |
302 c->idct_add = idct_add_altivec; | |
303 #ifndef ALTIVEC_USE_REFERENCE_C_CODE | |
304 c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; | |
305 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | |
306 c->idct_permutation_type = FF_NO_IDCT_PERM; | |
307 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | |
308 } | |
1024
9cc1031e1864
More AltiVec MC functions patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1015
diff
changeset
|
309 |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
310 #ifdef POWERPC_PERFORMANCE_REPORT |
1009
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
311 { |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
312 int i, j; |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
313 for (i = 0 ; i < powerpc_perf_total ; i++) |
1009
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
314 { |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
315 for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
316 { |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
317 perfdata[j][i][powerpc_data_min] = (unsigned long long)0xFFFFFFFFFFFFFFFF; |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
318 perfdata[j][i][powerpc_data_max] = (unsigned long long)0x0000000000000000; |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
319 perfdata[j][i][powerpc_data_sum] = (unsigned long long)0x0000000000000000; |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
320 perfdata[j][i][powerpc_data_num] = (unsigned long long)0x0000000000000000; |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
321 } |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
322 } |
1009
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
323 } |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
324 #endif /* POWERPC_PERFORMANCE_REPORT */ |
638
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
325 } else |
1024
9cc1031e1864
More AltiVec MC functions patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1015
diff
changeset
|
326 #endif /* HAVE_ALTIVEC */ |
638
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
327 { |
828
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
328 // Non-AltiVec PPC optimisations |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
329 |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
330 // ... pending ... |
638
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
331 } |
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
332 } |