Mercurial > libavcodec.hg
annotate ppc/dsputil_ppc.c @ 6323:e6da66f378c7 libavcodec
mpegvideo.h has two function declarations with the 'inline' specifier
but no definition for those functions. The C standard requires a
definition to appear in the same translation unit for any function
declared with 'inline'. Most of the files including mpegvideo.h do not
define those functions. Fix this by removing the 'inline' specifiers
from the header.
patch by Uoti Urpala
author | diego |
---|---|
date | Sun, 03 Feb 2008 17:54:30 +0000 |
parents | ed05a3d964fa |
children | f7cbb7733146 |
rev | line source |
---|---|
828
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
1 /* |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
2 * Copyright (c) 2002 Brian Foley |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
3 * Copyright (c) 2002 Dieter Shirley |
1949
66215baae7b9
hadamard8_diff8x8 in AltiVec, the 16bits edition by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1879
diff
changeset
|
4 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org> |
828
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
5 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3581
diff
changeset
|
6 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3581
diff
changeset
|
7 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3581
diff
changeset
|
8 * FFmpeg is free software; you can redistribute it and/or |
828
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
9 * modify it under the terms of the GNU Lesser General Public |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
10 * License as published by the Free Software Foundation; either |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3581
diff
changeset
|
11 * version 2.1 of the License, or (at your option) any later version. |
828
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
12 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3581
diff
changeset
|
13 * FFmpeg is distributed in the hope that it will be useful, |
828
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
16 * Lesser General Public License for more details. |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
17 * |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
18 * You should have received a copy of the GNU Lesser General Public |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3581
diff
changeset
|
19 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2979
diff
changeset
|
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
828
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
21 */ |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
22 |
5010
d5ba514e3f4a
Add libavcodec to compiler include flags in order to simplify header
diego
parents:
4838
diff
changeset
|
23 #include "dsputil.h" |
638
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
24 |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
25 #include "dsputil_ppc.h" |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
26 |
638
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
27 #ifdef HAVE_ALTIVEC |
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
28 #include "dsputil_altivec.h" |
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
29 |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
1511
diff
changeset
|
30 extern void fdct_altivec(int16_t *block); |
3546
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
31 extern void gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h, |
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
32 int x16, int y16, int rounder); |
1092 | 33 extern void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block); |
34 extern void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block); | |
3547 | 35 |
36 void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx); | |
3532 | 37 |
3546
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
38 void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx); |
3542 | 39 void vc1dsp_init_altivec(DSPContext* c, AVCodecContext *avctx); |
3547 | 40 void snow_init_altivec(DSPContext* c, AVCodecContext *avctx); |
3581 | 41 void float_init_altivec(DSPContext* c, AVCodecContext *avctx); |
4838
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
4521
diff
changeset
|
42 void int_init_altivec(DSPContext* c, AVCodecContext *avctx); |
3532 | 43 |
44 #endif | |
3223 | 45 |
4197 | 46 int mm_flags = 0; |
894
a408778eff87
altivec accelerated v-resample patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
884
diff
changeset
|
47 |
995
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
48 int mm_support(void) |
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
49 { |
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
50 int result = 0; |
1511
587258262aa5
recommit (of patch, as cvslog msg didnt apply cleanly)
michael
parents:
1352
diff
changeset
|
51 #ifdef HAVE_ALTIVEC |
995
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
52 if (has_altivec()) { |
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
53 result |= MM_ALTIVEC; |
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
54 } |
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
55 #endif /* result */ |
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
56 return result; |
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
57 } |
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
58 |
4521
891590781d9e
rename POWERPC_PERFORMANCE_REPORT to CONFIG_POWERPC_PERF
mru
parents:
4227
diff
changeset
|
59 #ifdef CONFIG_POWERPC_PERF |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
60 unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total]; |
1024
9cc1031e1864
More AltiVec MC functions patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1015
diff
changeset
|
61 /* list below must match enum in dsputil_ppc.h */ |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
62 static unsigned char* perfname[] = { |
1879
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1839
diff
changeset
|
63 "ff_fft_calc_altivec", |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
64 "gmc1_altivec", |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
65 "dct_unquantize_h263_altivec", |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
1511
diff
changeset
|
66 "fdct_altivec", |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
67 "idct_add_altivec", |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
68 "idct_put_altivec", |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
69 "put_pixels16_altivec", |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
70 "avg_pixels16_altivec", |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
71 "avg_pixels8_altivec", |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
72 "put_pixels8_xy2_altivec", |
1024
9cc1031e1864
More AltiVec MC functions patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1015
diff
changeset
|
73 "put_no_rnd_pixels8_xy2_altivec", |
9cc1031e1864
More AltiVec MC functions patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1015
diff
changeset
|
74 "put_pixels16_xy2_altivec", |
9cc1031e1864
More AltiVec MC functions patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1015
diff
changeset
|
75 "put_no_rnd_pixels16_xy2_altivec", |
1949
66215baae7b9
hadamard8_diff8x8 in AltiVec, the 16bits edition by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1879
diff
changeset
|
76 "hadamard8_diff8x8_altivec", |
1951
2599b8444831
better hadamard8_diff16 in AltiVec, and more patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1949
diff
changeset
|
77 "hadamard8_diff16_altivec", |
2057
4c663228e020
avg_pixels8_xy2_altivec in AltiVec, enabling avg_pixels8_altivec, hadamard fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
2056
diff
changeset
|
78 "avg_pixels8_xy2_altivec", |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
79 "clear_blocks_dcbz32_ppc", |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
2068
diff
changeset
|
80 "clear_blocks_dcbz128_ppc", |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
2068
diff
changeset
|
81 "put_h264_chroma_mc8_altivec", |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
2068
diff
changeset
|
82 "avg_h264_chroma_mc8_altivec", |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
2068
diff
changeset
|
83 "put_h264_qpel16_h_lowpass_altivec", |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
2068
diff
changeset
|
84 "avg_h264_qpel16_h_lowpass_altivec", |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
2068
diff
changeset
|
85 "put_h264_qpel16_v_lowpass_altivec", |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
2068
diff
changeset
|
86 "avg_h264_qpel16_v_lowpass_altivec", |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
2068
diff
changeset
|
87 "put_h264_qpel16_hv_lowpass_altivec", |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
2068
diff
changeset
|
88 "avg_h264_qpel16_hv_lowpass_altivec", |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
2068
diff
changeset
|
89 "" |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
90 }; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
91 #include <stdio.h> |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
92 #endif |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
93 |
4521
891590781d9e
rename POWERPC_PERFORMANCE_REPORT to CONFIG_POWERPC_PERF
mru
parents:
4227
diff
changeset
|
94 #ifdef CONFIG_POWERPC_PERF |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
95 void powerpc_display_perf_report(void) |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
96 { |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
97 int i, j; |
1839
b370288f004d
Metrowerks CodeWarrior patches by (John Dalgliesh <johnd at defyne dot org>)
michael
parents:
1708
diff
changeset
|
98 av_log(NULL, AV_LOG_INFO, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n"); |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
99 for(i = 0 ; i < powerpc_perf_total ; i++) |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
100 { |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
101 for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) |
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
102 { |
2979 | 103 if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0) |
104 av_log(NULL, AV_LOG_INFO, | |
4122
daae66c03857
Replace most of the %lld and %llx by their (cleaner) PRI*64 counterparts.
diego
parents:
4003
diff
changeset
|
105 " Function \"%s\" (pmc%d):\n\tmin: %"PRIu64"\n\tmax: %"PRIu64"\n\tavg: %1.2lf (%"PRIu64")\n", |
2979 | 106 perfname[i], |
107 j+1, | |
108 perfdata[j][i][powerpc_data_min], | |
109 perfdata[j][i][powerpc_data_max], | |
110 (double)perfdata[j][i][powerpc_data_sum] / | |
111 (double)perfdata[j][i][powerpc_data_num], | |
112 perfdata[j][i][powerpc_data_num]); | |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
113 } |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
114 } |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
115 } |
4521
891590781d9e
rename POWERPC_PERFORMANCE_REPORT to CONFIG_POWERPC_PERF
mru
parents:
4227
diff
changeset
|
116 #endif /* CONFIG_POWERPC_PERF */ |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
117 |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
118 /* ***** WARNING ***** WARNING ***** WARNING ***** */ |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
119 /* |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
120 clear_blocks_dcbz32_ppc will not work properly |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
121 on PowerPC processors with a cache line size |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
122 not equal to 32 bytes. |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
123 Fortunately all processor used by Apple up to |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
124 at least the 7450 (aka second generation G4) |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
125 use 32 bytes cache line. |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
126 This is due to the use of the 'dcbz' instruction. |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
127 It simply clear to zero a single cache line, |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
128 so you need to know the cache line size to use it ! |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
129 It's absurd, but it's fast... |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
130 |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
131 update 24/06/2003 : Apple released yesterday the G5, |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
132 with a PPC970. cache line size : 128 bytes. Oups. |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
133 The semantic of dcbz was changed, it always clear |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
134 32 bytes. so the function below will work, but will |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
135 be slow. So I fixed check_dcbz_effect to use dcbzl, |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
136 which is defined to clear a cache line (as dcbz before). |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
137 So we still can distinguish, and use dcbz (32 bytes) |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
138 or dcbzl (one cache line) as required. |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
139 |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
140 see <http://developer.apple.com/technotes/tn/tn2087.html> |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
141 and <http://developer.apple.com/technotes/tn/tn2086.html> |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
142 */ |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
143 void clear_blocks_dcbz32_ppc(DCTELEM *blocks) |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
144 { |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
145 POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz32, 1); |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
146 register int misal = ((unsigned long)blocks & 0x00000010); |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
147 register int i = 0; |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
148 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1); |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
149 #if 1 |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
150 if (misal) { |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
151 ((unsigned long*)blocks)[0] = 0L; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
152 ((unsigned long*)blocks)[1] = 0L; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
153 ((unsigned long*)blocks)[2] = 0L; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
154 ((unsigned long*)blocks)[3] = 0L; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
155 i += 16; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
156 } |
2294
fac626a2b73b
missaliged clear_blocks() and h264 not complied but referenced fix patch by (Roine Gustafsson <roine at users dot sourceforge dot net>) and me
michael
parents:
2236
diff
changeset
|
157 for ( ; i < sizeof(DCTELEM)*6*64-31 ; i += 32) { |
1340
09b8fe0f0139
PPC fixes & clean-up patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1334
diff
changeset
|
158 asm volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory"); |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
159 } |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
160 if (misal) { |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
161 ((unsigned long*)blocks)[188] = 0L; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
162 ((unsigned long*)blocks)[189] = 0L; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
163 ((unsigned long*)blocks)[190] = 0L; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
164 ((unsigned long*)blocks)[191] = 0L; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
165 i += 16; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
166 } |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
167 #else |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
168 memset(blocks, 0, sizeof(DCTELEM)*6*64); |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
169 #endif |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
170 POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1); |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
171 } |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
172 |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
173 /* same as above, when dcbzl clear a whole 128B cache line |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
174 i.e. the PPC970 aka G5 */ |
3949 | 175 #ifdef HAVE_DCBZL |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
176 void clear_blocks_dcbz128_ppc(DCTELEM *blocks) |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
177 { |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
178 POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz128, 1); |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
179 register int misal = ((unsigned long)blocks & 0x0000007f); |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
180 register int i = 0; |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
181 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1); |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
182 #if 1 |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
183 if (misal) { |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
184 // we could probably also optimize this case, |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
185 // but there's not much point as the machines |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
186 // aren't available yet (2003-06-26) |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
187 memset(blocks, 0, sizeof(DCTELEM)*6*64); |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
188 } |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
189 else |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
190 for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) { |
2979 | 191 asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory"); |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
192 } |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
193 #else |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
194 memset(blocks, 0, sizeof(DCTELEM)*6*64); |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
195 #endif |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
196 POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz128, 1); |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
197 } |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
198 #else |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
199 void clear_blocks_dcbz128_ppc(DCTELEM *blocks) |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
200 { |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
201 memset(blocks, 0, sizeof(DCTELEM)*6*64); |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
202 } |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
203 #endif |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
204 |
3949 | 205 #ifdef HAVE_DCBZL |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
206 /* check dcbz report how many bytes are set to 0 by dcbz */ |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
207 /* update 24/06/2003 : replace dcbz by dcbzl to get |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
208 the intended effect (Apple "fixed" dcbz) |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
209 unfortunately this cannot be used unless the assembler |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
210 knows about dcbzl ... */ |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
211 long check_dcbzl_effect(void) |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
212 { |
5958
ed05a3d964fa
stupid code (casting of void*) found by checktree.sh
michael
parents:
5749
diff
changeset
|
213 register char *fakedata = av_malloc(1024); |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
214 register char *fakedata_middle; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
215 register long zero = 0; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
216 register long i = 0; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
217 long count = 0; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
218 |
1033
b4172ff70d27
Altivec on non darwin systems patch by Romain Dolbeau
bellard
parents:
1024
diff
changeset
|
219 if (!fakedata) |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
220 { |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
221 return 0L; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
222 } |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
223 |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
224 fakedata_middle = (fakedata + 512); |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
225 |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
226 memset(fakedata, 0xFF, 1024); |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
227 |
1340
09b8fe0f0139
PPC fixes & clean-up patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1334
diff
changeset
|
228 /* below the constraint "b" seems to mean "Address base register" |
09b8fe0f0139
PPC fixes & clean-up patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1334
diff
changeset
|
229 in gcc-3.3 / RS/6000 speaks. seems to avoid using r0, so.... */ |
09b8fe0f0139
PPC fixes & clean-up patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1334
diff
changeset
|
230 asm volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero)); |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
231 |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
232 for (i = 0; i < 1024 ; i ++) |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
233 { |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
234 if (fakedata[i] == (char)0) |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
235 count++; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
236 } |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
237 |
1033
b4172ff70d27
Altivec on non darwin systems patch by Romain Dolbeau
bellard
parents:
1024
diff
changeset
|
238 av_free(fakedata); |
2967 | 239 |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
240 return count; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
241 } |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
242 #else |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
243 long check_dcbzl_effect(void) |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
244 { |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
245 return 0; |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
246 } |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
247 #endif |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
248 |
4003 | 249 static void prefetch_ppc(void *mem, int stride, int h) |
250 { | |
251 register const uint8_t *p = mem; | |
252 do { | |
253 asm volatile ("dcbt 0,%0" : : "r" (p)); | |
254 p+= stride; | |
255 } while(--h); | |
256 } | |
257 | |
1092 | 258 void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx) |
638
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
259 { |
5749 | 260 // Common optimizations whether AltiVec is available or not |
4003 | 261 c->prefetch = prefetch_ppc; |
3546
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
262 switch (check_dcbzl_effect()) { |
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
263 case 32: |
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
264 c->clear_blocks = clear_blocks_dcbz32_ppc; |
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
265 break; |
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
266 case 128: |
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
267 c->clear_blocks = clear_blocks_dcbz128_ppc; |
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
268 break; |
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
269 default: |
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
270 break; |
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
271 } |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
2068
diff
changeset
|
272 |
2294
fac626a2b73b
missaliged clear_blocks() and h264 not complied but referenced fix patch by (Roine Gustafsson <roine at users dot sourceforge dot net>) and me
michael
parents:
2236
diff
changeset
|
273 #ifdef HAVE_ALTIVEC |
4227
ef1d382309e5
Conditionally compile some of the AltiVec optimizations.
diego
parents:
4197
diff
changeset
|
274 if(ENABLE_H264_DECODER) dsputil_h264_init_ppc(c, avctx); |
2967 | 275 |
638
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
276 if (has_altivec()) { |
4197 | 277 mm_flags |= MM_ALTIVEC; |
2967 | 278 |
3547 | 279 dsputil_init_altivec(c, avctx); |
4227
ef1d382309e5
Conditionally compile some of the AltiVec optimizations.
diego
parents:
4197
diff
changeset
|
280 if(ENABLE_SNOW_DECODER) snow_init_altivec(c, avctx); |
ef1d382309e5
Conditionally compile some of the AltiVec optimizations.
diego
parents:
4197
diff
changeset
|
281 if(ENABLE_VC1_DECODER || ENABLE_WMV3_DECODER) |
ef1d382309e5
Conditionally compile some of the AltiVec optimizations.
diego
parents:
4197
diff
changeset
|
282 vc1dsp_init_altivec(c, avctx); |
3581 | 283 float_init_altivec(c, avctx); |
4838
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
4521
diff
changeset
|
284 int_init_altivec(c, avctx); |
2979 | 285 c->gmc1 = gmc1_altivec; |
1092 | 286 |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
1511
diff
changeset
|
287 #ifdef CONFIG_ENCODERS |
2979 | 288 if (avctx->dct_algo == FF_DCT_AUTO || |
289 avctx->dct_algo == FF_DCT_ALTIVEC) | |
290 { | |
291 c->fdct = fdct_altivec; | |
292 } | |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
1511
diff
changeset
|
293 #endif //CONFIG_ENCODERS |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
1511
diff
changeset
|
294 |
3546
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
295 if (avctx->lowres==0) |
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
296 { |
1092 | 297 if ((avctx->idct_algo == FF_IDCT_AUTO) || |
298 (avctx->idct_algo == FF_IDCT_ALTIVEC)) | |
299 { | |
300 c->idct_put = idct_put_altivec; | |
301 c->idct_add = idct_add_altivec; | |
302 c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; | |
303 } | |
3546
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
304 } |
2967 | 305 |
4521
891590781d9e
rename POWERPC_PERFORMANCE_REPORT to CONFIG_POWERPC_PERF
mru
parents:
4227
diff
changeset
|
306 #ifdef CONFIG_POWERPC_PERF |
1009
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
307 { |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1340
diff
changeset
|
308 int i, j; |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
309 for (i = 0 ; i < powerpc_perf_total ; i++) |
1009
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
310 { |
2979 | 311 for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) |
312 { | |
313 perfdata[j][i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL; | |
314 perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL; | |
315 perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL; | |
316 perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL; | |
317 } | |
318 } | |
1009
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
319 } |
4521
891590781d9e
rename POWERPC_PERFORMANCE_REPORT to CONFIG_POWERPC_PERF
mru
parents:
4227
diff
changeset
|
320 #endif /* CONFIG_POWERPC_PERF */ |
3957
b6f6bf155661
Non Altivec optimizations already present at the top
lu_zero
parents:
3949
diff
changeset
|
321 } |
1024
9cc1031e1864
More AltiVec MC functions patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1015
diff
changeset
|
322 #endif /* HAVE_ALTIVEC */ |
638
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
323 } |