Mercurial > libavcodec.hg
annotate ppc/dsputil_ppc.c @ 12454:f4355cd85faa libavcodec
Port latest x264 deblock asm (before they moved to using NV12 as internal
format), LGPL'ed with permission from Jason and Loren. This includes mmx2
code, so remove inline asm from h264dsp_mmx.c accordingly.
author | rbultje |
---|---|
date | Fri, 03 Sep 2010 16:52:46 +0000 |
parents | 3fc4c625b6f3 |
children | a5ddb39627fd |
rev | line source |
---|---|
828
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
1 /* |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
2 * Copyright (c) 2002 Brian Foley |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
3 * Copyright (c) 2002 Dieter Shirley |
1949
66215baae7b9
hadamard8_diff8x8 in AltiVec, the 16bits edition by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents:
1879
diff
changeset
|
4 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org> |
828
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
5 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3581
diff
changeset
|
6 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3581
diff
changeset
|
7 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3581
diff
changeset
|
8 * FFmpeg is free software; you can redistribute it and/or |
828
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
9 * modify it under the terms of the GNU Lesser General Public |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
10 * License as published by the Free Software Foundation; either |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3581
diff
changeset
|
11 * version 2.1 of the License, or (at your option) any later version. |
828
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
12 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3581
diff
changeset
|
13 * FFmpeg is distributed in the hope that it will be useful, |
828
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
16 * Lesser General Public License for more details. |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
17 * |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
18 * You should have received a copy of the GNU Lesser General Public |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3581
diff
changeset
|
19 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2979
diff
changeset
|
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
828
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
21 */ |
ace3ccd18dd2
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents:
748
diff
changeset
|
22 |
6763 | 23 #include "libavcodec/dsputil.h" |
638
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
24 #include "dsputil_altivec.h" |
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
25 |
995
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
26 int mm_support(void) |
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
27 { |
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
28 int result = 0; |
8590 | 29 #if HAVE_ALTIVEC |
995
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
30 if (has_altivec()) { |
8104
0d108ec85620
Remove duplicated MM_* macros for CPU capabilities from dsputil.h.
rathann
parents:
8031
diff
changeset
|
31 result |= FF_MM_ALTIVEC; |
995
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
32 } |
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
33 #endif /* HAVE_ALTIVEC */ |
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
34 return result; |
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
35 } |
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
36 |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
37 /* ***** WARNING ***** WARNING ***** WARNING ***** */ |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
38 /* |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
39 clear_blocks_dcbz32_ppc will not work properly on PowerPC processors with a |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
40 cache line size not equal to 32 bytes. |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
41 Fortunately all processors used by Apple up to at least the 7450 (aka second |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
42 generation G4) use a 32-byte cache line. |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
43 This is due to the use of the 'dcbz' instruction. It simply clears to zero a |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
44 single cache line, so you need to know the cache line size to use it! |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
45 It's absurd, but it's fast... |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
46 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
47 Update 24/06/2003: Apple released the G5 yesterday, with a PPC970. Cache line |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
48 size: 128 bytes. Oops. |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
49 The semantics of dcbz were changed; it always clears 32 bytes. So the function |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
50 below will work, but will be slow. So I fixed check_dcbzl_effect to use dcbzl, |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
51 which is defined to clear a cache line (as dcbz did before). So we can still |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
52 distinguish, and use dcbz (32 bytes) or dcbzl (one cache line) as required. |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
53 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
54 see <http://developer.apple.com/technotes/tn/tn2087.html> |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
55 and <http://developer.apple.com/technotes/tn/tn2086.html> |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
56 */ |
11382
50415a8f1451
PPC: move prototypes to headers and make some functions static
mru
parents:
9995
diff
changeset
|
57 static void clear_blocks_dcbz32_ppc(DCTELEM *blocks) |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
58 { |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
59 register int misal = ((unsigned long)blocks & 0x00000010); |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
60 register int i = 0; |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
61 #if 1 |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
62 if (misal) { |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
63 ((unsigned long*)blocks)[0] = 0L; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
64 ((unsigned long*)blocks)[1] = 0L; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
65 ((unsigned long*)blocks)[2] = 0L; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
66 ((unsigned long*)blocks)[3] = 0L; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
67 i += 16; |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
68 } |
2294
fac626a2b73b
missaliged clear_blocks() and h264 not complied but referenced fix patch by (Roine Gustafsson <roine at users dot sourceforge dot net>) and me
michael
parents:
2236
diff
changeset
|
69 for ( ; i < sizeof(DCTELEM)*6*64-31 ; i += 32) { |
8031 | 70 __asm__ volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory"); |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
71 } |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
72 if (misal) { |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
73 ((unsigned long*)blocks)[188] = 0L; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
74 ((unsigned long*)blocks)[189] = 0L; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
75 ((unsigned long*)blocks)[190] = 0L; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
76 ((unsigned long*)blocks)[191] = 0L; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
77 i += 16; |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
78 } |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
79 #else |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
80 memset(blocks, 0, sizeof(DCTELEM)*6*64); |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
81 #endif |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
82 } |
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
83 |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
84 /* same as above, when dcbzl clears a whole 128-byte cache line, |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
85 i.e. the PPC970 aka G5 */ |
8590 | 86 #if HAVE_DCBZL |
11382
50415a8f1451
PPC: move prototypes to headers and make some functions static
mru
parents:
9995
diff
changeset
|
87 static void clear_blocks_dcbz128_ppc(DCTELEM *blocks) |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
88 { |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
89 register int misal = ((unsigned long)blocks & 0x0000007f); |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
90 register int i = 0; |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
91 #if 1 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
92 if (misal) { |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
93 // we could probably also optimize this case, |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
94 // but there's not much point as the machines |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
95 // aren't available yet (2003-06-26) |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
96 memset(blocks, 0, sizeof(DCTELEM)*6*64); |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
97 } |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
98 else |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
99 for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) { |
8031 | 100 __asm__ volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory"); |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
101 } |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
102 #else |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
103 memset(blocks, 0, sizeof(DCTELEM)*6*64); |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
104 #endif |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
105 } |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
106 #else |
11382
50415a8f1451
PPC: move prototypes to headers and make some functions static
mru
parents:
9995
diff
changeset
|
107 static void clear_blocks_dcbz128_ppc(DCTELEM *blocks) |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
108 { |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
109 memset(blocks, 0, sizeof(DCTELEM)*6*64); |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
110 } |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
111 #endif |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
112 |
8590 | 113 #if HAVE_DCBZL |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
114 /* check_dcbzl_effect reports how many bytes are set to 0 by dcbzl */ |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
115 /* update 24/06/2003 : replace dcbz by dcbzl to get |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
116 the intended effect (Apple "fixed" dcbz) |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
117 unfortunately this cannot be used unless the assembler |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
118 knows about dcbzl ... */ |
11382
50415a8f1451
PPC: move prototypes to headers and make some functions static
mru
parents:
9995
diff
changeset
|
119 static long check_dcbzl_effect(void) |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
120 { |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
121 register char *fakedata = av_malloc(1024); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
122 register char *fakedata_middle; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
123 register long zero = 0; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
124 register long i = 0; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
125 long count = 0; |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
126 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
127 if (!fakedata) { |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
128 return 0L; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
129 } |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
130 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
131 fakedata_middle = (fakedata + 512); |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
132 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
133 memset(fakedata, 0xFF, 1024); |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
134 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
135 /* below the constraint "b" seems to mean "Address base register" |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
136 in gcc-3.3 / RS/6000 speak. It seems to avoid using r0, so... */ |
8031 | 137 __asm__ volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero)); |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
138 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
139 for (i = 0; i < 1024 ; i ++) { |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
140 if (fakedata[i] == (char)0) |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
141 count++; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
142 } |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
143 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
144 av_free(fakedata); |
2967 | 145 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
146 return count; |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
147 } |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
148 #else |
11382
50415a8f1451
PPC: move prototypes to headers and make some functions static
mru
parents:
9995
diff
changeset
|
149 static long check_dcbzl_effect(void) |
1334
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
150 { |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
151 return 0; |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
152 } |
80c46c310a91
PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1092
diff
changeset
|
153 #endif |
1015
35cf2f4a0f8c
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1009
diff
changeset
|
154 |
4003 | 155 static void prefetch_ppc(void *mem, int stride, int h) |
156 { | |
157 register const uint8_t *p = mem; | |
158 do { | |
8031 | 159 __asm__ volatile ("dcbt 0,%0" : : "r" (p)); |
4003 | 160 p+= stride; |
161 } while(--h); | |
162 } | |
163 | |
1092 | 164 void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx) |
638
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
165 { |
5749 | 166 // Common optimizations whether AltiVec is available or not |
4003 | 167 c->prefetch = prefetch_ppc; |
3546
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
168 switch (check_dcbzl_effect()) { |
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
169 case 32: |
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
170 c->clear_blocks = clear_blocks_dcbz32_ppc; |
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
171 break; |
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
172 case 128: |
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
173 c->clear_blocks = clear_blocks_dcbz128_ppc; |
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
174 break; |
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
175 default: |
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
176 break; |
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
177 } |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
2068
diff
changeset
|
178 |
8590 | 179 #if HAVE_ALTIVEC |
8596
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
180 if(CONFIG_H264_DECODER) dsputil_h264_init_ppc(c, avctx); |
2967 | 181 |
638
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
182 if (has_altivec()) { |
3547 | 183 dsputil_init_altivec(c, avctx); |
9995
3141f69e3905
Do not check for both CONFIG_VC1_DECODER and CONFIG_WMV3_DECODER,
diego
parents:
9975
diff
changeset
|
184 if(CONFIG_VC1_DECODER) |
4227
ef1d382309e5
Conditionally compile some of the AltiVec optimizations.
diego
parents:
4197
diff
changeset
|
185 vc1dsp_init_altivec(c, avctx); |
3581 | 186 float_init_altivec(c, avctx); |
4838
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
4521
diff
changeset
|
187 int_init_altivec(c, avctx); |
2979 | 188 c->gmc1 = gmc1_altivec; |
1092 | 189 |
8590 | 190 #if CONFIG_ENCODERS |
2979 | 191 if (avctx->dct_algo == FF_DCT_AUTO || |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
192 avctx->dct_algo == FF_DCT_ALTIVEC) { |
2979 | 193 c->fdct = fdct_altivec; |
194 } | |
1578
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
1511
diff
changeset
|
195 #endif //CONFIG_ENCODERS |
6a4cfc5f9f96
AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents:
1511
diff
changeset
|
196 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
197 if (avctx->lowres==0) { |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
198 if ((avctx->idct_algo == FF_IDCT_AUTO) || |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
199 (avctx->idct_algo == FF_IDCT_ALTIVEC)) { |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
200 c->idct_put = idct_put_altivec; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
201 c->idct_add = idct_add_altivec; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
202 c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; |
9975
d6d7e8d4a04d
Do not redundantly check for both CONFIG_THEORA_DECODER and CONFIG_VP3_DECODER.
diego
parents:
9711
diff
changeset
|
203 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER) && |
9711 | 204 avctx->idct_algo==FF_IDCT_VP3){ |
205 c->idct_put = ff_vp3_idct_put_altivec; | |
206 c->idct_add = ff_vp3_idct_add_altivec; | |
207 c->idct = ff_vp3_idct_altivec; | |
208 c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; | |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6763
diff
changeset
|
209 } |
3546
5f97ba9a4eaa
Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents:
3542
diff
changeset
|
210 } |
2967 | 211 |
3957
b6f6bf155661
Non Altivec optimizations already present at the top
lu_zero
parents:
3949
diff
changeset
|
212 } |
1024
9cc1031e1864
More AltiVec MC functions patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
1015
diff
changeset
|
213 #endif /* HAVE_ALTIVEC */ |
638
0012f75c92bb
altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff
changeset
|
214 } |