annotate ppc/dsputil_ppc.c @ 12454:f4355cd85faa libavcodec

Port latest x264 deblock asm (before they moved to using NV12 as internal format), LGPL'ed with permission from Jason and Loren. This includes mmx2 code, so remove inline asm from h264dsp_mmx.c accordingly.
author rbultje
date Fri, 03 Sep 2010 16:52:46 +0000
parents 3fc4c625b6f3
children a5ddb39627fd
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
828
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 748
diff changeset
1 /*
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 748
diff changeset
2 * Copyright (c) 2002 Brian Foley
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 748
diff changeset
3 * Copyright (c) 2002 Dieter Shirley
1949
66215baae7b9 hadamard8_diff8x8 in AltiVec, the 16bits edition by (Romain Dolbeau <dolbeau at irisa dot fr>)
michael
parents: 1879
diff changeset
4 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
828
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 748
diff changeset
5 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3581
diff changeset
6 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3581
diff changeset
7 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3581
diff changeset
8 * FFmpeg is free software; you can redistribute it and/or
828
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 748
diff changeset
9 * modify it under the terms of the GNU Lesser General Public
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 748
diff changeset
10 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3581
diff changeset
11 * version 2.1 of the License, or (at your option) any later version.
828
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 748
diff changeset
12 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3581
diff changeset
13 * FFmpeg is distributed in the hope that it will be useful,
828
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 748
diff changeset
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 748
diff changeset
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 748
diff changeset
16 * Lesser General Public License for more details.
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 748
diff changeset
17 *
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 748
diff changeset
18 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3581
diff changeset
19 * License along with FFmpeg; if not, write to the Free Software
3036
0b546eab515d Update licensing information: The FSF changed postal address.
diego
parents: 2979
diff changeset
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
828
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 748
diff changeset
21 */
ace3ccd18dd2 Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>)
michaelni
parents: 748
diff changeset
22
6763
f7cbb7733146 Use full path for #includes from another directory.
diego
parents: 5958
diff changeset
23 #include "libavcodec/dsputil.h"
638
0012f75c92bb altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
24 #include "dsputil_altivec.h"
0012f75c92bb altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
25
995
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
/**
 * Report the SIMD capabilities usable on the running CPU.
 *
 * @return a bitmask of FF_MM_* flags. FF_MM_ALTIVEC is set only when the
 *         build has AltiVec support compiled in (HAVE_ALTIVEC) and
 *         has_altivec() reports it at run time; otherwise the result is 0.
 */
int mm_support(void)
{
    int result = 0;
#if HAVE_ALTIVEC
    if (has_altivec()) {
        result |= FF_MM_ALTIVEC;
    }
#endif /* HAVE_ALTIVEC */
    return result;
}
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
36
1015
35cf2f4a0f8c PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1009
diff changeset
37 /* ***** WARNING ***** WARNING ***** WARNING ***** */
35cf2f4a0f8c PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1009
diff changeset
38 /*
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
39 clear_blocks_dcbz32_ppc will not work properly on PowerPC processors with a
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
40 cache line size not equal to 32 bytes.
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
41 Fortunately all processors used by Apple up to at least the 7450 (aka second
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
42 generation G4) use 32-byte cache lines.
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
43 This is due to the use of the 'dcbz' instruction. It simply clears to zero a
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
44 single cache line, so you need to know the cache line size to use it !
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
45 It's absurd, but it's fast...
1334
80c46c310a91 PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1092
diff changeset
46
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
47 update 24/06/2003 : Apple released yesterday the G5, with a PPC970. cache line
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
48 size: 128 bytes. Oops.
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
49 The semantics of dcbz were changed: it always clears 32 bytes, so the function
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
50 below will work, but will be slow. So I fixed check_dcbz_effect to use dcbzl,
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
51 which is defined to clear a cache line (as dcbz before). So we still can
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
52 distinguish, and use dcbz (32 bytes) or dcbzl (one cache line) as required.
1334
80c46c310a91 PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1092
diff changeset
53
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
54 see <http://developer.apple.com/technotes/tn/tn2087.html>
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
55 and <http://developer.apple.com/technotes/tn/tn2086.html>
1015
35cf2f4a0f8c PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1009
diff changeset
56 */
11382
50415a8f1451 PPC: move prototypes to headers and make some functions static
mru
parents: 9995
diff changeset
57 static void clear_blocks_dcbz32_ppc(DCTELEM *blocks)
1015
35cf2f4a0f8c PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1009
diff changeset
58 {
35cf2f4a0f8c PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1009
diff changeset
59 register int misal = ((unsigned long)blocks & 0x00000010);
35cf2f4a0f8c PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1009
diff changeset
60 register int i = 0;
35cf2f4a0f8c PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1009
diff changeset
61 #if 1
35cf2f4a0f8c PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1009
diff changeset
62 if (misal) {
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
63 ((unsigned long*)blocks)[0] = 0L;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
64 ((unsigned long*)blocks)[1] = 0L;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
65 ((unsigned long*)blocks)[2] = 0L;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
66 ((unsigned long*)blocks)[3] = 0L;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
67 i += 16;
1015
35cf2f4a0f8c PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1009
diff changeset
68 }
2294
fac626a2b73b missaliged clear_blocks() and h264 not complied but referenced fix patch by (Roine Gustafsson <roine at users dot sourceforge dot net>) and me
michael
parents: 2236
diff changeset
69 for ( ; i < sizeof(DCTELEM)*6*64-31 ; i += 32) {
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 7334
diff changeset
70 __asm__ volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory");
1015
35cf2f4a0f8c PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1009
diff changeset
71 }
35cf2f4a0f8c PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1009
diff changeset
72 if (misal) {
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
73 ((unsigned long*)blocks)[188] = 0L;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
74 ((unsigned long*)blocks)[189] = 0L;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
75 ((unsigned long*)blocks)[190] = 0L;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
76 ((unsigned long*)blocks)[191] = 0L;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
77 i += 16;
1015
35cf2f4a0f8c PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1009
diff changeset
78 }
35cf2f4a0f8c PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1009
diff changeset
79 #else
35cf2f4a0f8c PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1009
diff changeset
80 memset(blocks, 0, sizeof(DCTELEM)*6*64);
35cf2f4a0f8c PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1009
diff changeset
81 #endif
35cf2f4a0f8c PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1009
diff changeset
82 }
35cf2f4a0f8c PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1009
diff changeset
83
1334
80c46c310a91 PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1092
diff changeset
84 /* same as above, when dcbzl clear a whole 128B cache line
80c46c310a91 PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1092
diff changeset
85 i.e. the PPC970 aka G5 */
8590
7a463923ecd1 Change semantic of CONFIG_*, HAVE_* and ARCH_*.
aurel
parents: 8250
diff changeset
86 #if HAVE_DCBZL
11382
50415a8f1451 PPC: move prototypes to headers and make some functions static
mru
parents: 9995
diff changeset
87 static void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
1334
80c46c310a91 PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1092
diff changeset
88 {
80c46c310a91 PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1092
diff changeset
89 register int misal = ((unsigned long)blocks & 0x0000007f);
80c46c310a91 PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1092
diff changeset
90 register int i = 0;
80c46c310a91 PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1092
diff changeset
91 #if 1
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
92 if (misal) {
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
93 // we could probably also optimize this case,
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
94 // but there's not much point as the machines
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
95 // aren't available yet (2003-06-26)
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
96 memset(blocks, 0, sizeof(DCTELEM)*6*64);
1334
80c46c310a91 PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1092
diff changeset
97 }
80c46c310a91 PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1092
diff changeset
98 else
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
99 for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) {
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 7334
diff changeset
100 __asm__ volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
101 }
1334
80c46c310a91 PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1092
diff changeset
102 #else
80c46c310a91 PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1092
diff changeset
103 memset(blocks, 0, sizeof(DCTELEM)*6*64);
80c46c310a91 PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1092
diff changeset
104 #endif
80c46c310a91 PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1092
diff changeset
105 }
80c46c310a91 PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1092
diff changeset
106 #else
11382
50415a8f1451 PPC: move prototypes to headers and make some functions static
mru
parents: 9995
diff changeset
107 static void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
1334
80c46c310a91 PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1092
diff changeset
108 {
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
109 memset(blocks, 0, sizeof(DCTELEM)*6*64);
1334
80c46c310a91 PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1092
diff changeset
110 }
80c46c310a91 PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1092
diff changeset
111 #endif
80c46c310a91 PPC970 patch + cpu-specific tuning support by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1092
diff changeset
112
8590
7a463923ecd1 Change semantic of CONFIG_*, HAVE_* and ARCH_*.
aurel
parents: 8250
diff changeset
#if HAVE_DCBZL
/**
 * Measure how many bytes one dcbzl instruction sets to zero.
 *
 * A 1024-byte scratch buffer is filled with 0xFF, a single dcbzl is issued
 * on its midpoint, and the zero bytes in the buffer are then counted.
 * dcbzl is used instead of dcbz (update 24/06/2003, Apple "fixed" dcbz);
 * this can only be built when the assembler knows about dcbzl.
 *
 * @return the number of zeroed bytes, or 0 if the scratch buffer could not
 *         be allocated.
 */
static long check_dcbzl_effect(void)
{
    register char *fakedata = av_malloc(1024);
    register char *fakedata_middle;
    register long zero = 0;
    register long i = 0;
    long count = 0;

    if (!fakedata) {
        return 0L;
    }

    fakedata_middle = (fakedata + 512);

    memset(fakedata, 0xFF, 1024);

    /* below the constraint "b" seems to mean "Address base register"
       in gcc-3.3 / RS/6000 speaks. seems to avoid using r0, so....
       The "memory" clobber tells the compiler that the instruction writes
       to the buffer, so the reads below are not assumed to still see the
       0xFF fill. */
    __asm__ volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero) : "memory");

    for (i = 0; i < 1024 ; i ++) {
        if (fakedata[i] == (char)0)
            count++;
    }

    av_free(fakedata);

    return count;
}
#else
/* No dcbzl support: report 0 so that no dcbz-based routine is selected. */
static long check_dcbzl_effect(void)
{
    return 0;
}
#endif
1015
35cf2f4a0f8c PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1009
diff changeset
154
4003
38ccf93476a1 ppc generic prefetch
lu_zero
parents: 3973
diff changeset
/**
 * Issue a dcbt (data cache block touch) on the start of each of h rows.
 *
 * @param mem    address of the first row
 * @param stride distance in bytes between consecutive rows
 * @param h      number of rows to touch; nothing is done when h <= 0
 */
static void prefetch_ppc(void *mem, int stride, int h)
{
    register const uint8_t *p = mem;

    /* Test before the first touch so that h <= 0 cannot run away. */
    while (h-- > 0) {
        __asm__ volatile ("dcbt 0,%0" : : "r" (p));
        p += stride;
    }
}
38ccf93476a1 ppc generic prefetch
lu_zero
parents: 3973
diff changeset
163
1092
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 1033
diff changeset
164 void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
638
0012f75c92bb altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
165 {
5749
784dcbdc910f cosmetics: Fix AltiVec spelling.
diego
parents: 5744
diff changeset
166 // Common optimizations whether AltiVec is available or not
4003
38ccf93476a1 ppc generic prefetch
lu_zero
parents: 3973
diff changeset
167 c->prefetch = prefetch_ppc;
3546
5f97ba9a4eaa Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents: 3542
diff changeset
168 switch (check_dcbzl_effect()) {
5f97ba9a4eaa Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents: 3542
diff changeset
169 case 32:
5f97ba9a4eaa Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents: 3542
diff changeset
170 c->clear_blocks = clear_blocks_dcbz32_ppc;
5f97ba9a4eaa Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents: 3542
diff changeset
171 break;
5f97ba9a4eaa Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents: 3542
diff changeset
172 case 128:
5f97ba9a4eaa Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents: 3542
diff changeset
173 c->clear_blocks = clear_blocks_dcbz128_ppc;
5f97ba9a4eaa Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents: 3542
diff changeset
174 break;
5f97ba9a4eaa Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents: 3542
diff changeset
175 default:
5f97ba9a4eaa Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents: 3542
diff changeset
176 break;
5f97ba9a4eaa Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents: 3542
diff changeset
177 }
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents: 2068
diff changeset
178
8590
7a463923ecd1 Change semantic of CONFIG_*, HAVE_* and ARCH_*.
aurel
parents: 8250
diff changeset
179 #if HAVE_ALTIVEC
8596
68e959302527 replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents: 8590
diff changeset
180 if(CONFIG_H264_DECODER) dsputil_h264_init_ppc(c, avctx);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2778
diff changeset
181
638
0012f75c92bb altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
182 if (has_altivec()) {
3547
e542c9978077 standalone snow dsputil init
lu_zero
parents: 3546
diff changeset
183 dsputil_init_altivec(c, avctx);
9995
3141f69e3905 Do not check for both CONFIG_VC1_DECODER and CONFIG_WMV3_DECODER,
diego
parents: 9975
diff changeset
184 if(CONFIG_VC1_DECODER)
4227
ef1d382309e5 Conditionally compile some of the AltiVec optimizations.
diego
parents: 4197
diff changeset
185 vc1dsp_init_altivec(c, avctx);
3581
49082584828a altivec float optimizations
lu_zero
parents: 3547
diff changeset
186 float_init_altivec(c, avctx);
4838
eeac11145c4e ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents: 4521
diff changeset
187 int_init_altivec(c, avctx);
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
188 c->gmc1 = gmc1_altivec;
1092
f59c3f66363b MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
michaelni
parents: 1033
diff changeset
189
8590
7a463923ecd1 Change semantic of CONFIG_*, HAVE_* and ARCH_*.
aurel
parents: 8250
diff changeset
190 #if CONFIG_ENCODERS
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
191 if (avctx->dct_algo == FF_DCT_AUTO ||
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
192 avctx->dct_algo == FF_DCT_ALTIVEC) {
2979
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
193 c->fdct = fdct_altivec;
bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting
diego
parents: 2967
diff changeset
194 }
1578
6a4cfc5f9f96 AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents: 1511
diff changeset
195 #endif //CONFIG_ENCODERS
6a4cfc5f9f96 AltiVec optimized fdct patch by (James Klicman <james at klicman dot org>)
michael
parents: 1511
diff changeset
196
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
197 if (avctx->lowres==0) {
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
198 if ((avctx->idct_algo == FF_IDCT_AUTO) ||
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
199 (avctx->idct_algo == FF_IDCT_ALTIVEC)) {
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
200 c->idct_put = idct_put_altivec;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
201 c->idct_add = idct_add_altivec;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
202 c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
9975
d6d7e8d4a04d Do not redundantly check for both CONFIG_THEORA_DECODER and CONFIG_VP3_DECODER.
diego
parents: 9711
diff changeset
203 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER) &&
9711
d563821462b4 Altivec VP3 IDCT
conrad
parents: 9420
diff changeset
204 avctx->idct_algo==FF_IDCT_VP3){
d563821462b4 Altivec VP3 IDCT
conrad
parents: 9420
diff changeset
205 c->idct_put = ff_vp3_idct_put_altivec;
d563821462b4 Altivec VP3 IDCT
conrad
parents: 9420
diff changeset
206 c->idct_add = ff_vp3_idct_add_altivec;
d563821462b4 Altivec VP3 IDCT
conrad
parents: 9420
diff changeset
207 c->idct = ff_vp3_idct_altivec;
d563821462b4 Altivec VP3 IDCT
conrad
parents: 9420
diff changeset
208 c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
209 }
3546
5f97ba9a4eaa Almost cosmetic changes in dsputil_init_ppc and vorbis_inverse_coupling_altivec:
lu_zero
parents: 3542
diff changeset
210 }
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2778
diff changeset
211
3957
b6f6bf155661 Non Altivec optimizations already present at the top
lu_zero
parents: 3949
diff changeset
212 }
1024
9cc1031e1864 More AltiVec MC functions patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 1015
diff changeset
213 #endif /* HAVE_ALTIVEC */
638
0012f75c92bb altivec build tidyup patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
diff changeset
214 }