annotate ppc/gmc_altivec.c @ 12530:63edd10ad4bc libavcodec tip

Try to fix crashes introduced by r25218. r25218 made assumptions about the existence of past reference frames that weren't necessarily true.
author darkshikari
date Tue, 28 Sep 2010 09:06:22 +0000
parents 3cd4cd0509cd
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
995
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
1 /*
1001
95cbffdc98a9 dct_unquantize_h263_altivec by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 995
diff changeset
2 * GMC (Global Motion Compensation)
995
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
3 * AltiVec-enabled
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
4 * Copyright (c) 2003 Romain Dolbeau <romain@dolbeau.org>
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
5 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
6 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
7 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
8 * FFmpeg is free software; you can redistribute it and/or
995
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
9 * modify it under the terms of the GNU Lesser General Public
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
10 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
11 * version 2.1 of the License, or (at your option) any later version.
995
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
12 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
13 * FFmpeg is distributed in the hope that it will be useful,
995
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
16 * Lesser General Public License for more details.
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
17 *
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
18 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3036
diff changeset
19 * License along with FFmpeg; if not, write to the Free Software
3036
0b546eab515d Update licensing information: The FSF changed postal address.
diego
parents: 2967
diff changeset
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
995
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
21 */
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
22
6763
f7cbb7733146 Use full path for #includes from another directory.
diego
parents: 5963
diff changeset
23 #include "libavcodec/dsputil.h"
5750
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents: 5746
diff changeset
24 #include "util_altivec.h"
10079
71ead14665e3 PPC: simplify loading some values into altivec registers
mru
parents: 9364
diff changeset
25 #include "types_altivec.h"
11382
50415a8f1451 PPC: move prototypes to headers and make some functions static
mru
parents: 11369
diff changeset
26 #include "dsputil_altivec.h"
995
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
27
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
28 /*
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
29 altivec-enhanced gmc1. ATM this code assumes stride is a multiple of 8,
5963
80103098c797 spelling
vitor
parents: 5750
diff changeset
30 to preserve proper dst alignment.
995
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
31 */
1064
b32afefe7d33 * UINTX -> uintx_t INTX -> intx_t
kabi
parents: 1033
diff changeset
32 void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int stride, int h, int x16, int y16, int rounder)
995
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
33 {
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
34 const DECLARE_ALIGNED(16, unsigned short, rounder_a) = rounder;
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
35 const DECLARE_ALIGNED(16, unsigned short, ABCD)[8] =
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
36 {
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
37 (16-x16)*(16-y16), /* A */
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
38 ( x16)*(16-y16), /* B */
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
39 (16-x16)*( y16), /* C */
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
40 ( x16)*( y16), /* D */
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
41 0, 0, 0, 0 /* padding */
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
42 };
5746
55ed6dc5d476 Remove const vector macro indirection that is useless and obfuscating
diego
parents: 5019
diff changeset
43 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
55ed6dc5d476 Remove const vector macro indirection that is useless and obfuscating
diego
parents: 5019
diff changeset
44 register const vector unsigned short vcsr8 = (const vector unsigned short)vec_splat_u16(8);
995
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
45 register vector unsigned char dstv, dstv2, src_0, src_1, srcvA, srcvB, srcvC, srcvD;
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
46 register vector unsigned short Av, Bv, Cv, Dv, rounderV, tempA, tempB, tempC, tempD;
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
47 int i;
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
48 unsigned long dst_odd = (unsigned long)dst & 0x0000000F;
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
49 unsigned long src_really_odd = (unsigned long)src & 0x0000000F;
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
50
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
51 tempA = vec_ld(0, (unsigned short*)ABCD);
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
52 Av = vec_splat(tempA, 0);
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
53 Bv = vec_splat(tempA, 1);
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
54 Cv = vec_splat(tempA, 2);
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
55 Dv = vec_splat(tempA, 3);
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
56
10079
71ead14665e3 PPC: simplify loading some values into altivec registers
mru
parents: 9364
diff changeset
57 rounderV = vec_splat((vec_u16)vec_lde(0, &rounder_a), 0);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 1839
diff changeset
58
995
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
59 // we'll be able to pick-up our 9 char elements
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
60 // at src from those 32 bytes
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
61 // we load the first batch here, as inside the loop
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
62 // we can re-use 'src+stride' from one iteration
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
63 // as the 'src' of the next.
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
64 src_0 = vec_ld(0, src);
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
65 src_1 = vec_ld(16, src);
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
66 srcvA = vec_perm(src_0, src_1, vec_lvsl(0, src));
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 1839
diff changeset
67
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
68 if (src_really_odd != 0x0000000F) {
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
69 // if src & 0xF == 0xF, then (src+1) is properly aligned
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
70 // on the second vector.
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
71 srcvB = vec_perm(src_0, src_1, vec_lvsl(1, src));
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
72 } else {
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
73 srcvB = src_1;
995
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
74 }
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
75 srcvA = vec_mergeh(vczero, srcvA);
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
76 srcvB = vec_mergeh(vczero, srcvB);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 1839
diff changeset
77
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
78 for(i=0; i<h; i++) {
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
79 dst_odd = (unsigned long)dst & 0x0000000F;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
80 src_really_odd = (((unsigned long)src) + stride) & 0x0000000F;
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 1839
diff changeset
81
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
82 dstv = vec_ld(0, dst);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 1839
diff changeset
83
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
84 // we'll be able to pick-up our 9 char elements
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
85 // at src + stride from those 32 bytes
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
86 // then reuse the resulting 2 vectors srcvC and srcvD
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
87 // as the next srcvA and srcvB
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
88 src_0 = vec_ld(stride + 0, src);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
89 src_1 = vec_ld(stride + 16, src);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
90 srcvC = vec_perm(src_0, src_1, vec_lvsl(stride + 0, src));
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 1839
diff changeset
91
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
92 if (src_really_odd != 0x0000000F) {
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
93 // if src & 0xF == 0xF, then (src+1) is properly aligned
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
94 // on the second vector.
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
95 srcvD = vec_perm(src_0, src_1, vec_lvsl(stride + 1, src));
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
96 } else {
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
97 srcvD = src_1;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
98 }
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 1839
diff changeset
99
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
100 srcvC = vec_mergeh(vczero, srcvC);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
101 srcvD = vec_mergeh(vczero, srcvD);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 1839
diff changeset
102
995
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
103
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
104 // OK, now we (finally) do the math :-)
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
105 // these four instructions replace 32 int muls & 32 int adds.
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
106 // isn't AltiVec nice ?
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
107 tempA = vec_mladd((vector unsigned short)srcvA, Av, rounderV);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
108 tempB = vec_mladd((vector unsigned short)srcvB, Bv, tempA);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
109 tempC = vec_mladd((vector unsigned short)srcvC, Cv, tempB);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
110 tempD = vec_mladd((vector unsigned short)srcvD, Dv, tempC);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 1839
diff changeset
111
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
112 srcvA = srcvC;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
113 srcvB = srcvD;
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 1839
diff changeset
114
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
115 tempD = vec_sr(tempD, vcsr8);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 1839
diff changeset
116
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
117 dstv2 = vec_pack(tempD, (vector unsigned short)vczero);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 1839
diff changeset
118
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
119 if (dst_odd) {
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
120 dstv2 = vec_perm(dstv, dstv2, vcprm(0,1,s0,s1));
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
121 } else {
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
122 dstv2 = vec_perm(dstv, dstv2, vcprm(s0,s1,2,3));
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
123 }
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 1839
diff changeset
124
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
125 vec_st(dstv2, 0, dst);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 1839
diff changeset
126
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
127 dst += stride;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6763
diff changeset
128 src += stride;
995
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
129 }
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
diff changeset
130 }