annotate libswscale/swscale_altivec_template.c @ 22998:85343b921fef

Fix preprocessor conditionals: libdvdnav includes libdvdread and configure enables libdvdread support if libdvdnav is found. There is no need to check separately in the source with the preprocessor.
author diego
date Wed, 18 Apr 2007 09:56:24 +0000
parents 5f47768cbda5
children 9528d1ebe68f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
19200
07769a1d06a9 Fix compile error due to extra "FF" at beginning of file in the FF{MIN,MAX}
pacman
parents: 19181
diff changeset
1 /*
20094
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20006
diff changeset
2 * AltiVec-enhanced yuv2yuvX
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20006
diff changeset
3 *
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20006
diff changeset
4 * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20006
diff changeset
5 * based on the equivalent C code in "postproc/swscale.c"
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20006
diff changeset
6 *
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20006
diff changeset
7 * This file is part of FFmpeg.
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20006
diff changeset
8 *
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20006
diff changeset
9 * FFmpeg is free software; you can redistribute it and/or modify
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20006
diff changeset
10 * it under the terms of the GNU General Public License as published by
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20006
diff changeset
11 * the Free Software Foundation; either version 2 of the License, or
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20006
diff changeset
12 * (at your option) any later version.
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20006
diff changeset
13 *
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20006
diff changeset
14 * FFmpeg is distributed in the hope that it will be useful,
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20006
diff changeset
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20006
diff changeset
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20006
diff changeset
17 * GNU General Public License for more details.
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20006
diff changeset
18 *
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20006
diff changeset
19 * You should have received a copy of the GNU General Public License
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20006
diff changeset
20 * along with FFmpeg; if not, write to the Free Software
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20006
diff changeset
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20006
diff changeset
22 */
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
23
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
24 #ifdef CONFIG_DARWIN
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
25 #define AVV(x...) (x)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
26 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
27 #define AVV(x...) {x}
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
28 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
29
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
30 #define vzero vec_splat_s32(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
31
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
32 static inline void
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
33 altivec_packIntArrayToCharArray(int *val, uint8_t* dest, int dstW) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
34 register int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
35 vector unsigned int altivec_vectorShiftInt19 =
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
36 vec_add(vec_splat_u32(10),vec_splat_u32(9));
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
37 if ((unsigned long)dest % 16) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
38 /* badly aligned store, we force store alignment */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
39 /* and will handle load misalignment on val w/ vec_perm */
20584
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
40 vector unsigned char perm1;
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
41 vector signed int v1;
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
42 for (i = 0 ; (i < dstW) &&
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
43 (((unsigned long)dest + i) % 16) ; i++) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
44 int t = val[i] >> 19;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
45 dest[i] = (t < 0) ? 0 : ((t > 255) ? 255 : t);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
46 }
20584
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
47 perm1 = vec_lvsl(i << 2, val);
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
48 v1 = vec_ld(i << 2, val);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
49 for ( ; i < (dstW - 15); i+=16) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
50 int offset = i << 2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
51 vector signed int v2 = vec_ld(offset + 16, val);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
52 vector signed int v3 = vec_ld(offset + 32, val);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
53 vector signed int v4 = vec_ld(offset + 48, val);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
54 vector signed int v5 = vec_ld(offset + 64, val);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
55 vector signed int v12 = vec_perm(v1,v2,perm1);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
56 vector signed int v23 = vec_perm(v2,v3,perm1);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
57 vector signed int v34 = vec_perm(v3,v4,perm1);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
58 vector signed int v45 = vec_perm(v4,v5,perm1);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
59
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
60 vector signed int vA = vec_sra(v12, altivec_vectorShiftInt19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
61 vector signed int vB = vec_sra(v23, altivec_vectorShiftInt19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
62 vector signed int vC = vec_sra(v34, altivec_vectorShiftInt19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
63 vector signed int vD = vec_sra(v45, altivec_vectorShiftInt19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
64 vector unsigned short vs1 = vec_packsu(vA, vB);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
65 vector unsigned short vs2 = vec_packsu(vC, vD);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
66 vector unsigned char vf = vec_packsu(vs1, vs2);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
67 vec_st(vf, i, dest);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
68 v1 = v5;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
69 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
70 } else { // dest is properly aligned, great
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
71 for (i = 0; i < (dstW - 15); i+=16) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
72 int offset = i << 2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
73 vector signed int v1 = vec_ld(offset, val);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
74 vector signed int v2 = vec_ld(offset + 16, val);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
75 vector signed int v3 = vec_ld(offset + 32, val);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
76 vector signed int v4 = vec_ld(offset + 48, val);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
77 vector signed int v5 = vec_sra(v1, altivec_vectorShiftInt19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
78 vector signed int v6 = vec_sra(v2, altivec_vectorShiftInt19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
79 vector signed int v7 = vec_sra(v3, altivec_vectorShiftInt19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
80 vector signed int v8 = vec_sra(v4, altivec_vectorShiftInt19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
81 vector unsigned short vs1 = vec_packsu(v5, v6);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
82 vector unsigned short vs2 = vec_packsu(v7, v8);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
83 vector unsigned char vf = vec_packsu(vs1, vs2);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
84 vec_st(vf, i, dest);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
85 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
86 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
87 for ( ; i < dstW ; i++) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
88 int t = val[i] >> 19;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
89 dest[i] = (t < 0) ? 0 : ((t > 255) ? 255 : t);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
90 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
91 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
92
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
93 static inline void
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
94 yuv2yuvX_altivec_real(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
95 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
96 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
97 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
98 const vector signed int vini = {(1 << 18), (1 << 18), (1 << 18), (1 << 18)};
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
99 register int i, j;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
100 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
101 int __attribute__ ((aligned (16))) val[dstW];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
102
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
103 for (i = 0; i < (dstW -7); i+=4) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
104 vec_st(vini, i << 2, val);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
105 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
106 for (; i < dstW; i++) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
107 val[i] = (1 << 18);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
108 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
109
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
110 for (j = 0; j < lumFilterSize; j++) {
20584
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
111 vector signed short l1, vLumFilter = vec_ld(j << 1, lumFilter);
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
112 vector unsigned char perm, perm0 = vec_lvsl(j << 1, lumFilter);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
113 vLumFilter = vec_perm(vLumFilter, vLumFilter, perm0);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
114 vLumFilter = vec_splat(vLumFilter, 0); // lumFilter[j] is loaded 8 times in vLumFilter
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
115
20584
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
116 perm = vec_lvsl(0, lumSrc[j]);
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
117 l1 = vec_ld(0, lumSrc[j]);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
118
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
119 for (i = 0; i < (dstW - 7); i+=8) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
120 int offset = i << 2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
121 vector signed short l2 = vec_ld((i << 1) + 16, lumSrc[j]);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
122
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
123 vector signed int v1 = vec_ld(offset, val);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
124 vector signed int v2 = vec_ld(offset + 16, val);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
125
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
126 vector signed short ls = vec_perm(l1, l2, perm); // lumSrc[j][i] ... lumSrc[j][i+7]
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
127
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
128 vector signed int i1 = vec_mule(vLumFilter, ls);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
129 vector signed int i2 = vec_mulo(vLumFilter, ls);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
130
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
131 vector signed int vf1 = vec_mergeh(i1, i2);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
132 vector signed int vf2 = vec_mergel(i1, i2); // lumSrc[j][i] * lumFilter[j] ... lumSrc[j][i+7] * lumFilter[j]
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
133
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
134 vector signed int vo1 = vec_add(v1, vf1);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
135 vector signed int vo2 = vec_add(v2, vf2);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
136
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
137 vec_st(vo1, offset, val);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
138 vec_st(vo2, offset + 16, val);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
139
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
140 l1 = l2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
141 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
142 for ( ; i < dstW; i++) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
143 val[i] += lumSrc[j][i] * lumFilter[j];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
144 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
145 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
146 altivec_packIntArrayToCharArray(val,dest,dstW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
147 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
148 if (uDest != 0) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
149 int __attribute__ ((aligned (16))) u[chrDstW];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
150 int __attribute__ ((aligned (16))) v[chrDstW];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
151
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
152 for (i = 0; i < (chrDstW -7); i+=4) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
153 vec_st(vini, i << 2, u);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
154 vec_st(vini, i << 2, v);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
155 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
156 for (; i < chrDstW; i++) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
157 u[i] = (1 << 18);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
158 v[i] = (1 << 18);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
159 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
160
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
161 for (j = 0; j < chrFilterSize; j++) {
20584
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
162 vector signed short l1, l1_V, vChrFilter = vec_ld(j << 1, chrFilter);
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
163 vector unsigned char perm, perm0 = vec_lvsl(j << 1, chrFilter);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
164 vChrFilter = vec_perm(vChrFilter, vChrFilter, perm0);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
165 vChrFilter = vec_splat(vChrFilter, 0); // chrFilter[j] is loaded 8 times in vChrFilter
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
166
20584
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
167 perm = vec_lvsl(0, chrSrc[j]);
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
168 l1 = vec_ld(0, chrSrc[j]);
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
169 l1_V = vec_ld(2048 << 1, chrSrc[j]);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
170
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
171 for (i = 0; i < (chrDstW - 7); i+=8) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
172 int offset = i << 2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
173 vector signed short l2 = vec_ld((i << 1) + 16, chrSrc[j]);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
174 vector signed short l2_V = vec_ld(((i + 2048) << 1) + 16, chrSrc[j]);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
175
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
176 vector signed int v1 = vec_ld(offset, u);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
177 vector signed int v2 = vec_ld(offset + 16, u);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
178 vector signed int v1_V = vec_ld(offset, v);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
179 vector signed int v2_V = vec_ld(offset + 16, v);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
180
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
181 vector signed short ls = vec_perm(l1, l2, perm); // chrSrc[j][i] ... chrSrc[j][i+7]
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
182 vector signed short ls_V = vec_perm(l1_V, l2_V, perm); // chrSrc[j][i+2048] ... chrSrc[j][i+2055]
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
183
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
184 vector signed int i1 = vec_mule(vChrFilter, ls);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
185 vector signed int i2 = vec_mulo(vChrFilter, ls);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
186 vector signed int i1_V = vec_mule(vChrFilter, ls_V);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
187 vector signed int i2_V = vec_mulo(vChrFilter, ls_V);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
188
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
189 vector signed int vf1 = vec_mergeh(i1, i2);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
190 vector signed int vf2 = vec_mergel(i1, i2); // chrSrc[j][i] * chrFilter[j] ... chrSrc[j][i+7] * chrFilter[j]
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
191 vector signed int vf1_V = vec_mergeh(i1_V, i2_V);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
192 vector signed int vf2_V = vec_mergel(i1_V, i2_V); // chrSrc[j][i+2048] * chrFilter[j] ... chrSrc[j][i+2055] * chrFilter[j]
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
193
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
194 vector signed int vo1 = vec_add(v1, vf1);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
195 vector signed int vo2 = vec_add(v2, vf2);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
196 vector signed int vo1_V = vec_add(v1_V, vf1_V);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
197 vector signed int vo2_V = vec_add(v2_V, vf2_V);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
198
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
199 vec_st(vo1, offset, u);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
200 vec_st(vo2, offset + 16, u);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
201 vec_st(vo1_V, offset, v);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
202 vec_st(vo2_V, offset + 16, v);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
203
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
204 l1 = l2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
205 l1_V = l2_V;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
206 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
207 for ( ; i < chrDstW; i++) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
208 u[i] += chrSrc[j][i] * chrFilter[j];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
209 v[i] += chrSrc[j][i + 2048] * chrFilter[j];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
210 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
211 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
212 altivec_packIntArrayToCharArray(u,uDest,chrDstW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
213 altivec_packIntArrayToCharArray(v,vDest,chrDstW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
214 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
215 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
216
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
217 static inline void hScale_altivec_real(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc, int16_t *filter, int16_t *filterPos, int filterSize) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
218 register int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
219 int __attribute__ ((aligned (16))) tempo[4];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
220
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
221 if (filterSize % 4) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
222 for(i=0; i<dstW; i++) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
223 register int j;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
224 register int srcPos = filterPos[i];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
225 register int val = 0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
226 for(j=0; j<filterSize; j++) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
227 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
228 }
22321
5f47768cbda5 Add av_ prefix to clip functions
reimar
parents: 21760
diff changeset
229 dst[i] = av_clip(val>>7, 0, (1<<15)-1);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
230 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
231 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
232 else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
233 switch (filterSize) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
234 case 4:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
235 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
236 for(i=0; i<dstW; i++) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
237 register int srcPos = filterPos[i];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
238
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
239 vector unsigned char src_v0 = vec_ld(srcPos, src);
20584
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
240 vector unsigned char src_v1, src_vF;
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
241 vector signed short src_v, filter_v;
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
242 vector signed int val_vEven, val_s;
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
243 if ((((int)src + srcPos)% 16) > 12) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
244 src_v1 = vec_ld(srcPos + 16, src);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
245 }
20584
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
246 src_vF = vec_perm(src_v0, src_v1, vec_lvsl(srcPos, src));
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
247
20584
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
248 src_v = // vec_unpackh sign-extends...
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
249 (vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
250 // now put our elements in the even slots
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
251 src_v = vec_mergeh(src_v, (vector signed short)vzero);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
252
20584
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
253 filter_v = vec_ld(i << 3, filter);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
254 // the 3 above is 2 (filterSize == 4) + 1 (sizeof(short) == 2)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
255
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
256 // the neat trick : we only care for half the elements,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
257 // high or low depending on (i<<3)%16 (it's 0 or 8 here),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
258 // and we're going to use vec_mule, so we choose
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
259 // carefully how to "unpack" the elements into the even slots
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
260 if ((i << 3) % 16)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
261 filter_v = vec_mergel(filter_v,(vector signed short)vzero);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
262 else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
263 filter_v = vec_mergeh(filter_v,(vector signed short)vzero);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
264
20584
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
265 val_vEven = vec_mule(src_v, filter_v);
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
266 val_s = vec_sums(val_vEven, vzero);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
267 vec_st(val_s, 0, tempo);
22321
5f47768cbda5 Add av_ prefix to clip functions
reimar
parents: 21760
diff changeset
268 dst[i] = av_clip(tempo[3]>>7, 0, (1<<15)-1);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
269 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
270 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
271 break;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
272
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
273 case 8:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
274 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
275 for(i=0; i<dstW; i++) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
276 register int srcPos = filterPos[i];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
277
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
278 vector unsigned char src_v0 = vec_ld(srcPos, src);
20584
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
279 vector unsigned char src_v1, src_vF;
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
280 vector signed short src_v, filter_v;
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
281 vector signed int val_v, val_s;
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
282 if ((((int)src + srcPos)% 16) > 8) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
283 src_v1 = vec_ld(srcPos + 16, src);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
284 }
20584
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
285 src_vF = vec_perm(src_v0, src_v1, vec_lvsl(srcPos, src));
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
286
20584
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
287 src_v = // vec_unpackh sign-extends...
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
288 (vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
20584
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
289 filter_v = vec_ld(i << 4, filter);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
290 // the 4 above is 3 (filterSize == 8) + 1 (sizeof(short) == 2)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
291
20584
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
292 val_v = vec_msums(src_v, filter_v, (vector signed int)vzero);
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
293 val_s = vec_sums(val_v, vzero);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
294 vec_st(val_s, 0, tempo);
22321
5f47768cbda5 Add av_ prefix to clip functions
reimar
parents: 21760
diff changeset
295 dst[i] = av_clip(tempo[3]>>7, 0, (1<<15)-1);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
296 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
297 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
298 break;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
299
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
300 case 16:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
301 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
302 for(i=0; i<dstW; i++) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
303 register int srcPos = filterPos[i];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
304
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
305 vector unsigned char src_v0 = vec_ld(srcPos, src);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
306 vector unsigned char src_v1 = vec_ld(srcPos + 16, src);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
307 vector unsigned char src_vF = vec_perm(src_v0, src_v1, vec_lvsl(srcPos, src));
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
308
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
309 vector signed short src_vA = // vec_unpackh sign-extends...
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
310 (vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
311 vector signed short src_vB = // vec_unpackh sign-extends...
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
312 (vector signed short)(vec_mergel((vector unsigned char)vzero, src_vF));
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
313
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
314 vector signed short filter_v0 = vec_ld(i << 5, filter);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
315 vector signed short filter_v1 = vec_ld((i << 5) + 16, filter);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
316 // the 5 above are 4 (filterSize == 16) + 1 (sizeof(short) == 2)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
317
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
318 vector signed int val_acc = vec_msums(src_vA, filter_v0, (vector signed int)vzero);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
319 vector signed int val_v = vec_msums(src_vB, filter_v1, val_acc);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
320
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
321 vector signed int val_s = vec_sums(val_v, vzero);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
322
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
323 vec_st(val_s, 0, tempo);
22321
5f47768cbda5 Add av_ prefix to clip functions
reimar
parents: 21760
diff changeset
324 dst[i] = av_clip(tempo[3]>>7, 0, (1<<15)-1);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
325 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
326 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
327 break;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
328
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
329 default:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
330 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
331 for(i=0; i<dstW; i++) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
332 register int j;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
333 register int srcPos = filterPos[i];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
334
20584
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
335 vector signed int val_s, val_v = (vector signed int)vzero;
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
336 vector signed short filter_v0R = vec_ld(i * 2 * filterSize, filter);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
337 vector unsigned char permF = vec_lvsl((i * 2 * filterSize), filter);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
338
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
339 vector unsigned char src_v0 = vec_ld(srcPos, src);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
340 vector unsigned char permS = vec_lvsl(srcPos, src);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
341
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
342 for (j = 0 ; j < filterSize - 15; j += 16) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
343 vector unsigned char src_v1 = vec_ld(srcPos + j + 16, src);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
344 vector unsigned char src_vF = vec_perm(src_v0, src_v1, permS);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
345
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
346 vector signed short src_vA = // vec_unpackh sign-extends...
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
347 (vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
348 vector signed short src_vB = // vec_unpackh sign-extends...
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
349 (vector signed short)(vec_mergel((vector unsigned char)vzero, src_vF));
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
350
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
351 vector signed short filter_v1R = vec_ld((i * 2 * filterSize) + (j * 2) + 16, filter);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
352 vector signed short filter_v2R = vec_ld((i * 2 * filterSize) + (j * 2) + 32, filter);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
353 vector signed short filter_v0 = vec_perm(filter_v0R, filter_v1R, permF);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
354 vector signed short filter_v1 = vec_perm(filter_v1R, filter_v2R, permF);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
355
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
356 vector signed int val_acc = vec_msums(src_vA, filter_v0, val_v);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
357 val_v = vec_msums(src_vB, filter_v1, val_acc);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
358
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
359 filter_v0R = filter_v2R;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
360 src_v0 = src_v1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
361 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
362
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
363 if (j < (filterSize-7)) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
364 // loading src_v0 is useless, it's already done above
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
365 //vector unsigned char src_v0 = vec_ld(srcPos + j, src);
20584
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
366 vector unsigned char src_v1, src_vF;
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
367 vector signed short src_v, filter_v1R, filter_v;
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
368 if ((((int)src + srcPos)% 16) > 8) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
369 src_v1 = vec_ld(srcPos + j + 16, src);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
370 }
20584
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
371 src_vF = vec_perm(src_v0, src_v1, permS);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
372
20584
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
373 src_v = // vec_unpackh sign-extends...
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
374 (vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
375 // loading filter_v0R is useless, it's already done above
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
376 //vector signed short filter_v0R = vec_ld((i * 2 * filterSize) + j, filter);
20584
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
377 filter_v1R = vec_ld((i * 2 * filterSize) + (j * 2) + 16, filter);
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
378 filter_v = vec_perm(filter_v0R, filter_v1R, permF);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
379
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
380 val_v = vec_msums(src_v, filter_v, val_v);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
381 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
382
20584
200ec2faa4e1 Do not mix declarations and statements.
diego
parents: 20094
diff changeset
383 val_s = vec_sums(val_v, vzero);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
384
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
385 vec_st(val_s, 0, tempo);
22321
5f47768cbda5 Add av_ prefix to clip functions
reimar
parents: 21760
diff changeset
386 dst[i] = av_clip(tempo[3]>>7, 0, (1<<15)-1);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
387 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
388
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
389 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
390 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
391 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
392
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
393 static inline int yv12toyuy2_unscaled_altivec(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
394 int srcSliceH, uint8_t* dstParam[], int dstStride_a[]) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
395 uint8_t *dst=dstParam[0] + dstStride_a[0]*srcSliceY;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
396 // yv12toyuy2( src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0] );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
397 uint8_t *ysrc = src[0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
398 uint8_t *usrc = src[1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
399 uint8_t *vsrc = src[2];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
400 const int width = c->srcW;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
401 const int height = srcSliceH;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
402 const int lumStride = srcStride[0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
403 const int chromStride = srcStride[1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
404 const int dstStride = dstStride_a[0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
405 const vector unsigned char yperm = vec_lvsl(0, ysrc);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
406 const int vertLumPerChroma = 2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
407 register unsigned int y;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
408
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
409 if(width&15){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
410 yv12toyuy2( ysrc, usrc, vsrc, dst,c->srcW,srcSliceH, lumStride, chromStride, dstStride);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
411 return srcSliceH;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
412 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
413
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
414 /* this code assume:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
415
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
416 1) dst is 16 bytes-aligned
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
417 2) dstStride is a multiple of 16
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
418 3) width is a multiple of 16
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
419 4) lum&chrom stride are multiple of 8
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
420 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
421
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
422 for(y=0; y<height; y++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
423 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
424 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
425 for (i = 0; i < width - 31; i+= 32) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
426 const unsigned int j = i >> 1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
427 vector unsigned char v_yA = vec_ld(i, ysrc);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
428 vector unsigned char v_yB = vec_ld(i + 16, ysrc);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
429 vector unsigned char v_yC = vec_ld(i + 32, ysrc);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
430 vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
431 vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
432 vector unsigned char v_uA = vec_ld(j, usrc);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
433 vector unsigned char v_uB = vec_ld(j + 16, usrc);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
434 vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
435 vector unsigned char v_vA = vec_ld(j, vsrc);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
436 vector unsigned char v_vB = vec_ld(j + 16, vsrc);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
437 vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
438 vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
439 vector unsigned char v_uv_b = vec_mergel(v_u, v_v);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
440 vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
441 vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
442 vector unsigned char v_yuy2_2 = vec_mergeh(v_y2, v_uv_b);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
443 vector unsigned char v_yuy2_3 = vec_mergel(v_y2, v_uv_b);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
444 vec_st(v_yuy2_0, (i << 1), dst);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
445 vec_st(v_yuy2_1, (i << 1) + 16, dst);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
446 vec_st(v_yuy2_2, (i << 1) + 32, dst);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
447 vec_st(v_yuy2_3, (i << 1) + 48, dst);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
448 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
449 if (i < width) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
450 const unsigned int j = i >> 1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
451 vector unsigned char v_y1 = vec_ld(i, ysrc);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
452 vector unsigned char v_u = vec_ld(j, usrc);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
453 vector unsigned char v_v = vec_ld(j, vsrc);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
454 vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
455 vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
456 vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
457 vec_st(v_yuy2_0, (i << 1), dst);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
458 vec_st(v_yuy2_1, (i << 1) + 16, dst);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
459 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
460 if((y&(vertLumPerChroma-1))==(vertLumPerChroma-1) )
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
461 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
462 usrc += chromStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
463 vsrc += chromStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
464 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
465 ysrc += lumStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
466 dst += dstStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
467 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
468
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
469 return srcSliceH;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
470 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
471
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
472 static inline int yv12touyvy_unscaled_altivec(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
473 int srcSliceH, uint8_t* dstParam[], int dstStride_a[]) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
474 uint8_t *dst=dstParam[0] + dstStride_a[0]*srcSliceY;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
475 // yv12toyuy2( src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0] );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
476 uint8_t *ysrc = src[0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
477 uint8_t *usrc = src[1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
478 uint8_t *vsrc = src[2];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
479 const int width = c->srcW;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
480 const int height = srcSliceH;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
481 const int lumStride = srcStride[0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
482 const int chromStride = srcStride[1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
483 const int dstStride = dstStride_a[0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
484 const int vertLumPerChroma = 2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
485 const vector unsigned char yperm = vec_lvsl(0, ysrc);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
486 register unsigned int y;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
487
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
488 if(width&15){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
489 yv12touyvy( ysrc, usrc, vsrc, dst,c->srcW,srcSliceH, lumStride, chromStride, dstStride);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
490 return srcSliceH;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
491 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
492
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
493 /* this code assume:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
494
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
495 1) dst is 16 bytes-aligned
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
496 2) dstStride is a multiple of 16
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
497 3) width is a multiple of 16
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
498 4) lum&chrom stride are multiple of 8
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
499 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
500
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
501 for(y=0; y<height; y++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
502 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
503 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
504 for (i = 0; i < width - 31; i+= 32) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
505 const unsigned int j = i >> 1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
506 vector unsigned char v_yA = vec_ld(i, ysrc);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
507 vector unsigned char v_yB = vec_ld(i + 16, ysrc);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
508 vector unsigned char v_yC = vec_ld(i + 32, ysrc);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
509 vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
510 vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
511 vector unsigned char v_uA = vec_ld(j, usrc);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
512 vector unsigned char v_uB = vec_ld(j + 16, usrc);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
513 vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
514 vector unsigned char v_vA = vec_ld(j, vsrc);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
515 vector unsigned char v_vB = vec_ld(j + 16, vsrc);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
516 vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
517 vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
518 vector unsigned char v_uv_b = vec_mergel(v_u, v_v);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
519 vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
520 vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
521 vector unsigned char v_uyvy_2 = vec_mergeh(v_uv_b, v_y2);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
522 vector unsigned char v_uyvy_3 = vec_mergel(v_uv_b, v_y2);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
523 vec_st(v_uyvy_0, (i << 1), dst);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
524 vec_st(v_uyvy_1, (i << 1) + 16, dst);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
525 vec_st(v_uyvy_2, (i << 1) + 32, dst);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
526 vec_st(v_uyvy_3, (i << 1) + 48, dst);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
527 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
528 if (i < width) {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
529 const unsigned int j = i >> 1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
530 vector unsigned char v_y1 = vec_ld(i, ysrc);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
531 vector unsigned char v_u = vec_ld(j, usrc);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
532 vector unsigned char v_v = vec_ld(j, vsrc);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
533 vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
534 vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
535 vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
536 vec_st(v_uyvy_0, (i << 1), dst);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
537 vec_st(v_uyvy_1, (i << 1) + 16, dst);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
538 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
539 if((y&(vertLumPerChroma-1))==(vertLumPerChroma-1) )
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
540 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
541 usrc += chromStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
542 vsrc += chromStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
543 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
544 ysrc += lumStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
545 dst += dstStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
546 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
547 return srcSliceH;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
548 }