Mercurial > libavcodec.hg
annotate ppc/h264_template_altivec.c @ 12454:f4355cd85faa libavcodec
Port latest x264 deblock asm (before they moved to using NV12 as internal
format), LGPL'ed with permission from Jason and Loren. This includes mmx2
code, so remove inline asm from h264dsp_mmx.c accordingly.
author | rbultje |
---|---|
date | Fri, 03 Sep 2010 16:52:46 +0000 |
parents | 3cd4cd0509cd |
children |
rev | line source |
---|---|
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
1 /* |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
2 * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org> |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
3 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3577
diff
changeset
|
4 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3577
diff
changeset
|
5 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3577
diff
changeset
|
6 * FFmpeg is free software; you can redistribute it and/or |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
7 * modify it under the terms of the GNU Lesser General Public |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
8 * License as published by the Free Software Foundation; either |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3577
diff
changeset
|
9 * version 2.1 of the License, or (at your option) any later version. |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
10 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3577
diff
changeset
|
11 * FFmpeg is distributed in the hope that it will be useful, |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
14 * Lesser General Public License for more details. |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
15 * |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
16 * You should have received a copy of the GNU Lesser General Public |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3577
diff
changeset
|
17 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2967
diff
changeset
|
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
19 */ |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
20 |
5603
861eb234e6ba
remove alignment correction of the destination pointers in luma_16x6
gpoirier
parents:
5530
diff
changeset
|
21 //#define DEBUG_ALIGNMENT |
861eb234e6ba
remove alignment correction of the destination pointers in luma_16x6
gpoirier
parents:
5530
diff
changeset
|
22 #ifdef DEBUG_ALIGNMENT |
861eb234e6ba
remove alignment correction of the destination pointers in luma_16x6
gpoirier
parents:
5530
diff
changeset
|
/* Debug check that ptr is 16-byte aligned, i.e. that the low four bits of
 * its address are all zero.  The trailing semicolon is part of the macro so
 * that it can be used with or without one at the call site. */
#define ASSERT_ALIGNED(ptr) assert(!((unsigned long)(ptr) & 0x0000000F));
861eb234e6ba
remove alignment correction of the destination pointers in luma_16x6
gpoirier
parents:
5530
diff
changeset
|
24 #else |
861eb234e6ba
remove alignment correction of the destination pointers in luma_16x6
gpoirier
parents:
5530
diff
changeset
|
25 #define ASSERT_ALIGNED(ptr) ; |
861eb234e6ba
remove alignment correction of the destination pointers in luma_16x6
gpoirier
parents:
5530
diff
changeset
|
26 #endif |
861eb234e6ba
remove alignment correction of the destination pointers in luma_16x6
gpoirier
parents:
5530
diff
changeset
|
27 |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
28 /* this code assumes that stride % 16 == 0 */ |
6059 | 29 |
/*
 * Emits one output row of the four-tap chroma filter.
 *
 * Expands in place and works entirely on names from the expansion site:
 * vsrc0ssH/vsrc1ssH (already widened) and vsrc2uc/vsrc3uc (still bytes) as
 * inputs, vA..vD as weights, v6us as shift count, fperm as merge pattern,
 * plus dst, src and stride, which are advanced by one row.
 *
 * BIAS1 is the initial addend of the multiply-add chain; BIAS2 is a
 * function-like macro applied to the finished sum before the shift.
 * The expansion ends in a complete statement, so no ';' is needed after it.
 */
#define CHROMA_MC8_ALTIVEC_CORE(BIAS1, BIAS2)                            \
        /* zero-extend the first 8 bytes of each vector to 16 bits */    \
        vsrc2ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc2uc);       \
        vsrc3ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc3uc);       \
                                                                         \
        /* psum = A*s0 + B*s1 + C*s2 + D*s3 + BIAS1 */                   \
        psum = vec_mladd(vA, vsrc0ssH, BIAS1);                           \
        psum = vec_mladd(vB, vsrc1ssH, psum);                            \
        psum = vec_mladd(vC, vsrc2ssH, psum);                            \
        psum = vec_mladd(vD, vsrc3ssH, psum);                            \
        psum = vec_sr(BIAS2(psum), v6us);                                \
                                                                         \
        /* both halves of ppsum hold the same 8 result bytes */          \
        ppsum = (vec_u8)vec_pack(psum, psum);                            \
        vdst  = vec_ld(0, dst);                                          \
        vfdst = vec_perm(vdst, ppsum, fperm);                            \
                                                                         \
        /* OP_U8_ALTIVEC is supplied from outside this section */        \
        OP_U8_ALTIVEC(fsum, vfdst, vdst);                                \
                                                                         \
        vec_st(fsum, 0, dst);                                            \
                                                                         \
        /* the 2/3 pair becomes the 0/1 pair of the next expansion */    \
        vsrc0ssH = vsrc2ssH;                                             \
        vsrc1ssH = vsrc3ssH;                                             \
                                                                         \
        src += stride;                                                   \
        dst += stride;
54 | |
6062
9d1590a4df90
Partially address issue299, no performance change apparently
lu_zero
parents:
6061
diff
changeset
|
/*
 * Emits one output row of the two-tap chroma filter.
 *
 * Expands in place and works on names from the expansion site: vsrc0uc and
 * vsrc1uc as byte inputs, vA and vE as weights, v32ss as addend, v6us as
 * shift count, fperm as merge pattern, plus dst, src and stride, which are
 * advanced by one row.  The expansion ends in a complete statement, so no
 * ';' is needed after it.
 */
#define CHROMA_MC8_ALTIVEC_CORE_SIMPLE                                   \
                                                                         \
        /* zero-extend the first 8 bytes of each vector to 16 bits */    \
        vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc0uc);       \
        vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc1uc);       \
                                                                         \
        /* psum = (A*s0 + E*s1 + v32ss) >> v6us */                       \
        psum = vec_mladd(vA, vsrc0ssH, v32ss);                           \
        psum = vec_mladd(vE, vsrc1ssH, psum);                            \
        psum = vec_sr(psum, v6us);                                       \
                                                                         \
        /* both halves of ppsum hold the same 8 result bytes */          \
        ppsum = (vec_u8)vec_pack(psum, psum);                            \
        vdst  = vec_ld(0, dst);                                          \
        vfdst = vec_perm(vdst, ppsum, fperm);                            \
                                                                         \
        /* OP_U8_ALTIVEC is supplied from outside this section */        \
        OP_U8_ALTIVEC(fsum, vfdst, vdst);                                \
                                                                         \
        vec_st(fsum, 0, dst);                                            \
                                                                         \
        src += stride;                                                   \
        dst += stride;
6059 | 74 |
/* Hooks passed as the BIAS2 argument of CHROMA_MC8_ALTIVEC_CORE. */
/* noop: yields its argument unchanged. */
#define noop(a) (a)
/* add28: adds the vector v28ss, which must be in scope where this expands. */
#define add28(a) vec_add(v28ss, (a))
77 | |
11382
50415a8f1451
PPC: move prototypes to headers and make some functions static
mru
parents:
11369
diff
changeset
|
78 static void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, |
6063 | 79 int stride, int h, int x, int y) { |
11369 | 80 DECLARE_ALIGNED(16, signed int, ABCD)[4] = |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
81 {((8 - x) * (8 - y)), |
6058 | 82 (( x) * (8 - y)), |
83 ((8 - x) * ( y)), | |
84 (( x) * ( y))}; | |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
85 register int i; |
8494 | 86 vec_u8 fperm; |
87 const vec_s32 vABCD = vec_ld(0, ABCD); | |
88 const vec_s16 vA = vec_splat((vec_s16)vABCD, 1); | |
89 const vec_s16 vB = vec_splat((vec_s16)vABCD, 3); | |
90 const vec_s16 vC = vec_splat((vec_s16)vABCD, 5); | |
91 const vec_s16 vD = vec_splat((vec_s16)vABCD, 7); | |
5530
cd266411b11a
use shorter types vec_"type" instead of the too long vector "type"
gpoirier
parents:
5019
diff
changeset
|
92 LOAD_ZERO; |
8494 | 93 const vec_s16 v32ss = vec_sl(vec_splat_s16(1),vec_splat_u16(5)); |
94 const vec_u16 v6us = vec_splat_u16(6); | |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
95 register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1; |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
96 register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0; |
2967 | 97 |
9166
40738baaafc2
Add av_uninit to vsrcBuc variable to work around some
diego
parents:
8494
diff
changeset
|
98 vec_u8 vsrcAuc, av_uninit(vsrcBuc), vsrcperm0, vsrcperm1; |
8494 | 99 vec_u8 vsrc0uc, vsrc1uc; |
100 vec_s16 vsrc0ssH, vsrc1ssH; | |
101 vec_u8 vsrcCuc, vsrc2uc, vsrc3uc; | |
102 vec_s16 vsrc2ssH, vsrc3ssH, psum; | |
103 vec_u8 vdst, ppsum, vfdst, fsum; | |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
104 |
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
105 if (((unsigned long)dst) % 16 == 0) { |
8494 | 106 fperm = (vec_u8){0x10, 0x11, 0x12, 0x13, |
9167 | 107 0x14, 0x15, 0x16, 0x17, |
108 0x08, 0x09, 0x0A, 0x0B, | |
109 0x0C, 0x0D, 0x0E, 0x0F}; | |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
110 } else { |
8494 | 111 fperm = (vec_u8){0x00, 0x01, 0x02, 0x03, |
9167 | 112 0x04, 0x05, 0x06, 0x07, |
113 0x18, 0x19, 0x1A, 0x1B, | |
114 0x1C, 0x1D, 0x1E, 0x1F}; | |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
115 } |
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
116 |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
117 vsrcAuc = vec_ld(0, src); |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
118 |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
119 if (loadSecond) |
6060 | 120 vsrcBuc = vec_ld(16, src); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
121 vsrcperm0 = vec_lvsl(0, src); |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
122 vsrcperm1 = vec_lvsl(1, src); |
2967 | 123 |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
124 vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0); |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
125 if (reallyBadAlign) |
6060 | 126 vsrc1uc = vsrcBuc; |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
127 else |
6060 | 128 vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1); |
2967 | 129 |
8494 | 130 vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc0uc); |
131 vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc1uc); | |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
132 |
6062
9d1590a4df90
Partially address issue299, no performance change apparently
lu_zero
parents:
6061
diff
changeset
|
133 if (ABCD[3]) { |
6063 | 134 if (!loadSecond) {// -> !reallyBadAlign |
135 for (i = 0 ; i < h ; i++) { | |
136 vsrcCuc = vec_ld(stride + 0, src); | |
137 vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0); | |
138 vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1); | |
2967 | 139 |
9444 | 140 CHROMA_MC8_ALTIVEC_CORE(v32ss, noop) |
6063 | 141 } |
142 } else { | |
8494 | 143 vec_u8 vsrcDuc; |
6063 | 144 for (i = 0 ; i < h ; i++) { |
145 vsrcCuc = vec_ld(stride + 0, src); | |
146 vsrcDuc = vec_ld(stride + 16, src); | |
147 vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0); | |
148 if (reallyBadAlign) | |
149 vsrc3uc = vsrcDuc; | |
150 else | |
151 vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1); | |
152 | |
9444 | 153 CHROMA_MC8_ALTIVEC_CORE(v32ss, noop) |
6063 | 154 } |
6060 | 155 } |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
156 } else { |
8494 | 157 const vec_s16 vE = vec_add(vB, vC); |
6065 | 158 if (ABCD[2]) { // x == 0 B == 0 |
6067 | 159 if (!loadSecond) {// -> !reallyBadAlign |
160 for (i = 0 ; i < h ; i++) { | |
161 vsrcCuc = vec_ld(stride + 0, src); | |
162 vsrc1uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0); | |
163 CHROMA_MC8_ALTIVEC_CORE_SIMPLE | |
2967 | 164 |
6067 | 165 vsrc0uc = vsrc1uc; |
166 } | |
167 } else { | |
8494 | 168 vec_u8 vsrcDuc; |
6067 | 169 for (i = 0 ; i < h ; i++) { |
170 vsrcCuc = vec_ld(stride + 0, src); | |
171 vsrcDuc = vec_ld(stride + 15, src); | |
172 vsrc1uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0); | |
173 CHROMA_MC8_ALTIVEC_CORE_SIMPLE | |
174 | |
175 vsrc0uc = vsrc1uc; | |
176 } | |
6063 | 177 } |
6065 | 178 } else { // y == 0 C == 0 |
6067 | 179 if (!loadSecond) {// -> !reallyBadAlign |
180 for (i = 0 ; i < h ; i++) { | |
181 vsrcCuc = vec_ld(0, src); | |
182 vsrc0uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0); | |
183 vsrc1uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1); | |
6065 | 184 |
6067 | 185 CHROMA_MC8_ALTIVEC_CORE_SIMPLE |
186 } | |
187 } else { | |
8494 | 188 vec_u8 vsrcDuc; |
6067 | 189 for (i = 0 ; i < h ; i++) { |
190 vsrcCuc = vec_ld(0, src); | |
191 vsrcDuc = vec_ld(15, src); | |
192 vsrc0uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0); | |
193 if (reallyBadAlign) | |
194 vsrc1uc = vsrcDuc; | |
195 else | |
196 vsrc1uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1); | |
197 | |
198 CHROMA_MC8_ALTIVEC_CORE_SIMPLE | |
199 } | |
6064
62d040333d51
Add C/B == 0 cases, 2% slower on CELL but should address Issue299 eventually
lu_zero
parents:
6063
diff
changeset
|
200 } |
62d040333d51
Add C/B == 0 cases, 2% slower on CELL but should address Issue299 eventually
lu_zero
parents:
6063
diff
changeset
|
201 } |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
202 } |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
203 } |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
204 |
9444 | 205 /* this code assumes that stride % 16 == 0 */ |
11382
50415a8f1451
PPC: move prototypes to headers and make some functions static
mru
parents:
11369
diff
changeset
|
206 static void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) { |
11369 | 207 DECLARE_ALIGNED(16, signed int, ABCD)[4] = |
9444 | 208 {((8 - x) * (8 - y)), |
209 (( x) * (8 - y)), | |
210 ((8 - x) * ( y)), | |
211 (( x) * ( y))}; | |
212 register int i; | |
213 vec_u8 fperm; | |
214 const vec_s32 vABCD = vec_ld(0, ABCD); | |
215 const vec_s16 vA = vec_splat((vec_s16)vABCD, 1); | |
216 const vec_s16 vB = vec_splat((vec_s16)vABCD, 3); | |
217 const vec_s16 vC = vec_splat((vec_s16)vABCD, 5); | |
218 const vec_s16 vD = vec_splat((vec_s16)vABCD, 7); | |
219 LOAD_ZERO; | |
220 const vec_s16 v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4)); | |
221 const vec_u16 v6us = vec_splat_u16(6); | |
222 register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1; | |
223 register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0; | |
224 | |
225 vec_u8 vsrcAuc, av_uninit(vsrcBuc), vsrcperm0, vsrcperm1; | |
226 vec_u8 vsrc0uc, vsrc1uc; | |
227 vec_s16 vsrc0ssH, vsrc1ssH; | |
228 vec_u8 vsrcCuc, vsrc2uc, vsrc3uc; | |
229 vec_s16 vsrc2ssH, vsrc3ssH, psum; | |
230 vec_u8 vdst, ppsum, vfdst, fsum; | |
231 | |
232 if (((unsigned long)dst) % 16 == 0) { | |
233 fperm = (vec_u8){0x10, 0x11, 0x12, 0x13, | |
234 0x14, 0x15, 0x16, 0x17, | |
235 0x08, 0x09, 0x0A, 0x0B, | |
236 0x0C, 0x0D, 0x0E, 0x0F}; | |
237 } else { | |
238 fperm = (vec_u8){0x00, 0x01, 0x02, 0x03, | |
239 0x04, 0x05, 0x06, 0x07, | |
240 0x18, 0x19, 0x1A, 0x1B, | |
241 0x1C, 0x1D, 0x1E, 0x1F}; | |
242 } | |
243 | |
244 vsrcAuc = vec_ld(0, src); | |
245 | |
246 if (loadSecond) | |
247 vsrcBuc = vec_ld(16, src); | |
248 vsrcperm0 = vec_lvsl(0, src); | |
249 vsrcperm1 = vec_lvsl(1, src); | |
250 | |
251 vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0); | |
252 if (reallyBadAlign) | |
253 vsrc1uc = vsrcBuc; | |
254 else | |
255 vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1); | |
256 | |
257 vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc0uc); | |
258 vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc1uc); | |
259 | |
260 if (!loadSecond) {// -> !reallyBadAlign | |
261 for (i = 0 ; i < h ; i++) { | |
262 | |
263 | |
264 vsrcCuc = vec_ld(stride + 0, src); | |
265 | |
266 vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0); | |
267 vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1); | |
268 | |
269 CHROMA_MC8_ALTIVEC_CORE(vec_splat_s16(0), add28) | |
270 } | |
271 } else { | |
272 vec_u8 vsrcDuc; | |
273 for (i = 0 ; i < h ; i++) { | |
274 vsrcCuc = vec_ld(stride + 0, src); | |
275 vsrcDuc = vec_ld(stride + 16, src); | |
276 | |
277 vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0); | |
278 if (reallyBadAlign) | |
279 vsrc3uc = vsrcDuc; | |
280 else | |
281 vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1); | |
282 | |
283 CHROMA_MC8_ALTIVEC_CORE(vec_splat_s16(0), add28) | |
284 } | |
285 } | |
286 } | |
287 | |
/* Release every helper macro defined for the chroma functions above. */
#undef noop
#undef add28
#undef CHROMA_MC8_ALTIVEC_CORE
#undef CHROMA_MC8_ALTIVEC_CORE_SIMPLE
291 | |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
292 /* this code assumes that stride % 16 == 0 */ |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
293 static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) { |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
294 register int i; |
2967 | 295 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
296 LOAD_ZERO; |
8494 | 297 const vec_u8 permM2 = vec_lvsl(-2, src); |
298 const vec_u8 permM1 = vec_lvsl(-1, src); | |
299 const vec_u8 permP0 = vec_lvsl(+0, src); | |
300 const vec_u8 permP1 = vec_lvsl(+1, src); | |
301 const vec_u8 permP2 = vec_lvsl(+2, src); | |
302 const vec_u8 permP3 = vec_lvsl(+3, src); | |
303 const vec_s16 v5ss = vec_splat_s16(5); | |
304 const vec_u16 v5us = vec_splat_u16(5); | |
305 const vec_s16 v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2)); | |
306 const vec_s16 v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4)); | |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
307 |
8494 | 308 vec_u8 srcM2, srcM1, srcP0, srcP1, srcP2, srcP3; |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
309 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
310 register int align = ((((unsigned long)src) - 2) % 16); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
311 |
8494 | 312 vec_s16 srcP0A, srcP0B, srcP1A, srcP1B, |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
313 srcP2A, srcP2B, srcP3A, srcP3B, |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
314 srcM1A, srcM1B, srcM2A, srcM2B, |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
315 sum1A, sum1B, sum2A, sum2B, sum3A, sum3B, |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
316 pp1A, pp1B, pp2A, pp2B, pp3A, pp3B, |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
317 psumA, psumB, sumA, sumB; |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
318 |
8494 | 319 vec_u8 sum, vdst, fsum; |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
320 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
321 for (i = 0 ; i < 16 ; i ++) { |
8494 | 322 vec_u8 srcR1 = vec_ld(-2, src); |
323 vec_u8 srcR2 = vec_ld(14, src); | |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
324 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
325 switch (align) { |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
326 default: { |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
327 srcM2 = vec_perm(srcR1, srcR2, permM2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
328 srcM1 = vec_perm(srcR1, srcR2, permM1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
329 srcP0 = vec_perm(srcR1, srcR2, permP0); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
330 srcP1 = vec_perm(srcR1, srcR2, permP1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
331 srcP2 = vec_perm(srcR1, srcR2, permP2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
332 srcP3 = vec_perm(srcR1, srcR2, permP3); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
333 } break; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
334 case 11: { |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
335 srcM2 = vec_perm(srcR1, srcR2, permM2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
336 srcM1 = vec_perm(srcR1, srcR2, permM1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
337 srcP0 = vec_perm(srcR1, srcR2, permP0); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
338 srcP1 = vec_perm(srcR1, srcR2, permP1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
339 srcP2 = vec_perm(srcR1, srcR2, permP2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
340 srcP3 = srcR2; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
341 } break; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
342 case 12: { |
8494 | 343 vec_u8 srcR3 = vec_ld(30, src); |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
344 srcM2 = vec_perm(srcR1, srcR2, permM2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
345 srcM1 = vec_perm(srcR1, srcR2, permM1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
346 srcP0 = vec_perm(srcR1, srcR2, permP0); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
347 srcP1 = vec_perm(srcR1, srcR2, permP1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
348 srcP2 = srcR2; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
349 srcP3 = vec_perm(srcR2, srcR3, permP3); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
350 } break; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
351 case 13: { |
8494 | 352 vec_u8 srcR3 = vec_ld(30, src); |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
353 srcM2 = vec_perm(srcR1, srcR2, permM2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
354 srcM1 = vec_perm(srcR1, srcR2, permM1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
355 srcP0 = vec_perm(srcR1, srcR2, permP0); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
356 srcP1 = srcR2; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
357 srcP2 = vec_perm(srcR2, srcR3, permP2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
358 srcP3 = vec_perm(srcR2, srcR3, permP3); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
359 } break; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
360 case 14: { |
8494 | 361 vec_u8 srcR3 = vec_ld(30, src); |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
362 srcM2 = vec_perm(srcR1, srcR2, permM2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
363 srcM1 = vec_perm(srcR1, srcR2, permM1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
364 srcP0 = srcR2; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
365 srcP1 = vec_perm(srcR2, srcR3, permP1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
366 srcP2 = vec_perm(srcR2, srcR3, permP2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
367 srcP3 = vec_perm(srcR2, srcR3, permP3); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
368 } break; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
369 case 15: { |
8494 | 370 vec_u8 srcR3 = vec_ld(30, src); |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
371 srcM2 = vec_perm(srcR1, srcR2, permM2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
372 srcM1 = srcR2; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
373 srcP0 = vec_perm(srcR2, srcR3, permP0); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
374 srcP1 = vec_perm(srcR2, srcR3, permP1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
375 srcP2 = vec_perm(srcR2, srcR3, permP2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
376 srcP3 = vec_perm(srcR2, srcR3, permP3); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
377 } break; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
378 } |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
379 |
8494 | 380 srcP0A = (vec_s16) vec_mergeh(zero_u8v, srcP0); |
381 srcP0B = (vec_s16) vec_mergel(zero_u8v, srcP0); | |
382 srcP1A = (vec_s16) vec_mergeh(zero_u8v, srcP1); | |
383 srcP1B = (vec_s16) vec_mergel(zero_u8v, srcP1); | |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
384 |
8494 | 385 srcP2A = (vec_s16) vec_mergeh(zero_u8v, srcP2); |
386 srcP2B = (vec_s16) vec_mergel(zero_u8v, srcP2); | |
387 srcP3A = (vec_s16) vec_mergeh(zero_u8v, srcP3); | |
388 srcP3B = (vec_s16) vec_mergel(zero_u8v, srcP3); | |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
389 |
8494 | 390 srcM1A = (vec_s16) vec_mergeh(zero_u8v, srcM1); |
391 srcM1B = (vec_s16) vec_mergel(zero_u8v, srcM1); | |
392 srcM2A = (vec_s16) vec_mergeh(zero_u8v, srcM2); | |
393 srcM2B = (vec_s16) vec_mergel(zero_u8v, srcM2); | |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
394 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
395 sum1A = vec_adds(srcP0A, srcP1A); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
396 sum1B = vec_adds(srcP0B, srcP1B); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
397 sum2A = vec_adds(srcM1A, srcP2A); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
398 sum2B = vec_adds(srcM1B, srcP2B); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
399 sum3A = vec_adds(srcM2A, srcP3A); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
400 sum3B = vec_adds(srcM2B, srcP3B); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
401 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
402 pp1A = vec_mladd(sum1A, v20ss, v16ss); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
403 pp1B = vec_mladd(sum1B, v20ss, v16ss); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
404 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
405 pp2A = vec_mladd(sum2A, v5ss, zero_s16v); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
406 pp2B = vec_mladd(sum2B, v5ss, zero_s16v); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
407 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
408 pp3A = vec_add(sum3A, pp1A); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
409 pp3B = vec_add(sum3B, pp1B); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
410 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
411 psumA = vec_sub(pp3A, pp2A); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
412 psumB = vec_sub(pp3B, pp2B); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
413 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
414 sumA = vec_sra(psumA, v5us); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
415 sumB = vec_sra(psumB, v5us); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
416 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
417 sum = vec_packsu(sumA, sumB); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
418 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
419 ASSERT_ALIGNED(dst); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
420 vdst = vec_ld(0, dst); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
421 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
422 OP_U8_ALTIVEC(fsum, sum, vdst); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
423 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
424 vec_st(fsum, 0, dst); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
425 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
426 src += srcStride; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
427 dst += dstStride; |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
428 } |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
429 } |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
430 |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
431 /* this code assumes stride % 16 == 0 */ |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
432 static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) { |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
433 register int i; |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
434 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
435 LOAD_ZERO; |
8494 | 436 const vec_u8 perm = vec_lvsl(0, src); |
437 const vec_s16 v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2)); | |
438 const vec_u16 v5us = vec_splat_u16(5); | |
439 const vec_s16 v5ss = vec_splat_s16(5); | |
440 const vec_s16 v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4)); | |
2967 | 441 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
442 uint8_t *srcbis = src - (srcStride * 2); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
443 |
8494 | 444 const vec_u8 srcM2a = vec_ld(0, srcbis); |
445 const vec_u8 srcM2b = vec_ld(16, srcbis); | |
446 const vec_u8 srcM2 = vec_perm(srcM2a, srcM2b, perm); | |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
447 //srcbis += srcStride; |
8494 | 448 const vec_u8 srcM1a = vec_ld(0, srcbis += srcStride); |
449 const vec_u8 srcM1b = vec_ld(16, srcbis); | |
450 const vec_u8 srcM1 = vec_perm(srcM1a, srcM1b, perm); | |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
451 //srcbis += srcStride; |
8494 | 452 const vec_u8 srcP0a = vec_ld(0, srcbis += srcStride); |
453 const vec_u8 srcP0b = vec_ld(16, srcbis); | |
454 const vec_u8 srcP0 = vec_perm(srcP0a, srcP0b, perm); | |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
455 //srcbis += srcStride; |
8494 | 456 const vec_u8 srcP1a = vec_ld(0, srcbis += srcStride); |
457 const vec_u8 srcP1b = vec_ld(16, srcbis); | |
458 const vec_u8 srcP1 = vec_perm(srcP1a, srcP1b, perm); | |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
459 //srcbis += srcStride; |
8494 | 460 const vec_u8 srcP2a = vec_ld(0, srcbis += srcStride); |
461 const vec_u8 srcP2b = vec_ld(16, srcbis); | |
462 const vec_u8 srcP2 = vec_perm(srcP2a, srcP2b, perm); | |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
463 //srcbis += srcStride; |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
464 |
8494 | 465 vec_s16 srcM2ssA = (vec_s16) vec_mergeh(zero_u8v, srcM2); |
466 vec_s16 srcM2ssB = (vec_s16) vec_mergel(zero_u8v, srcM2); | |
467 vec_s16 srcM1ssA = (vec_s16) vec_mergeh(zero_u8v, srcM1); | |
468 vec_s16 srcM1ssB = (vec_s16) vec_mergel(zero_u8v, srcM1); | |
469 vec_s16 srcP0ssA = (vec_s16) vec_mergeh(zero_u8v, srcP0); | |
470 vec_s16 srcP0ssB = (vec_s16) vec_mergel(zero_u8v, srcP0); | |
471 vec_s16 srcP1ssA = (vec_s16) vec_mergeh(zero_u8v, srcP1); | |
472 vec_s16 srcP1ssB = (vec_s16) vec_mergel(zero_u8v, srcP1); | |
473 vec_s16 srcP2ssA = (vec_s16) vec_mergeh(zero_u8v, srcP2); | |
474 vec_s16 srcP2ssB = (vec_s16) vec_mergel(zero_u8v, srcP2); | |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
475 |
8494 | 476 vec_s16 pp1A, pp1B, pp2A, pp2B, pp3A, pp3B, |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
477 psumA, psumB, sumA, sumB, |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
478 srcP3ssA, srcP3ssB, |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
479 sum1A, sum1B, sum2A, sum2B, sum3A, sum3B; |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
480 |
8494 | 481 vec_u8 sum, vdst, fsum, srcP3a, srcP3b, srcP3; |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
482 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
483 for (i = 0 ; i < 16 ; i++) { |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
484 srcP3a = vec_ld(0, srcbis += srcStride); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
485 srcP3b = vec_ld(16, srcbis); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
486 srcP3 = vec_perm(srcP3a, srcP3b, perm); |
8494 | 487 srcP3ssA = (vec_s16) vec_mergeh(zero_u8v, srcP3); |
488 srcP3ssB = (vec_s16) vec_mergel(zero_u8v, srcP3); | |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
489 //srcbis += srcStride; |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
490 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
491 sum1A = vec_adds(srcP0ssA, srcP1ssA); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
492 sum1B = vec_adds(srcP0ssB, srcP1ssB); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
493 sum2A = vec_adds(srcM1ssA, srcP2ssA); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
494 sum2B = vec_adds(srcM1ssB, srcP2ssB); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
495 sum3A = vec_adds(srcM2ssA, srcP3ssA); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
496 sum3B = vec_adds(srcM2ssB, srcP3ssB); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
497 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
498 srcM2ssA = srcM1ssA; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
499 srcM2ssB = srcM1ssB; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
500 srcM1ssA = srcP0ssA; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
501 srcM1ssB = srcP0ssB; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
502 srcP0ssA = srcP1ssA; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
503 srcP0ssB = srcP1ssB; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
504 srcP1ssA = srcP2ssA; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
505 srcP1ssB = srcP2ssB; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
506 srcP2ssA = srcP3ssA; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
507 srcP2ssB = srcP3ssB; |
2967 | 508 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
509 pp1A = vec_mladd(sum1A, v20ss, v16ss); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
510 pp1B = vec_mladd(sum1B, v20ss, v16ss); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
511 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
512 pp2A = vec_mladd(sum2A, v5ss, zero_s16v); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
513 pp2B = vec_mladd(sum2B, v5ss, zero_s16v); |
2967 | 514 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
515 pp3A = vec_add(sum3A, pp1A); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
516 pp3B = vec_add(sum3B, pp1B); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
517 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
518 psumA = vec_sub(pp3A, pp2A); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
519 psumB = vec_sub(pp3B, pp2B); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
520 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
521 sumA = vec_sra(psumA, v5us); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
522 sumB = vec_sra(psumB, v5us); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
523 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
524 sum = vec_packsu(sumA, sumB); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
525 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
526 ASSERT_ALIGNED(dst); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
527 vdst = vec_ld(0, dst); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
528 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
529 OP_U8_ALTIVEC(fsum, sum, vdst); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
530 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
531 vec_st(fsum, 0, dst); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
532 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
533 dst += dstStride; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
534 } |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
535 } |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
536 |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
537 /* this code assumes stride % 16 == 0 *and* that tmp is properly aligned */ |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
538 static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp, uint8_t * src, int dstStride, int tmpStride, int srcStride) { |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
539 register int i; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
540 LOAD_ZERO; |
8494 | 541 const vec_u8 permM2 = vec_lvsl(-2, src); |
542 const vec_u8 permM1 = vec_lvsl(-1, src); | |
543 const vec_u8 permP0 = vec_lvsl(+0, src); | |
544 const vec_u8 permP1 = vec_lvsl(+1, src); | |
545 const vec_u8 permP2 = vec_lvsl(+2, src); | |
546 const vec_u8 permP3 = vec_lvsl(+3, src); | |
547 const vec_s16 v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2)); | |
548 const vec_u32 v10ui = vec_splat_u32(10); | |
549 const vec_s16 v5ss = vec_splat_s16(5); | |
550 const vec_s16 v1ss = vec_splat_s16(1); | |
551 const vec_s32 v512si = vec_sl(vec_splat_s32(1),vec_splat_u32(9)); | |
552 const vec_u32 v16ui = vec_sl(vec_splat_u32(1),vec_splat_u32(4)); | |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
553 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
554 register int align = ((((unsigned long)src) - 2) % 16); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
555 |
8494 | 556 vec_s16 srcP0A, srcP0B, srcP1A, srcP1B, |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
557 srcP2A, srcP2B, srcP3A, srcP3B, |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
558 srcM1A, srcM1B, srcM2A, srcM2B, |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
559 sum1A, sum1B, sum2A, sum2B, sum3A, sum3B, |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
560 pp1A, pp1B, pp2A, pp2B, psumA, psumB; |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
561 |
8494 | 562 const vec_u8 mperm = (const vec_u8) |
7373
266d4949aa15
Remove AltiVec vector declaration compiler compatibility macros.
diego
parents:
7333
diff
changeset
|
563 {0x00, 0x08, 0x01, 0x09, 0x02, 0x0A, 0x03, 0x0B, |
266d4949aa15
Remove AltiVec vector declaration compiler compatibility macros.
diego
parents:
7333
diff
changeset
|
564 0x04, 0x0C, 0x05, 0x0D, 0x06, 0x0E, 0x07, 0x0F}; |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
565 int16_t *tmpbis = tmp; |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
566 |
8494 | 567 vec_s16 tmpM1ssA, tmpM1ssB, tmpM2ssA, tmpM2ssB, |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
568 tmpP0ssA, tmpP0ssB, tmpP1ssA, tmpP1ssB, |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
569 tmpP2ssA, tmpP2ssB; |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
570 |
8494 | 571 vec_s32 pp1Ae, pp1Ao, pp1Be, pp1Bo, pp2Ae, pp2Ao, pp2Be, pp2Bo, |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
572 pp3Ae, pp3Ao, pp3Be, pp3Bo, pp1cAe, pp1cAo, pp1cBe, pp1cBo, |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
573 pp32Ae, pp32Ao, pp32Be, pp32Bo, sumAe, sumAo, sumBe, sumBo, |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
574 ssumAe, ssumAo, ssumBe, ssumBo; |
8494 | 575 vec_u8 fsum, sumv, sum, vdst; |
576 vec_s16 ssume, ssumo; | |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
577 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
578 src -= (2 * srcStride); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
579 for (i = 0 ; i < 21 ; i ++) { |
8494 | 580 vec_u8 srcM2, srcM1, srcP0, srcP1, srcP2, srcP3; |
581 vec_u8 srcR1 = vec_ld(-2, src); | |
582 vec_u8 srcR2 = vec_ld(14, src); | |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
583 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
584 switch (align) { |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
585 default: { |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
586 srcM2 = vec_perm(srcR1, srcR2, permM2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
587 srcM1 = vec_perm(srcR1, srcR2, permM1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
588 srcP0 = vec_perm(srcR1, srcR2, permP0); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
589 srcP1 = vec_perm(srcR1, srcR2, permP1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
590 srcP2 = vec_perm(srcR1, srcR2, permP2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
591 srcP3 = vec_perm(srcR1, srcR2, permP3); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
592 } break; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
593 case 11: { |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
594 srcM2 = vec_perm(srcR1, srcR2, permM2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
595 srcM1 = vec_perm(srcR1, srcR2, permM1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
596 srcP0 = vec_perm(srcR1, srcR2, permP0); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
597 srcP1 = vec_perm(srcR1, srcR2, permP1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
598 srcP2 = vec_perm(srcR1, srcR2, permP2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
599 srcP3 = srcR2; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
600 } break; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
601 case 12: { |
8494 | 602 vec_u8 srcR3 = vec_ld(30, src); |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
603 srcM2 = vec_perm(srcR1, srcR2, permM2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
604 srcM1 = vec_perm(srcR1, srcR2, permM1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
605 srcP0 = vec_perm(srcR1, srcR2, permP0); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
606 srcP1 = vec_perm(srcR1, srcR2, permP1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
607 srcP2 = srcR2; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
608 srcP3 = vec_perm(srcR2, srcR3, permP3); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
609 } break; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
610 case 13: { |
8494 | 611 vec_u8 srcR3 = vec_ld(30, src); |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
612 srcM2 = vec_perm(srcR1, srcR2, permM2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
613 srcM1 = vec_perm(srcR1, srcR2, permM1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
614 srcP0 = vec_perm(srcR1, srcR2, permP0); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
615 srcP1 = srcR2; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
616 srcP2 = vec_perm(srcR2, srcR3, permP2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
617 srcP3 = vec_perm(srcR2, srcR3, permP3); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
618 } break; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
619 case 14: { |
8494 | 620 vec_u8 srcR3 = vec_ld(30, src); |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
621 srcM2 = vec_perm(srcR1, srcR2, permM2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
622 srcM1 = vec_perm(srcR1, srcR2, permM1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
623 srcP0 = srcR2; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
624 srcP1 = vec_perm(srcR2, srcR3, permP1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
625 srcP2 = vec_perm(srcR2, srcR3, permP2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
626 srcP3 = vec_perm(srcR2, srcR3, permP3); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
627 } break; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
628 case 15: { |
8494 | 629 vec_u8 srcR3 = vec_ld(30, src); |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
630 srcM2 = vec_perm(srcR1, srcR2, permM2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
631 srcM1 = srcR2; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
632 srcP0 = vec_perm(srcR2, srcR3, permP0); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
633 srcP1 = vec_perm(srcR2, srcR3, permP1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
634 srcP2 = vec_perm(srcR2, srcR3, permP2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
635 srcP3 = vec_perm(srcR2, srcR3, permP3); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
636 } break; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
637 } |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
638 |
8494 | 639 srcP0A = (vec_s16) vec_mergeh(zero_u8v, srcP0); |
640 srcP0B = (vec_s16) vec_mergel(zero_u8v, srcP0); | |
641 srcP1A = (vec_s16) vec_mergeh(zero_u8v, srcP1); | |
642 srcP1B = (vec_s16) vec_mergel(zero_u8v, srcP1); | |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
643 |
8494 | 644 srcP2A = (vec_s16) vec_mergeh(zero_u8v, srcP2); |
645 srcP2B = (vec_s16) vec_mergel(zero_u8v, srcP2); | |
646 srcP3A = (vec_s16) vec_mergeh(zero_u8v, srcP3); | |
647 srcP3B = (vec_s16) vec_mergel(zero_u8v, srcP3); | |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
648 |
8494 | 649 srcM1A = (vec_s16) vec_mergeh(zero_u8v, srcM1); |
650 srcM1B = (vec_s16) vec_mergel(zero_u8v, srcM1); | |
651 srcM2A = (vec_s16) vec_mergeh(zero_u8v, srcM2); | |
652 srcM2B = (vec_s16) vec_mergel(zero_u8v, srcM2); | |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
653 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
654 sum1A = vec_adds(srcP0A, srcP1A); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
655 sum1B = vec_adds(srcP0B, srcP1B); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
656 sum2A = vec_adds(srcM1A, srcP2A); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
657 sum2B = vec_adds(srcM1B, srcP2B); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
658 sum3A = vec_adds(srcM2A, srcP3A); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
659 sum3B = vec_adds(srcM2B, srcP3B); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
660 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
661 pp1A = vec_mladd(sum1A, v20ss, sum3A); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
662 pp1B = vec_mladd(sum1B, v20ss, sum3B); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
663 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
664 pp2A = vec_mladd(sum2A, v5ss, zero_s16v); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
665 pp2B = vec_mladd(sum2B, v5ss, zero_s16v); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
666 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
667 psumA = vec_sub(pp1A, pp2A); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
668 psumB = vec_sub(pp1B, pp2B); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
669 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
670 vec_st(psumA, 0, tmp); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
671 vec_st(psumB, 16, tmp); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
672 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
673 src += srcStride; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
674 tmp += tmpStride; /* int16_t*, and stride is 16, so it's OK here */ |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
675 } |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
676 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
677 tmpM2ssA = vec_ld(0, tmpbis); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
678 tmpM2ssB = vec_ld(16, tmpbis); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
679 tmpbis += tmpStride; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
680 tmpM1ssA = vec_ld(0, tmpbis); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
681 tmpM1ssB = vec_ld(16, tmpbis); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
682 tmpbis += tmpStride; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
683 tmpP0ssA = vec_ld(0, tmpbis); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
684 tmpP0ssB = vec_ld(16, tmpbis); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
685 tmpbis += tmpStride; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
686 tmpP1ssA = vec_ld(0, tmpbis); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
687 tmpP1ssB = vec_ld(16, tmpbis); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
688 tmpbis += tmpStride; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
689 tmpP2ssA = vec_ld(0, tmpbis); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
690 tmpP2ssB = vec_ld(16, tmpbis); |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
691 tmpbis += tmpStride; |
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
692 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
693 for (i = 0 ; i < 16 ; i++) { |
8494 | 694 const vec_s16 tmpP3ssA = vec_ld(0, tmpbis); |
695 const vec_s16 tmpP3ssB = vec_ld(16, tmpbis); | |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
696 |
8494 | 697 const vec_s16 sum1A = vec_adds(tmpP0ssA, tmpP1ssA); |
698 const vec_s16 sum1B = vec_adds(tmpP0ssB, tmpP1ssB); | |
699 const vec_s16 sum2A = vec_adds(tmpM1ssA, tmpP2ssA); | |
700 const vec_s16 sum2B = vec_adds(tmpM1ssB, tmpP2ssB); | |
701 const vec_s16 sum3A = vec_adds(tmpM2ssA, tmpP3ssA); | |
702 const vec_s16 sum3B = vec_adds(tmpM2ssB, tmpP3ssB); | |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
703 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
704 tmpbis += tmpStride; |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
705 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
706 tmpM2ssA = tmpM1ssA; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
707 tmpM2ssB = tmpM1ssB; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
708 tmpM1ssA = tmpP0ssA; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
709 tmpM1ssB = tmpP0ssB; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
710 tmpP0ssA = tmpP1ssA; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
711 tmpP0ssB = tmpP1ssB; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
712 tmpP1ssA = tmpP2ssA; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
713 tmpP1ssB = tmpP2ssB; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
714 tmpP2ssA = tmpP3ssA; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
715 tmpP2ssB = tmpP3ssB; |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
716 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
717 pp1Ae = vec_mule(sum1A, v20ss); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
718 pp1Ao = vec_mulo(sum1A, v20ss); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
719 pp1Be = vec_mule(sum1B, v20ss); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
720 pp1Bo = vec_mulo(sum1B, v20ss); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
721 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
722 pp2Ae = vec_mule(sum2A, v5ss); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
723 pp2Ao = vec_mulo(sum2A, v5ss); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
724 pp2Be = vec_mule(sum2B, v5ss); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
725 pp2Bo = vec_mulo(sum2B, v5ss); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
726 |
8494 | 727 pp3Ae = vec_sra((vec_s32)sum3A, v16ui); |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
728 pp3Ao = vec_mulo(sum3A, v1ss); |
8494 | 729 pp3Be = vec_sra((vec_s32)sum3B, v16ui); |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
730 pp3Bo = vec_mulo(sum3B, v1ss); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
731 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
732 pp1cAe = vec_add(pp1Ae, v512si); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
733 pp1cAo = vec_add(pp1Ao, v512si); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
734 pp1cBe = vec_add(pp1Be, v512si); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
735 pp1cBo = vec_add(pp1Bo, v512si); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
736 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
737 pp32Ae = vec_sub(pp3Ae, pp2Ae); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
738 pp32Ao = vec_sub(pp3Ao, pp2Ao); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
739 pp32Be = vec_sub(pp3Be, pp2Be); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
740 pp32Bo = vec_sub(pp3Bo, pp2Bo); |
2967 | 741 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
742 sumAe = vec_add(pp1cAe, pp32Ae); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
743 sumAo = vec_add(pp1cAo, pp32Ao); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
744 sumBe = vec_add(pp1cBe, pp32Be); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
745 sumBo = vec_add(pp1cBo, pp32Bo); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
746 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
747 ssumAe = vec_sra(sumAe, v10ui); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
748 ssumAo = vec_sra(sumAo, v10ui); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
749 ssumBe = vec_sra(sumBe, v10ui); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
750 ssumBo = vec_sra(sumBo, v10ui); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
751 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
752 ssume = vec_packs(ssumAe, ssumBe); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
753 ssumo = vec_packs(ssumAo, ssumBo); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
754 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
755 sumv = vec_packsu(ssume, ssumo); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
756 sum = vec_perm(sumv, sumv, mperm); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
757 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
758 ASSERT_ALIGNED(dst); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
759 vdst = vec_ld(0, dst); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
760 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
761 OP_U8_ALTIVEC(fsum, sum, vdst); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
762 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
763 vec_st(fsum, 0, dst); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
764 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
765 dst += dstStride; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
766 } |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
767 } |