Mercurial > libavcodec.hg
annotate ppc/h264_template_altivec.c @ 10936:b2ea6b0d17bf libavcodec
Update libx264.c to use new libx264 features
With b_keyframe instead of IDR for detecting keyframes, ffmpeg should now
support periodic encoding with periodic intra refresh (although there is no
interface option for it yet).
Set the new timebase values for full VFR input support.
Bump configure to check for API version 83.
author | darkshikari |
---|---|
date | Tue, 19 Jan 2010 04:00:08 +0000 |
parents | fe17033a79ed |
children | 34a65026fa06 |
rev | line source |
---|---|
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
1 /* |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
2 * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org> |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
3 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3577
diff
changeset
|
4 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3577
diff
changeset
|
5 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3577
diff
changeset
|
6 * FFmpeg is free software; you can redistribute it and/or |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
7 * modify it under the terms of the GNU Lesser General Public |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
8 * License as published by the Free Software Foundation; either |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3577
diff
changeset
|
9 * version 2.1 of the License, or (at your option) any later version. |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
10 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3577
diff
changeset
|
11 * FFmpeg is distributed in the hope that it will be useful, |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
14 * Lesser General Public License for more details. |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
15 * |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
16 * You should have received a copy of the GNU Lesser General Public |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3577
diff
changeset
|
17 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2967
diff
changeset
|
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
19 */ |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
20 |
5603
861eb234e6ba
remove alignment correction of the destination pointers in luma_16x6
gpoirier
parents:
5530
diff
changeset
|
21 //#define DEBUG_ALIGNMENT |
861eb234e6ba
remove alignment correction of the destination pointers in luma_16x6
gpoirier
parents:
5530
diff
changeset
|
22 #ifdef DEBUG_ALIGNMENT |
861eb234e6ba
remove alignment correction of the destination pointers in luma_16x6
gpoirier
parents:
5530
diff
changeset
|
23 #define ASSERT_ALIGNED(ptr) assert(!((unsigned long)(ptr) & 0x0000000F)); /* holds only when ptr is 16-byte aligned (low 4 address bits clear) */ |
861eb234e6ba
remove alignment correction of the destination pointers in luma_16x6
gpoirier
parents:
5530
diff
changeset
|
24 #else |
861eb234e6ba
remove alignment correction of the destination pointers in luma_16x6
gpoirier
parents:
5530
diff
changeset
|
25 #define ASSERT_ALIGNED(ptr) ; |
861eb234e6ba
remove alignment correction of the destination pointers in luma_16x6
gpoirier
parents:
5530
diff
changeset
|
26 #endif |
861eb234e6ba
remove alignment correction of the destination pointers in luma_16x6
gpoirier
parents:
5530
diff
changeset
|
27 |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
28 /* this code assumes that stride % 16 == 0 */ |
6059 | 29 |
9444 | 30 #define CHROMA_MC8_ALTIVEC_CORE(BIAS1, BIAS2) \ |
8494 | 31 vsrc2ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc2uc);\ |
32 vsrc3ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc3uc);\ | |
6059 | 33 \ |
9444 | 34 psum = vec_mladd(vA, vsrc0ssH, BIAS1);\ |
6059 | 35 psum = vec_mladd(vB, vsrc1ssH, psum);\ |
36 psum = vec_mladd(vC, vsrc2ssH, psum);\ | |
37 psum = vec_mladd(vD, vsrc3ssH, psum);\ | |
9444 | 38 psum = BIAS2(psum);\ |
6059 | 39 psum = vec_sr(psum, v6us);\ |
40 \ | |
41 vdst = vec_ld(0, dst);\ | |
8494 | 42 ppsum = (vec_u8)vec_pack(psum, psum);\ |
6059 | 43 vfdst = vec_perm(vdst, ppsum, fperm);\ |
44 \ | |
45 OP_U8_ALTIVEC(fsum, vfdst, vdst);\ | |
46 \ | |
47 vec_st(fsum, 0, dst);\ | |
48 \ | |
49 vsrc0ssH = vsrc2ssH;\ | |
50 vsrc1ssH = vsrc3ssH;\ | |
51 \ | |
52 dst += stride;\ | |
53 src += stride; | |
54 | |
6062
9d1590a4df90
Partially address issue299, no performance change apparently
lu_zero
parents:
6061
diff
changeset
|
55 #define CHROMA_MC8_ALTIVEC_CORE_SIMPLE \ |
6064
62d040333d51
Add C/B == 0 cases, 2% slower on CELL but should address Issue299 eventually
lu_zero
parents:
6063
diff
changeset
|
56 \ |
8494 | 57 vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc0uc);\ |
58 vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc1uc);\ | |
6062
9d1590a4df90
Partially address issue299, no performance change apparently
lu_zero
parents:
6061
diff
changeset
|
59 \ |
9d1590a4df90
Partially address issue299, no performance change apparently
lu_zero
parents:
6061
diff
changeset
|
60 psum = vec_mladd(vA, vsrc0ssH, v32ss);\ |
6064
62d040333d51
Add C/B == 0 cases, 2% slower on CELL but should address Issue299 eventually
lu_zero
parents:
6063
diff
changeset
|
61 psum = vec_mladd(vE, vsrc1ssH, psum);\ |
6062
9d1590a4df90
Partially address issue299, no performance change apparently
lu_zero
parents:
6061
diff
changeset
|
62 psum = vec_sr(psum, v6us);\ |
9d1590a4df90
Partially address issue299, no performance change apparently
lu_zero
parents:
6061
diff
changeset
|
63 \ |
9d1590a4df90
Partially address issue299, no performance change apparently
lu_zero
parents:
6061
diff
changeset
|
64 vdst = vec_ld(0, dst);\ |
8494 | 65 ppsum = (vec_u8)vec_pack(psum, psum);\ |
6062
9d1590a4df90
Partially address issue299, no performance change apparently
lu_zero
parents:
6061
diff
changeset
|
66 vfdst = vec_perm(vdst, ppsum, fperm);\ |
9d1590a4df90
Partially address issue299, no performance change apparently
lu_zero
parents:
6061
diff
changeset
|
67 \ |
9d1590a4df90
Partially address issue299, no performance change apparently
lu_zero
parents:
6061
diff
changeset
|
68 OP_U8_ALTIVEC(fsum, vfdst, vdst);\ |
9d1590a4df90
Partially address issue299, no performance change apparently
lu_zero
parents:
6061
diff
changeset
|
69 \ |
9d1590a4df90
Partially address issue299, no performance change apparently
lu_zero
parents:
6061
diff
changeset
|
70 vec_st(fsum, 0, dst);\ |
9d1590a4df90
Partially address issue299, no performance change apparently
lu_zero
parents:
6061
diff
changeset
|
71 \ |
9d1590a4df90
Partially address issue299, no performance change apparently
lu_zero
parents:
6061
diff
changeset
|
72 dst += stride;\ |
9d1590a4df90
Partially address issue299, no performance change apparently
lu_zero
parents:
6061
diff
changeset
|
73 src += stride; |
6059 | 74 |
9444 | 75 #define noop(a) a |
76 #define add28(a) vec_add(v28ss, a) | |
77 | |
6063 | 78 void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, |
79 int stride, int h, int x, int y) { | |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
80 POWERPC_PERF_DECLARE(PREFIX_h264_chroma_mc8_num, 1); |
5019
41cabe79ba25
use macro Use DECLARE_ALIGNED_16 to align stack-allocated variables
gpoirier
parents:
3947
diff
changeset
|
81 DECLARE_ALIGNED_16(signed int, ABCD[4]) = |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
82 {((8 - x) * (8 - y)), |
6058 | 83 (( x) * (8 - y)), |
84 ((8 - x) * ( y)), | |
85 (( x) * ( y))}; | |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
86 register int i; |
8494 | 87 vec_u8 fperm; |
88 const vec_s32 vABCD = vec_ld(0, ABCD); | |
89 const vec_s16 vA = vec_splat((vec_s16)vABCD, 1); | |
90 const vec_s16 vB = vec_splat((vec_s16)vABCD, 3); | |
91 const vec_s16 vC = vec_splat((vec_s16)vABCD, 5); | |
92 const vec_s16 vD = vec_splat((vec_s16)vABCD, 7); | |
5530
cd266411b11a
use shorter types vec_"type" instead of the too long vector "type"
gpoirier
parents:
5019
diff
changeset
|
93 LOAD_ZERO; |
8494 | 94 const vec_s16 v32ss = vec_sl(vec_splat_s16(1),vec_splat_u16(5)); |
95 const vec_u16 v6us = vec_splat_u16(6); | |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
96 register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1; |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
97 register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0; |
2967 | 98 |
9166
40738baaafc2
Add av_uninit to vsrcBuc variable to work around some
diego
parents:
8494
diff
changeset
|
99 vec_u8 vsrcAuc, av_uninit(vsrcBuc), vsrcperm0, vsrcperm1; |
8494 | 100 vec_u8 vsrc0uc, vsrc1uc; |
101 vec_s16 vsrc0ssH, vsrc1ssH; | |
102 vec_u8 vsrcCuc, vsrc2uc, vsrc3uc; | |
103 vec_s16 vsrc2ssH, vsrc3ssH, psum; | |
104 vec_u8 vdst, ppsum, vfdst, fsum; | |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
105 |
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
106 POWERPC_PERF_START_COUNT(PREFIX_h264_chroma_mc8_num, 1); |
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
107 |
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
108 if (((unsigned long)dst) % 16 == 0) { |
8494 | 109 fperm = (vec_u8){0x10, 0x11, 0x12, 0x13, |
9167 | 110 0x14, 0x15, 0x16, 0x17, |
111 0x08, 0x09, 0x0A, 0x0B, | |
112 0x0C, 0x0D, 0x0E, 0x0F}; | |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
113 } else { |
8494 | 114 fperm = (vec_u8){0x00, 0x01, 0x02, 0x03, |
9167 | 115 0x04, 0x05, 0x06, 0x07, |
116 0x18, 0x19, 0x1A, 0x1B, | |
117 0x1C, 0x1D, 0x1E, 0x1F}; | |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
118 } |
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
119 |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
120 vsrcAuc = vec_ld(0, src); |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
121 |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
122 if (loadSecond) |
6060 | 123 vsrcBuc = vec_ld(16, src); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
124 vsrcperm0 = vec_lvsl(0, src); |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
125 vsrcperm1 = vec_lvsl(1, src); |
2967 | 126 |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
127 vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0); |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
128 if (reallyBadAlign) |
6060 | 129 vsrc1uc = vsrcBuc; |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
130 else |
6060 | 131 vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1); |
2967 | 132 |
8494 | 133 vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc0uc); |
134 vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc1uc); | |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
135 |
6062
9d1590a4df90
Partially address issue299, no performance change apparently
lu_zero
parents:
6061
diff
changeset
|
136 if (ABCD[3]) { |
6063 | 137 if (!loadSecond) {// -> !reallyBadAlign |
138 for (i = 0 ; i < h ; i++) { | |
139 vsrcCuc = vec_ld(stride + 0, src); | |
140 vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0); | |
141 vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1); | |
2967 | 142 |
9444 | 143 CHROMA_MC8_ALTIVEC_CORE(v32ss, noop) |
6063 | 144 } |
145 } else { | |
8494 | 146 vec_u8 vsrcDuc; |
6063 | 147 for (i = 0 ; i < h ; i++) { |
148 vsrcCuc = vec_ld(stride + 0, src); | |
149 vsrcDuc = vec_ld(stride + 16, src); | |
150 vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0); | |
151 if (reallyBadAlign) | |
152 vsrc3uc = vsrcDuc; | |
153 else | |
154 vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1); | |
155 | |
9444 | 156 CHROMA_MC8_ALTIVEC_CORE(v32ss, noop) |
6063 | 157 } |
6060 | 158 } |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
159 } else { |
8494 | 160 const vec_s16 vE = vec_add(vB, vC); |
6065 | 161 if (ABCD[2]) { // x == 0 B == 0 |
6067 | 162 if (!loadSecond) {// -> !reallyBadAlign |
163 for (i = 0 ; i < h ; i++) { | |
164 vsrcCuc = vec_ld(stride + 0, src); | |
165 vsrc1uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0); | |
166 CHROMA_MC8_ALTIVEC_CORE_SIMPLE | |
2967 | 167 |
6067 | 168 vsrc0uc = vsrc1uc; |
169 } | |
170 } else { | |
8494 | 171 vec_u8 vsrcDuc; |
6067 | 172 for (i = 0 ; i < h ; i++) { |
173 vsrcCuc = vec_ld(stride + 0, src); | |
174 vsrcDuc = vec_ld(stride + 15, src); | |
175 vsrc1uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0); | |
176 CHROMA_MC8_ALTIVEC_CORE_SIMPLE | |
177 | |
178 vsrc0uc = vsrc1uc; | |
179 } | |
6063 | 180 } |
6065 | 181 } else { // y == 0 C == 0 |
6067 | 182 if (!loadSecond) {// -> !reallyBadAlign |
183 for (i = 0 ; i < h ; i++) { | |
184 vsrcCuc = vec_ld(0, src); | |
185 vsrc0uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0); | |
186 vsrc1uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1); | |
6065 | 187 |
6067 | 188 CHROMA_MC8_ALTIVEC_CORE_SIMPLE |
189 } | |
190 } else { | |
8494 | 191 vec_u8 vsrcDuc; |
6067 | 192 for (i = 0 ; i < h ; i++) { |
193 vsrcCuc = vec_ld(0, src); | |
194 vsrcDuc = vec_ld(15, src); | |
195 vsrc0uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0); | |
196 if (reallyBadAlign) | |
197 vsrc1uc = vsrcDuc; | |
198 else | |
199 vsrc1uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1); | |
200 | |
201 CHROMA_MC8_ALTIVEC_CORE_SIMPLE | |
202 } | |
6064
62d040333d51
Add C/B == 0 cases, 2% slower on CELL but should address Issue299 eventually
lu_zero
parents:
6063
diff
changeset
|
203 } |
62d040333d51
Add C/B == 0 cases, 2% slower on CELL but should address Issue299 eventually
lu_zero
parents:
6063
diff
changeset
|
204 } |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
205 } |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
206 POWERPC_PERF_STOP_COUNT(PREFIX_h264_chroma_mc8_num, 1); |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
207 } |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
208 |
9444 | 209 /* Chroma MC over h rows, 8 bytes of dst per row: weights src with the four x/y-derived factors in ABCD, adds a bias of 28 and shifts right by 6, then combines with dst through OP_U8_ALTIVEC (defined outside this view). This code assumes that stride % 16 == 0. */ |
210 void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) { | |
211 DECLARE_ALIGNED_16(signed int, ABCD[4]) = /* the four blend weights derived from x and y */ | |
212 {((8 - x) * (8 - y)), | |
213 (( x) * (8 - y)), | |
214 ((8 - x) * ( y)), | |
215 (( x) * ( y))}; | |
216 register int i; | |
217 vec_u8 fperm; | |
218 const vec_s32 vABCD = vec_ld(0, ABCD); | |
219 const vec_s16 vA = vec_splat((vec_s16)vABCD, 1); /* vA..vD splat the odd 16-bit lanes of vABCD; presumably the low halves of the 32-bit weights on big-endian PPC - confirm */ | |
220 const vec_s16 vB = vec_splat((vec_s16)vABCD, 3); | |
221 const vec_s16 vC = vec_splat((vec_s16)vABCD, 5); | |
222 const vec_s16 vD = vec_splat((vec_s16)vABCD, 7); | |
223 LOAD_ZERO; | |
224 const vec_s16 v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4)); /* (1 << 5) - 4 = 28 in every lane */ | |
225 const vec_u16 v6us = vec_splat_u16(6); /* shift count used for the final >> 6 */ | |
226 register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1; /* 1 when src sits more than 7 bytes into its 16-byte line, so the bytes read spill into the next line */ | |
227 register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0; /* 1 when src is at offset 15, i.e. src + 1 is 16-byte aligned */ | |
228 | |
229 vec_u8 vsrcAuc, av_uninit(vsrcBuc), vsrcperm0, vsrcperm1; | |
230 vec_u8 vsrc0uc, vsrc1uc; | |
231 vec_s16 vsrc0ssH, vsrc1ssH; | |
232 vec_u8 vsrcCuc, vsrc2uc, vsrc3uc; | |
233 vec_s16 vsrc2ssH, vsrc3ssH, psum; | |
234 vec_u8 vdst, ppsum, vfdst, fsum; | |
235 | |
236 if (((unsigned long)dst) % 16 == 0) { /* fperm picks which 8 bytes of the loaded dst vector are replaced: the first 8 when dst is 16-byte aligned, the last 8 otherwise */ | |
237 fperm = (vec_u8){0x10, 0x11, 0x12, 0x13, | |
238 0x14, 0x15, 0x16, 0x17, | |
239 0x08, 0x09, 0x0A, 0x0B, | |
240 0x0C, 0x0D, 0x0E, 0x0F}; | |
241 } else { | |
242 fperm = (vec_u8){0x00, 0x01, 0x02, 0x03, | |
243 0x04, 0x05, 0x06, 0x07, | |
244 0x18, 0x19, 0x1A, 0x1B, | |
245 0x1C, 0x1D, 0x1E, 0x1F}; | |
246 } | |
247 | |
248 vsrcAuc = vec_ld(0, src); /* first source row, loaded before the loop */ | |
249 | |
250 if (loadSecond) | |
251 vsrcBuc = vec_ld(16, src); | |
252 vsrcperm0 = vec_lvsl(0, src); /* permute masks that realign data starting at src and at src + 1 */ | |
253 vsrcperm1 = vec_lvsl(1, src); | |
254 | |
255 vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0); | |
256 if (reallyBadAlign) /* src + 1 starts exactly at the second vector, so take it unpermuted */ | |
257 vsrc1uc = vsrcBuc; | |
258 else | |
259 vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1); | |
260 | |
261 vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc0uc); /* widen the first 8 bytes to 16-bit lanes */ | |
262 vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc1uc); | |
263 | |
264 if (!loadSecond) {// -> !reallyBadAlign | |
265 for (i = 0 ; i < h ; i++) { | |
266 | |
267 | |
268 vsrcCuc = vec_ld(stride + 0, src); /* next row; a single vector load suffices in this branch */ | |
269 | |
270 vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0); | |
271 vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1); | |
272 | |
273 CHROMA_MC8_ALTIVEC_CORE(vec_splat_s16(0), add28) /* zero initial bias; add28 adds v28ss after the multiply-adds; the macro also advances dst and src by stride */ | |
274 } | |
275 } else { | |
276 vec_u8 vsrcDuc; | |
277 for (i = 0 ; i < h ; i++) { | |
278 vsrcCuc = vec_ld(stride + 0, src); /* next row needs two vector loads in this branch */ | |
279 vsrcDuc = vec_ld(stride + 16, src); | |
280 | |
281 vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0); | |
282 if (reallyBadAlign) | |
283 vsrc3uc = vsrcDuc; | |
284 else | |
285 vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1); | |
286 | |
287 CHROMA_MC8_ALTIVEC_CORE(vec_splat_s16(0), add28) /* same core step as in the single-load branch */ | |
288 } | |
289 } | |
290 } | |
291 | |
292 #undef noop | |
293 #undef add28 | |
6059 | 294 #undef CHROMA_MC8_ALTIVEC_CORE |
295 | |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
296 /* this code assumes that stride % 16 == 0 */ |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
297 static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) { |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
298 POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_h_lowpass_num, 1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
299 register int i; |
2967 | 300 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
301 LOAD_ZERO; |
8494 | 302 const vec_u8 permM2 = vec_lvsl(-2, src); |
303 const vec_u8 permM1 = vec_lvsl(-1, src); | |
304 const vec_u8 permP0 = vec_lvsl(+0, src); | |
305 const vec_u8 permP1 = vec_lvsl(+1, src); | |
306 const vec_u8 permP2 = vec_lvsl(+2, src); | |
307 const vec_u8 permP3 = vec_lvsl(+3, src); | |
308 const vec_s16 v5ss = vec_splat_s16(5); | |
309 const vec_u16 v5us = vec_splat_u16(5); | |
310 const vec_s16 v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2)); | |
311 const vec_s16 v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4)); | |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
312 |
8494 | 313 vec_u8 srcM2, srcM1, srcP0, srcP1, srcP2, srcP3; |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
314 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
315 register int align = ((((unsigned long)src) - 2) % 16); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
316 |
8494 | 317 vec_s16 srcP0A, srcP0B, srcP1A, srcP1B, |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
318 srcP2A, srcP2B, srcP3A, srcP3B, |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
319 srcM1A, srcM1B, srcM2A, srcM2B, |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
320 sum1A, sum1B, sum2A, sum2B, sum3A, sum3B, |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
321 pp1A, pp1B, pp2A, pp2B, pp3A, pp3B, |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
322 psumA, psumB, sumA, sumB; |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
323 |
8494 | 324 vec_u8 sum, vdst, fsum; |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
325 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
326 POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1); |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
327 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
328 for (i = 0 ; i < 16 ; i ++) { |
8494 | 329 vec_u8 srcR1 = vec_ld(-2, src); |
330 vec_u8 srcR2 = vec_ld(14, src); | |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
331 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
332 switch (align) { |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
333 default: { |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
334 srcM2 = vec_perm(srcR1, srcR2, permM2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
335 srcM1 = vec_perm(srcR1, srcR2, permM1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
336 srcP0 = vec_perm(srcR1, srcR2, permP0); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
337 srcP1 = vec_perm(srcR1, srcR2, permP1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
338 srcP2 = vec_perm(srcR1, srcR2, permP2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
339 srcP3 = vec_perm(srcR1, srcR2, permP3); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
340 } break; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
341 case 11: { |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
342 srcM2 = vec_perm(srcR1, srcR2, permM2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
343 srcM1 = vec_perm(srcR1, srcR2, permM1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
344 srcP0 = vec_perm(srcR1, srcR2, permP0); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
345 srcP1 = vec_perm(srcR1, srcR2, permP1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
346 srcP2 = vec_perm(srcR1, srcR2, permP2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
347 srcP3 = srcR2; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
348 } break; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
349 case 12: { |
8494 | 350 vec_u8 srcR3 = vec_ld(30, src); |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
351 srcM2 = vec_perm(srcR1, srcR2, permM2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
352 srcM1 = vec_perm(srcR1, srcR2, permM1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
353 srcP0 = vec_perm(srcR1, srcR2, permP0); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
354 srcP1 = vec_perm(srcR1, srcR2, permP1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
355 srcP2 = srcR2; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
356 srcP3 = vec_perm(srcR2, srcR3, permP3); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
357 } break; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
358 case 13: { |
8494 | 359 vec_u8 srcR3 = vec_ld(30, src); |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
360 srcM2 = vec_perm(srcR1, srcR2, permM2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
361 srcM1 = vec_perm(srcR1, srcR2, permM1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
362 srcP0 = vec_perm(srcR1, srcR2, permP0); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
363 srcP1 = srcR2; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
364 srcP2 = vec_perm(srcR2, srcR3, permP2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
365 srcP3 = vec_perm(srcR2, srcR3, permP3); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
366 } break; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
367 case 14: { |
8494 | 368 vec_u8 srcR3 = vec_ld(30, src); |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
369 srcM2 = vec_perm(srcR1, srcR2, permM2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
370 srcM1 = vec_perm(srcR1, srcR2, permM1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
371 srcP0 = srcR2; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
372 srcP1 = vec_perm(srcR2, srcR3, permP1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
373 srcP2 = vec_perm(srcR2, srcR3, permP2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
374 srcP3 = vec_perm(srcR2, srcR3, permP3); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
375 } break; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
376 case 15: { |
8494 | 377 vec_u8 srcR3 = vec_ld(30, src); |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
378 srcM2 = vec_perm(srcR1, srcR2, permM2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
379 srcM1 = srcR2; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
380 srcP0 = vec_perm(srcR2, srcR3, permP0); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
381 srcP1 = vec_perm(srcR2, srcR3, permP1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
382 srcP2 = vec_perm(srcR2, srcR3, permP2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
383 srcP3 = vec_perm(srcR2, srcR3, permP3); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
384 } break; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
385 } |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
386 |
8494 | 387 srcP0A = (vec_s16) vec_mergeh(zero_u8v, srcP0); |
388 srcP0B = (vec_s16) vec_mergel(zero_u8v, srcP0); | |
389 srcP1A = (vec_s16) vec_mergeh(zero_u8v, srcP1); | |
390 srcP1B = (vec_s16) vec_mergel(zero_u8v, srcP1); | |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
391 |
8494 | 392 srcP2A = (vec_s16) vec_mergeh(zero_u8v, srcP2); |
393 srcP2B = (vec_s16) vec_mergel(zero_u8v, srcP2); | |
394 srcP3A = (vec_s16) vec_mergeh(zero_u8v, srcP3); | |
395 srcP3B = (vec_s16) vec_mergel(zero_u8v, srcP3); | |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
396 |
8494 | 397 srcM1A = (vec_s16) vec_mergeh(zero_u8v, srcM1); |
398 srcM1B = (vec_s16) vec_mergel(zero_u8v, srcM1); | |
399 srcM2A = (vec_s16) vec_mergeh(zero_u8v, srcM2); | |
400 srcM2B = (vec_s16) vec_mergel(zero_u8v, srcM2); | |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
401 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
402 sum1A = vec_adds(srcP0A, srcP1A); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
403 sum1B = vec_adds(srcP0B, srcP1B); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
404 sum2A = vec_adds(srcM1A, srcP2A); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
405 sum2B = vec_adds(srcM1B, srcP2B); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
406 sum3A = vec_adds(srcM2A, srcP3A); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
407 sum3B = vec_adds(srcM2B, srcP3B); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
408 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
409 pp1A = vec_mladd(sum1A, v20ss, v16ss); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
410 pp1B = vec_mladd(sum1B, v20ss, v16ss); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
411 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
412 pp2A = vec_mladd(sum2A, v5ss, zero_s16v); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
413 pp2B = vec_mladd(sum2B, v5ss, zero_s16v); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
414 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
415 pp3A = vec_add(sum3A, pp1A); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
416 pp3B = vec_add(sum3B, pp1B); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
417 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
418 psumA = vec_sub(pp3A, pp2A); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
419 psumB = vec_sub(pp3B, pp2B); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
420 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
421 sumA = vec_sra(psumA, v5us); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
422 sumB = vec_sra(psumB, v5us); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
423 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
424 sum = vec_packsu(sumA, sumB); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
425 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
426 ASSERT_ALIGNED(dst); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
427 vdst = vec_ld(0, dst); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
428 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
429 OP_U8_ALTIVEC(fsum, sum, vdst); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
430 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
431 vec_st(fsum, 0, dst); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
432 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
433 src += srcStride; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
434 dst += dstStride; |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
435 } |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
436 POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
437 } |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
438 |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
439 /* this code assume stride % 16 == 0 */ |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
440 static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) { |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
441 POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_v_lowpass_num, 1); |
2967 | 442 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
443 register int i; |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
444 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
445 LOAD_ZERO; |
8494 | 446 const vec_u8 perm = vec_lvsl(0, src); |
447 const vec_s16 v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2)); | |
448 const vec_u16 v5us = vec_splat_u16(5); | |
449 const vec_s16 v5ss = vec_splat_s16(5); | |
450 const vec_s16 v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4)); | |
2967 | 451 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
452 uint8_t *srcbis = src - (srcStride * 2); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
453 |
8494 | 454 const vec_u8 srcM2a = vec_ld(0, srcbis); |
455 const vec_u8 srcM2b = vec_ld(16, srcbis); | |
456 const vec_u8 srcM2 = vec_perm(srcM2a, srcM2b, perm); | |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
457 //srcbis += srcStride; |
8494 | 458 const vec_u8 srcM1a = vec_ld(0, srcbis += srcStride); |
459 const vec_u8 srcM1b = vec_ld(16, srcbis); | |
460 const vec_u8 srcM1 = vec_perm(srcM1a, srcM1b, perm); | |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
461 //srcbis += srcStride; |
8494 | 462 const vec_u8 srcP0a = vec_ld(0, srcbis += srcStride); |
463 const vec_u8 srcP0b = vec_ld(16, srcbis); | |
464 const vec_u8 srcP0 = vec_perm(srcP0a, srcP0b, perm); | |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
465 //srcbis += srcStride; |
8494 | 466 const vec_u8 srcP1a = vec_ld(0, srcbis += srcStride); |
467 const vec_u8 srcP1b = vec_ld(16, srcbis); | |
468 const vec_u8 srcP1 = vec_perm(srcP1a, srcP1b, perm); | |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
469 //srcbis += srcStride; |
8494 | 470 const vec_u8 srcP2a = vec_ld(0, srcbis += srcStride); |
471 const vec_u8 srcP2b = vec_ld(16, srcbis); | |
472 const vec_u8 srcP2 = vec_perm(srcP2a, srcP2b, perm); | |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
473 //srcbis += srcStride; |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
474 |
8494 | 475 vec_s16 srcM2ssA = (vec_s16) vec_mergeh(zero_u8v, srcM2); |
476 vec_s16 srcM2ssB = (vec_s16) vec_mergel(zero_u8v, srcM2); | |
477 vec_s16 srcM1ssA = (vec_s16) vec_mergeh(zero_u8v, srcM1); | |
478 vec_s16 srcM1ssB = (vec_s16) vec_mergel(zero_u8v, srcM1); | |
479 vec_s16 srcP0ssA = (vec_s16) vec_mergeh(zero_u8v, srcP0); | |
480 vec_s16 srcP0ssB = (vec_s16) vec_mergel(zero_u8v, srcP0); | |
481 vec_s16 srcP1ssA = (vec_s16) vec_mergeh(zero_u8v, srcP1); | |
482 vec_s16 srcP1ssB = (vec_s16) vec_mergel(zero_u8v, srcP1); | |
483 vec_s16 srcP2ssA = (vec_s16) vec_mergeh(zero_u8v, srcP2); | |
484 vec_s16 srcP2ssB = (vec_s16) vec_mergel(zero_u8v, srcP2); | |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
485 |
8494 | 486 vec_s16 pp1A, pp1B, pp2A, pp2B, pp3A, pp3B, |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
487 psumA, psumB, sumA, sumB, |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
488 srcP3ssA, srcP3ssB, |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
489 sum1A, sum1B, sum2A, sum2B, sum3A, sum3B; |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
490 |
8494 | 491 vec_u8 sum, vdst, fsum, srcP3a, srcP3b, srcP3; |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
492 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
493 POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
494 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
495 for (i = 0 ; i < 16 ; i++) { |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
496 srcP3a = vec_ld(0, srcbis += srcStride); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
497 srcP3b = vec_ld(16, srcbis); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
498 srcP3 = vec_perm(srcP3a, srcP3b, perm); |
8494 | 499 srcP3ssA = (vec_s16) vec_mergeh(zero_u8v, srcP3); |
500 srcP3ssB = (vec_s16) vec_mergel(zero_u8v, srcP3); | |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
501 //srcbis += srcStride; |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
502 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
503 sum1A = vec_adds(srcP0ssA, srcP1ssA); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
504 sum1B = vec_adds(srcP0ssB, srcP1ssB); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
505 sum2A = vec_adds(srcM1ssA, srcP2ssA); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
506 sum2B = vec_adds(srcM1ssB, srcP2ssB); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
507 sum3A = vec_adds(srcM2ssA, srcP3ssA); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
508 sum3B = vec_adds(srcM2ssB, srcP3ssB); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
509 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
510 srcM2ssA = srcM1ssA; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
511 srcM2ssB = srcM1ssB; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
512 srcM1ssA = srcP0ssA; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
513 srcM1ssB = srcP0ssB; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
514 srcP0ssA = srcP1ssA; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
515 srcP0ssB = srcP1ssB; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
516 srcP1ssA = srcP2ssA; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
517 srcP1ssB = srcP2ssB; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
518 srcP2ssA = srcP3ssA; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
519 srcP2ssB = srcP3ssB; |
2967 | 520 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
521 pp1A = vec_mladd(sum1A, v20ss, v16ss); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
522 pp1B = vec_mladd(sum1B, v20ss, v16ss); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
523 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
524 pp2A = vec_mladd(sum2A, v5ss, zero_s16v); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
525 pp2B = vec_mladd(sum2B, v5ss, zero_s16v); |
2967 | 526 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
527 pp3A = vec_add(sum3A, pp1A); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
528 pp3B = vec_add(sum3B, pp1B); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
529 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
530 psumA = vec_sub(pp3A, pp2A); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
531 psumB = vec_sub(pp3B, pp2B); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
532 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
533 sumA = vec_sra(psumA, v5us); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
534 sumB = vec_sra(psumB, v5us); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
535 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
536 sum = vec_packsu(sumA, sumB); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
537 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
538 ASSERT_ALIGNED(dst); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
539 vdst = vec_ld(0, dst); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
540 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
541 OP_U8_ALTIVEC(fsum, sum, vdst); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
542 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
543 vec_st(fsum, 0, dst); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
544 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
545 dst += dstStride; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
546 } |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
547 POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
548 } |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
549 |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
550 /* this code assumes stride % 16 == 0 *and* tmp is properly aligned */ |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
551 static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp, uint8_t * src, int dstStride, int tmpStride, int srcStride) { |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
552 POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_hv_lowpass_num, 1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
553 register int i; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
554 LOAD_ZERO; |
8494 | 555 const vec_u8 permM2 = vec_lvsl(-2, src); |
556 const vec_u8 permM1 = vec_lvsl(-1, src); | |
557 const vec_u8 permP0 = vec_lvsl(+0, src); | |
558 const vec_u8 permP1 = vec_lvsl(+1, src); | |
559 const vec_u8 permP2 = vec_lvsl(+2, src); | |
560 const vec_u8 permP3 = vec_lvsl(+3, src); | |
561 const vec_s16 v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2)); | |
562 const vec_u32 v10ui = vec_splat_u32(10); | |
563 const vec_s16 v5ss = vec_splat_s16(5); | |
564 const vec_s16 v1ss = vec_splat_s16(1); | |
565 const vec_s32 v512si = vec_sl(vec_splat_s32(1),vec_splat_u32(9)); | |
566 const vec_u32 v16ui = vec_sl(vec_splat_u32(1),vec_splat_u32(4)); | |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
567 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
568 register int align = ((((unsigned long)src) - 2) % 16); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
569 |
8494 | 570 vec_s16 srcP0A, srcP0B, srcP1A, srcP1B, |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
571 srcP2A, srcP2B, srcP3A, srcP3B, |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
572 srcM1A, srcM1B, srcM2A, srcM2B, |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
573 sum1A, sum1B, sum2A, sum2B, sum3A, sum3B, |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
574 pp1A, pp1B, pp2A, pp2B, psumA, psumB; |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
575 |
8494 | 576 const vec_u8 mperm = (const vec_u8) |
7373
266d4949aa15
Remove AltiVec vector declaration compiler compatibility macros.
diego
parents:
7333
diff
changeset
|
577 {0x00, 0x08, 0x01, 0x09, 0x02, 0x0A, 0x03, 0x0B, |
266d4949aa15
Remove AltiVec vector declaration compiler compatibility macros.
diego
parents:
7333
diff
changeset
|
578 0x04, 0x0C, 0x05, 0x0D, 0x06, 0x0E, 0x07, 0x0F}; |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
579 int16_t *tmpbis = tmp; |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
580 |
8494 | 581 vec_s16 tmpM1ssA, tmpM1ssB, tmpM2ssA, tmpM2ssB, |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
582 tmpP0ssA, tmpP0ssB, tmpP1ssA, tmpP1ssB, |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
583 tmpP2ssA, tmpP2ssB; |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
584 |
8494 | 585 vec_s32 pp1Ae, pp1Ao, pp1Be, pp1Bo, pp2Ae, pp2Ao, pp2Be, pp2Bo, |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
586 pp3Ae, pp3Ao, pp3Be, pp3Bo, pp1cAe, pp1cAo, pp1cBe, pp1cBo, |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
587 pp32Ae, pp32Ao, pp32Be, pp32Bo, sumAe, sumAo, sumBe, sumBo, |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
588 ssumAe, ssumAo, ssumBe, ssumBo; |
8494 | 589 vec_u8 fsum, sumv, sum, vdst; |
590 vec_s16 ssume, ssumo; | |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
591 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
592 POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_hv_lowpass_num, 1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
593 src -= (2 * srcStride); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
594 for (i = 0 ; i < 21 ; i ++) { |
8494 | 595 vec_u8 srcM2, srcM1, srcP0, srcP1, srcP2, srcP3; |
596 vec_u8 srcR1 = vec_ld(-2, src); | |
597 vec_u8 srcR2 = vec_ld(14, src); | |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
598 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
599 switch (align) { |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
600 default: { |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
601 srcM2 = vec_perm(srcR1, srcR2, permM2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
602 srcM1 = vec_perm(srcR1, srcR2, permM1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
603 srcP0 = vec_perm(srcR1, srcR2, permP0); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
604 srcP1 = vec_perm(srcR1, srcR2, permP1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
605 srcP2 = vec_perm(srcR1, srcR2, permP2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
606 srcP3 = vec_perm(srcR1, srcR2, permP3); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
607 } break; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
608 case 11: { |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
609 srcM2 = vec_perm(srcR1, srcR2, permM2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
610 srcM1 = vec_perm(srcR1, srcR2, permM1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
611 srcP0 = vec_perm(srcR1, srcR2, permP0); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
612 srcP1 = vec_perm(srcR1, srcR2, permP1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
613 srcP2 = vec_perm(srcR1, srcR2, permP2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
614 srcP3 = srcR2; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
615 } break; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
616 case 12: { |
8494 | 617 vec_u8 srcR3 = vec_ld(30, src); |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
618 srcM2 = vec_perm(srcR1, srcR2, permM2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
619 srcM1 = vec_perm(srcR1, srcR2, permM1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
620 srcP0 = vec_perm(srcR1, srcR2, permP0); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
621 srcP1 = vec_perm(srcR1, srcR2, permP1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
622 srcP2 = srcR2; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
623 srcP3 = vec_perm(srcR2, srcR3, permP3); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
624 } break; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
625 case 13: { |
8494 | 626 vec_u8 srcR3 = vec_ld(30, src); |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
627 srcM2 = vec_perm(srcR1, srcR2, permM2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
628 srcM1 = vec_perm(srcR1, srcR2, permM1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
629 srcP0 = vec_perm(srcR1, srcR2, permP0); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
630 srcP1 = srcR2; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
631 srcP2 = vec_perm(srcR2, srcR3, permP2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
632 srcP3 = vec_perm(srcR2, srcR3, permP3); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
633 } break; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
634 case 14: { |
8494 | 635 vec_u8 srcR3 = vec_ld(30, src); |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
636 srcM2 = vec_perm(srcR1, srcR2, permM2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
637 srcM1 = vec_perm(srcR1, srcR2, permM1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
638 srcP0 = srcR2; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
639 srcP1 = vec_perm(srcR2, srcR3, permP1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
640 srcP2 = vec_perm(srcR2, srcR3, permP2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
641 srcP3 = vec_perm(srcR2, srcR3, permP3); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
642 } break; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
643 case 15: { |
8494 | 644 vec_u8 srcR3 = vec_ld(30, src); |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
645 srcM2 = vec_perm(srcR1, srcR2, permM2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
646 srcM1 = srcR2; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
647 srcP0 = vec_perm(srcR2, srcR3, permP0); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
648 srcP1 = vec_perm(srcR2, srcR3, permP1); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
649 srcP2 = vec_perm(srcR2, srcR3, permP2); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
650 srcP3 = vec_perm(srcR2, srcR3, permP3); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
651 } break; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
652 } |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
653 |
8494 | 654 srcP0A = (vec_s16) vec_mergeh(zero_u8v, srcP0); |
655 srcP0B = (vec_s16) vec_mergel(zero_u8v, srcP0); | |
656 srcP1A = (vec_s16) vec_mergeh(zero_u8v, srcP1); | |
657 srcP1B = (vec_s16) vec_mergel(zero_u8v, srcP1); | |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
658 |
8494 | 659 srcP2A = (vec_s16) vec_mergeh(zero_u8v, srcP2); |
660 srcP2B = (vec_s16) vec_mergel(zero_u8v, srcP2); | |
661 srcP3A = (vec_s16) vec_mergeh(zero_u8v, srcP3); | |
662 srcP3B = (vec_s16) vec_mergel(zero_u8v, srcP3); | |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
663 |
8494 | 664 srcM1A = (vec_s16) vec_mergeh(zero_u8v, srcM1); |
665 srcM1B = (vec_s16) vec_mergel(zero_u8v, srcM1); | |
666 srcM2A = (vec_s16) vec_mergeh(zero_u8v, srcM2); | |
667 srcM2B = (vec_s16) vec_mergel(zero_u8v, srcM2); | |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
668 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
669 sum1A = vec_adds(srcP0A, srcP1A); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
670 sum1B = vec_adds(srcP0B, srcP1B); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
671 sum2A = vec_adds(srcM1A, srcP2A); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
672 sum2B = vec_adds(srcM1B, srcP2B); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
673 sum3A = vec_adds(srcM2A, srcP3A); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
674 sum3B = vec_adds(srcM2B, srcP3B); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
675 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
676 pp1A = vec_mladd(sum1A, v20ss, sum3A); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
677 pp1B = vec_mladd(sum1B, v20ss, sum3B); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
678 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
679 pp2A = vec_mladd(sum2A, v5ss, zero_s16v); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
680 pp2B = vec_mladd(sum2B, v5ss, zero_s16v); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
681 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
682 psumA = vec_sub(pp1A, pp2A); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
683 psumB = vec_sub(pp1B, pp2B); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
684 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
685 vec_st(psumA, 0, tmp); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
686 vec_st(psumB, 16, tmp); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
687 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
688 src += srcStride; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
689 tmp += tmpStride; /* int16_t*, and stride is 16, so it's OK here */ |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
690 } |
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
691 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
692 tmpM2ssA = vec_ld(0, tmpbis); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
693 tmpM2ssB = vec_ld(16, tmpbis); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
694 tmpbis += tmpStride; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
695 tmpM1ssA = vec_ld(0, tmpbis); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
696 tmpM1ssB = vec_ld(16, tmpbis); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
697 tmpbis += tmpStride; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
698 tmpP0ssA = vec_ld(0, tmpbis); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
699 tmpP0ssB = vec_ld(16, tmpbis); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
700 tmpbis += tmpStride; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
701 tmpP1ssA = vec_ld(0, tmpbis); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
702 tmpP1ssB = vec_ld(16, tmpbis); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
703 tmpbis += tmpStride; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
704 tmpP2ssA = vec_ld(0, tmpbis); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
705 tmpP2ssB = vec_ld(16, tmpbis); |
3346
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
706 tmpbis += tmpStride; |
052765f11f1c
Cosmetics: should not hurt performance, scream if are
lu_zero
parents:
3153
diff
changeset
|
707 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
708 for (i = 0 ; i < 16 ; i++) { |
8494 | 709 const vec_s16 tmpP3ssA = vec_ld(0, tmpbis); |
710 const vec_s16 tmpP3ssB = vec_ld(16, tmpbis); | |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
711 |
8494 | 712 const vec_s16 sum1A = vec_adds(tmpP0ssA, tmpP1ssA); |
713 const vec_s16 sum1B = vec_adds(tmpP0ssB, tmpP1ssB); | |
714 const vec_s16 sum2A = vec_adds(tmpM1ssA, tmpP2ssA); | |
715 const vec_s16 sum2B = vec_adds(tmpM1ssB, tmpP2ssB); | |
716 const vec_s16 sum3A = vec_adds(tmpM2ssA, tmpP3ssA); | |
717 const vec_s16 sum3B = vec_adds(tmpM2ssB, tmpP3ssB); | |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
718 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
719 tmpbis += tmpStride; |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
720 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
721 tmpM2ssA = tmpM1ssA; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
722 tmpM2ssB = tmpM1ssB; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
723 tmpM1ssA = tmpP0ssA; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
724 tmpM1ssB = tmpP0ssB; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
725 tmpP0ssA = tmpP1ssA; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
726 tmpP0ssB = tmpP1ssB; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
727 tmpP1ssA = tmpP2ssA; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
728 tmpP1ssB = tmpP2ssB; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
729 tmpP2ssA = tmpP3ssA; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
730 tmpP2ssB = tmpP3ssB; |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
731 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
732 pp1Ae = vec_mule(sum1A, v20ss); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
733 pp1Ao = vec_mulo(sum1A, v20ss); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
734 pp1Be = vec_mule(sum1B, v20ss); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
735 pp1Bo = vec_mulo(sum1B, v20ss); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
736 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
737 pp2Ae = vec_mule(sum2A, v5ss); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
738 pp2Ao = vec_mulo(sum2A, v5ss); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
739 pp2Be = vec_mule(sum2B, v5ss); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
740 pp2Bo = vec_mulo(sum2B, v5ss); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
741 |
8494 | 742 pp3Ae = vec_sra((vec_s32)sum3A, v16ui); |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
743 pp3Ao = vec_mulo(sum3A, v1ss); |
8494 | 744 pp3Be = vec_sra((vec_s32)sum3B, v16ui); |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
745 pp3Bo = vec_mulo(sum3B, v1ss); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
746 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
747 pp1cAe = vec_add(pp1Ae, v512si); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
748 pp1cAo = vec_add(pp1Ao, v512si); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
749 pp1cBe = vec_add(pp1Be, v512si); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
750 pp1cBo = vec_add(pp1Bo, v512si); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
751 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
752 pp32Ae = vec_sub(pp3Ae, pp2Ae); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
753 pp32Ao = vec_sub(pp3Ao, pp2Ao); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
754 pp32Be = vec_sub(pp3Be, pp2Be); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
755 pp32Bo = vec_sub(pp3Bo, pp2Bo); |
2967 | 756 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
757 sumAe = vec_add(pp1cAe, pp32Ae); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
758 sumAo = vec_add(pp1cAo, pp32Ao); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
759 sumBe = vec_add(pp1cBe, pp32Be); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
760 sumBo = vec_add(pp1cBo, pp32Bo); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
761 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
762 ssumAe = vec_sra(sumAe, v10ui); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
763 ssumAo = vec_sra(sumAo, v10ui); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
764 ssumBe = vec_sra(sumBe, v10ui); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
765 ssumBo = vec_sra(sumBo, v10ui); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
766 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
767 ssume = vec_packs(ssumAe, ssumBe); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
768 ssumo = vec_packs(ssumAo, ssumBo); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
769 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
770 sumv = vec_packsu(ssume, ssumo); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
771 sum = vec_perm(sumv, sumv, mperm); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
772 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
773 ASSERT_ALIGNED(dst); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
774 vdst = vec_ld(0, dst); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
775 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
776 OP_U8_ALTIVEC(fsum, sum, vdst); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
777 |
7333
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
778 vec_st(fsum, 0, dst); |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
779 |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
780 dst += dstStride; |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
781 } |
a8a79f5385f6
cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents:
6067
diff
changeset
|
782 POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_hv_lowpass_num, 1); |
2236
b0102ea621dd
h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff
changeset
|
783 } |