annotate ppc/h264_template_altivec.c @ 10936:b2ea6b0d17bf libavcodec

Update libx264.c to use new libx264 features With b_keyframe instead of IDR for detecting keyframes, ffmpeg should now support periodic encoding with periodic intra refresh (although there is no interface option for it yet). Set the new timebase values for full VFR input support. Bump configure to check for API version 83.
author darkshikari
date Tue, 19 Jan 2010 04:00:08 +0000
parents fe17033a79ed
children 34a65026fa06
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
1 /*
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
2 * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
3 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3577
diff changeset
4 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3577
diff changeset
5 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3577
diff changeset
6 * FFmpeg is free software; you can redistribute it and/or
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
7 * modify it under the terms of the GNU Lesser General Public
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
8 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3577
diff changeset
9 * version 2.1 of the License, or (at your option) any later version.
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
10 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3577
diff changeset
11 * FFmpeg is distributed in the hope that it will be useful,
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
14 * Lesser General Public License for more details.
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
15 *
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
16 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3577
diff changeset
17 * License along with FFmpeg; if not, write to the Free Software
3036
0b546eab515d Update licensing information: The FSF changed postal address.
diego
parents: 2967
diff changeset
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
19 */
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
20
5603
861eb234e6ba remove alignment correction of the destination pointers in luma_16x6
gpoirier
parents: 5530
diff changeset
21 //#define DEBUG_ALIGNMENT
861eb234e6ba remove alignment correction of the destination pointers in luma_16x6
gpoirier
parents: 5530
diff changeset
22 #ifdef DEBUG_ALIGNMENT
861eb234e6ba remove alignment correction of the destination pointers in luma_16x6
gpoirier
parents: 5530
diff changeset
23 #define ASSERT_ALIGNED(ptr) assert(!((unsigned long)(ptr)&0x0000000F)); /* debug check: the low 4 address bits must be zero, i.e. ptr is 16-byte aligned */
861eb234e6ba remove alignment correction of the destination pointers in luma_16x6
gpoirier
parents: 5530
diff changeset
24 #else
861eb234e6ba remove alignment correction of the destination pointers in luma_16x6
gpoirier
parents: 5530
diff changeset
25 #define ASSERT_ALIGNED(ptr) ;
861eb234e6ba remove alignment correction of the destination pointers in luma_16x6
gpoirier
parents: 5530
diff changeset
26 #endif
861eb234e6ba remove alignment correction of the destination pointers in luma_16x6
gpoirier
parents: 5530
diff changeset
27
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
28 /* this code assumes that stride % 16 == 0 */
6059
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
29
9444
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
30 #define CHROMA_MC8_ALTIVEC_CORE(BIAS1, BIAS2) \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
31 vsrc2ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc2uc);\
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
32 vsrc3ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc3uc);\
6059
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
33 \
9444
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
34 psum = vec_mladd(vA, vsrc0ssH, BIAS1);\
6059
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
35 psum = vec_mladd(vB, vsrc1ssH, psum);\
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
36 psum = vec_mladd(vC, vsrc2ssH, psum);\
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
37 psum = vec_mladd(vD, vsrc3ssH, psum);\
9444
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
38 psum = BIAS2(psum);\
6059
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
39 psum = vec_sr(psum, v6us);\
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
40 \
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
41 vdst = vec_ld(0, dst);\
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
42 ppsum = (vec_u8)vec_pack(psum, psum);\
6059
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
43 vfdst = vec_perm(vdst, ppsum, fperm);\
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
44 \
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
45 OP_U8_ALTIVEC(fsum, vfdst, vdst);\
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
46 \
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
47 vec_st(fsum, 0, dst);\
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
48 \
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
49 vsrc0ssH = vsrc2ssH;\
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
50 vsrc1ssH = vsrc3ssH;\
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
51 \
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
52 dst += stride;\
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
53 src += stride;
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
54
6062
9d1590a4df90 Partially address issue299, no performance change apparently
lu_zero
parents: 6061
diff changeset
55 #define CHROMA_MC8_ALTIVEC_CORE_SIMPLE \
6064
62d040333d51 Add C/B == 0 cases, 2% slower on CELL but should address Issue299 eventually
lu_zero
parents: 6063
diff changeset
56 \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
57 vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc0uc);\
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
58 vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc1uc);\
6062
9d1590a4df90 Partially address issue299, no performance change apparently
lu_zero
parents: 6061
diff changeset
59 \
9d1590a4df90 Partially address issue299, no performance change apparently
lu_zero
parents: 6061
diff changeset
60 psum = vec_mladd(vA, vsrc0ssH, v32ss);\
6064
62d040333d51 Add C/B == 0 cases, 2% slower on CELL but should address Issue299 eventually
lu_zero
parents: 6063
diff changeset
61 psum = vec_mladd(vE, vsrc1ssH, psum);\
6062
9d1590a4df90 Partially address issue299, no performance change apparently
lu_zero
parents: 6061
diff changeset
62 psum = vec_sr(psum, v6us);\
9d1590a4df90 Partially address issue299, no performance change apparently
lu_zero
parents: 6061
diff changeset
63 \
9d1590a4df90 Partially address issue299, no performance change apparently
lu_zero
parents: 6061
diff changeset
64 vdst = vec_ld(0, dst);\
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
65 ppsum = (vec_u8)vec_pack(psum, psum);\
6062
9d1590a4df90 Partially address issue299, no performance change apparently
lu_zero
parents: 6061
diff changeset
66 vfdst = vec_perm(vdst, ppsum, fperm);\
9d1590a4df90 Partially address issue299, no performance change apparently
lu_zero
parents: 6061
diff changeset
67 \
9d1590a4df90 Partially address issue299, no performance change apparently
lu_zero
parents: 6061
diff changeset
68 OP_U8_ALTIVEC(fsum, vfdst, vdst);\
9d1590a4df90 Partially address issue299, no performance change apparently
lu_zero
parents: 6061
diff changeset
69 \
9d1590a4df90 Partially address issue299, no performance change apparently
lu_zero
parents: 6061
diff changeset
70 vec_st(fsum, 0, dst);\
9d1590a4df90 Partially address issue299, no performance change apparently
lu_zero
parents: 6061
diff changeset
71 \
9d1590a4df90 Partially address issue299, no performance change apparently
lu_zero
parents: 6061
diff changeset
72 dst += stride;\
9d1590a4df90 Partially address issue299, no performance change apparently
lu_zero
parents: 6061
diff changeset
73 src += stride;
6059
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
74
9444
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
75 #define noop(a) a
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
76 #define add28(a) vec_add(v28ss, a)
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
77
6063
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
78 void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src,
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
79 int stride, int h, int x, int y) {
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
80 POWERPC_PERF_DECLARE(PREFIX_h264_chroma_mc8_num, 1);
5019
41cabe79ba25 use macro Use DECLARE_ALIGNED_16 to align stack-allocated variables
gpoirier
parents: 3947
diff changeset
81 DECLARE_ALIGNED_16(signed int, ABCD[4]) =
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
82 {((8 - x) * (8 - y)),
6058
93089aed00cb Cosmetics
lu_zero
parents: 5603
diff changeset
83 (( x) * (8 - y)),
93089aed00cb Cosmetics
lu_zero
parents: 5603
diff changeset
84 ((8 - x) * ( y)),
93089aed00cb Cosmetics
lu_zero
parents: 5603
diff changeset
85 (( x) * ( y))};
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
86 register int i;
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
87 vec_u8 fperm;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
88 const vec_s32 vABCD = vec_ld(0, ABCD);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
89 const vec_s16 vA = vec_splat((vec_s16)vABCD, 1);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
90 const vec_s16 vB = vec_splat((vec_s16)vABCD, 3);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
91 const vec_s16 vC = vec_splat((vec_s16)vABCD, 5);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
92 const vec_s16 vD = vec_splat((vec_s16)vABCD, 7);
5530
cd266411b11a use shorter types vec_"type" instead of the too long vector "type"
gpoirier
parents: 5019
diff changeset
93 LOAD_ZERO;
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
94 const vec_s16 v32ss = vec_sl(vec_splat_s16(1),vec_splat_u16(5));
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
95 const vec_u16 v6us = vec_splat_u16(6);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
96 register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
97 register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
98
9166
40738baaafc2 Add av_uninit to vsrcBuc variable to work around some
diego
parents: 8494
diff changeset
99 vec_u8 vsrcAuc, av_uninit(vsrcBuc), vsrcperm0, vsrcperm1;
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
100 vec_u8 vsrc0uc, vsrc1uc;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
101 vec_s16 vsrc0ssH, vsrc1ssH;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
102 vec_u8 vsrcCuc, vsrc2uc, vsrc3uc;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
103 vec_s16 vsrc2ssH, vsrc3ssH, psum;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
104 vec_u8 vdst, ppsum, vfdst, fsum;
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
105
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
106 POWERPC_PERF_START_COUNT(PREFIX_h264_chroma_mc8_num, 1);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
107
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
108 if (((unsigned long)dst) % 16 == 0) {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
109 fperm = (vec_u8){0x10, 0x11, 0x12, 0x13,
9167
fd8b4aa6e493 prettyprinting cosmetics
diego
parents: 9166
diff changeset
110 0x14, 0x15, 0x16, 0x17,
fd8b4aa6e493 prettyprinting cosmetics
diego
parents: 9166
diff changeset
111 0x08, 0x09, 0x0A, 0x0B,
fd8b4aa6e493 prettyprinting cosmetics
diego
parents: 9166
diff changeset
112 0x0C, 0x0D, 0x0E, 0x0F};
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
113 } else {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
114 fperm = (vec_u8){0x00, 0x01, 0x02, 0x03,
9167
fd8b4aa6e493 prettyprinting cosmetics
diego
parents: 9166
diff changeset
115 0x04, 0x05, 0x06, 0x07,
fd8b4aa6e493 prettyprinting cosmetics
diego
parents: 9166
diff changeset
116 0x18, 0x19, 0x1A, 0x1B,
fd8b4aa6e493 prettyprinting cosmetics
diego
parents: 9166
diff changeset
117 0x1C, 0x1D, 0x1E, 0x1F};
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
118 }
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
119
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
120 vsrcAuc = vec_ld(0, src);
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
121
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
122 if (loadSecond)
6060
b1e3368c0d5e Reindent
lu_zero
parents: 6059
diff changeset
123 vsrcBuc = vec_ld(16, src);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
124 vsrcperm0 = vec_lvsl(0, src);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
125 vsrcperm1 = vec_lvsl(1, src);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
126
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
127 vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
128 if (reallyBadAlign)
6060
b1e3368c0d5e Reindent
lu_zero
parents: 6059
diff changeset
129 vsrc1uc = vsrcBuc;
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
130 else
6060
b1e3368c0d5e Reindent
lu_zero
parents: 6059
diff changeset
131 vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
132
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
133 vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc0uc);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
134 vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc1uc);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
135
6062
9d1590a4df90 Partially address issue299, no performance change apparently
lu_zero
parents: 6061
diff changeset
136 if (ABCD[3]) {
6063
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
137 if (!loadSecond) {// -> !reallyBadAlign
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
138 for (i = 0 ; i < h ; i++) {
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
139 vsrcCuc = vec_ld(stride + 0, src);
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
140 vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
141 vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
142
9444
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
143 CHROMA_MC8_ALTIVEC_CORE(v32ss, noop)
6063
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
144 }
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
145 } else {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
146 vec_u8 vsrcDuc;
6063
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
147 for (i = 0 ; i < h ; i++) {
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
148 vsrcCuc = vec_ld(stride + 0, src);
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
149 vsrcDuc = vec_ld(stride + 16, src);
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
150 vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
151 if (reallyBadAlign)
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
152 vsrc3uc = vsrcDuc;
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
153 else
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
154 vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
155
9444
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
156 CHROMA_MC8_ALTIVEC_CORE(v32ss, noop)
6063
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
157 }
6060
b1e3368c0d5e Reindent
lu_zero
parents: 6059
diff changeset
158 }
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
159 } else {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
160 const vec_s16 vE = vec_add(vB, vC);
6065
180976fd652e 10l do not load after the buffer...
lu_zero
parents: 6064
diff changeset
161 if (ABCD[2]) { // x == 0 B == 0
6067
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
162 if (!loadSecond) {// -> !reallyBadAlign
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
163 for (i = 0 ; i < h ; i++) {
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
164 vsrcCuc = vec_ld(stride + 0, src);
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
165 vsrc1uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
166 CHROMA_MC8_ALTIVEC_CORE_SIMPLE
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
167
6067
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
168 vsrc0uc = vsrc1uc;
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
169 }
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
170 } else {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
171 vec_u8 vsrcDuc;
6067
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
172 for (i = 0 ; i < h ; i++) {
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
173 vsrcCuc = vec_ld(stride + 0, src);
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
174 vsrcDuc = vec_ld(stride + 15, src);
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
175 vsrc1uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
176 CHROMA_MC8_ALTIVEC_CORE_SIMPLE
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
177
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
178 vsrc0uc = vsrc1uc;
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
179 }
6063
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
180 }
6065
180976fd652e 10l do not load after the buffer...
lu_zero
parents: 6064
diff changeset
181 } else { // y == 0 C == 0
6067
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
182 if (!loadSecond) {// -> !reallyBadAlign
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
183 for (i = 0 ; i < h ; i++) {
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
184 vsrcCuc = vec_ld(0, src);
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
185 vsrc0uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
186 vsrc1uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);
6065
180976fd652e 10l do not load after the buffer...
lu_zero
parents: 6064
diff changeset
187
6067
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
188 CHROMA_MC8_ALTIVEC_CORE_SIMPLE
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
189 }
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
190 } else {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
191 vec_u8 vsrcDuc;
6067
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
192 for (i = 0 ; i < h ; i++) {
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
193 vsrcCuc = vec_ld(0, src);
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
194 vsrcDuc = vec_ld(15, src);
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
195 vsrc0uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
196 if (reallyBadAlign)
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
197 vsrc1uc = vsrcDuc;
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
198 else
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
199 vsrc1uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
200
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
201 CHROMA_MC8_ALTIVEC_CORE_SIMPLE
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
202 }
6064
62d040333d51 Add C/B == 0 cases, 2% slower on CELL but should address Issue299 eventually
lu_zero
parents: 6063
diff changeset
203 }
62d040333d51 Add C/B == 0 cases, 2% slower on CELL but should address Issue299 eventually
lu_zero
parents: 6063
diff changeset
204 }
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
205 }
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
206 POWERPC_PERF_STOP_COUNT(PREFIX_h264_chroma_mc8_num, 1);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
207 }
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
208
9444
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
209 /* this code assumes that stride % 16 == 0 */
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
210 void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
211 DECLARE_ALIGNED_16(signed int, ABCD[4]) =
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
212 {((8 - x) * (8 - y)),
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
213 (( x) * (8 - y)),
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
214 ((8 - x) * ( y)),
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
215 (( x) * ( y))};
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
216 register int i;
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
217 vec_u8 fperm;
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
218 const vec_s32 vABCD = vec_ld(0, ABCD);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
219 const vec_s16 vA = vec_splat((vec_s16)vABCD, 1);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
220 const vec_s16 vB = vec_splat((vec_s16)vABCD, 3);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
221 const vec_s16 vC = vec_splat((vec_s16)vABCD, 5);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
222 const vec_s16 vD = vec_splat((vec_s16)vABCD, 7);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
223 LOAD_ZERO;
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
224 const vec_s16 v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4));
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
225 const vec_u16 v6us = vec_splat_u16(6);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
226 register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
227 register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
228
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
229 vec_u8 vsrcAuc, av_uninit(vsrcBuc), vsrcperm0, vsrcperm1;
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
230 vec_u8 vsrc0uc, vsrc1uc;
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
231 vec_s16 vsrc0ssH, vsrc1ssH;
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
232 vec_u8 vsrcCuc, vsrc2uc, vsrc3uc;
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
233 vec_s16 vsrc2ssH, vsrc3ssH, psum;
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
234 vec_u8 vdst, ppsum, vfdst, fsum;
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
235
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
236 if (((unsigned long)dst) % 16 == 0) {
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
237 fperm = (vec_u8){0x10, 0x11, 0x12, 0x13,
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
238 0x14, 0x15, 0x16, 0x17,
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
239 0x08, 0x09, 0x0A, 0x0B,
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
240 0x0C, 0x0D, 0x0E, 0x0F};
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
241 } else {
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
242 fperm = (vec_u8){0x00, 0x01, 0x02, 0x03,
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
243 0x04, 0x05, 0x06, 0x07,
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
244 0x18, 0x19, 0x1A, 0x1B,
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
245 0x1C, 0x1D, 0x1E, 0x1F};
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
246 }
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
247
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
248 vsrcAuc = vec_ld(0, src);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
249
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
250 if (loadSecond)
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
251 vsrcBuc = vec_ld(16, src);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
252 vsrcperm0 = vec_lvsl(0, src);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
253 vsrcperm1 = vec_lvsl(1, src);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
254
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
255 vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
256 if (reallyBadAlign)
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
257 vsrc1uc = vsrcBuc;
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
258 else
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
259 vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
260
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
261 vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc0uc);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
262 vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc1uc);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
263
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
264 if (!loadSecond) {// -> !reallyBadAlign
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
265 for (i = 0 ; i < h ; i++) {
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
266
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
267
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
268 vsrcCuc = vec_ld(stride + 0, src);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
269
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
270 vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
271 vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
272
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
273 CHROMA_MC8_ALTIVEC_CORE(vec_splat_s16(0), add28)
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
274 }
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
275 } else {
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
276 vec_u8 vsrcDuc;
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
277 for (i = 0 ; i < h ; i++) {
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
278 vsrcCuc = vec_ld(stride + 0, src);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
279 vsrcDuc = vec_ld(stride + 16, src);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
280
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
281 vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
282 if (reallyBadAlign)
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
283 vsrc3uc = vsrcDuc;
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
284 else
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
285 vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
286
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
287 CHROMA_MC8_ALTIVEC_CORE(vec_splat_s16(0), add28)
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
288 }
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
289 }
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
290 }
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
291
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
292 #undef noop
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
293 #undef add28
6059
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
294 #undef CHROMA_MC8_ALTIVEC_CORE
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
295
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
296 /* this code assumes stride % 16 == 0 */
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
297 static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
298 POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_h_lowpass_num, 1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
299 register int i;
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
300
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
301 LOAD_ZERO;
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
302 const vec_u8 permM2 = vec_lvsl(-2, src);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
303 const vec_u8 permM1 = vec_lvsl(-1, src);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
304 const vec_u8 permP0 = vec_lvsl(+0, src);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
305 const vec_u8 permP1 = vec_lvsl(+1, src);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
306 const vec_u8 permP2 = vec_lvsl(+2, src);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
307 const vec_u8 permP3 = vec_lvsl(+3, src);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
308 const vec_s16 v5ss = vec_splat_s16(5);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
309 const vec_u16 v5us = vec_splat_u16(5);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
310 const vec_s16 v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
311 const vec_s16 v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
312
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
313 vec_u8 srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
314
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
315 register int align = ((((unsigned long)src) - 2) % 16);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
316
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
317 vec_s16 srcP0A, srcP0B, srcP1A, srcP1B,
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
318 srcP2A, srcP2B, srcP3A, srcP3B,
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
319 srcM1A, srcM1B, srcM2A, srcM2B,
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
320 sum1A, sum1B, sum2A, sum2B, sum3A, sum3B,
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
321 pp1A, pp1B, pp2A, pp2B, pp3A, pp3B,
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
322 psumA, psumB, sumA, sumB;
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
323
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
324 vec_u8 sum, vdst, fsum;
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
325
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
326 POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1);
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
327
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
328 for (i = 0 ; i < 16 ; i ++) {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
329 vec_u8 srcR1 = vec_ld(-2, src);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
330 vec_u8 srcR2 = vec_ld(14, src);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
331
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
332 switch (align) {
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
333 default: {
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
334 srcM2 = vec_perm(srcR1, srcR2, permM2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
335 srcM1 = vec_perm(srcR1, srcR2, permM1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
336 srcP0 = vec_perm(srcR1, srcR2, permP0);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
337 srcP1 = vec_perm(srcR1, srcR2, permP1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
338 srcP2 = vec_perm(srcR1, srcR2, permP2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
339 srcP3 = vec_perm(srcR1, srcR2, permP3);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
340 } break;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
341 case 11: {
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
342 srcM2 = vec_perm(srcR1, srcR2, permM2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
343 srcM1 = vec_perm(srcR1, srcR2, permM1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
344 srcP0 = vec_perm(srcR1, srcR2, permP0);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
345 srcP1 = vec_perm(srcR1, srcR2, permP1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
346 srcP2 = vec_perm(srcR1, srcR2, permP2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
347 srcP3 = srcR2;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
348 } break;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
349 case 12: {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
350 vec_u8 srcR3 = vec_ld(30, src);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
351 srcM2 = vec_perm(srcR1, srcR2, permM2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
352 srcM1 = vec_perm(srcR1, srcR2, permM1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
353 srcP0 = vec_perm(srcR1, srcR2, permP0);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
354 srcP1 = vec_perm(srcR1, srcR2, permP1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
355 srcP2 = srcR2;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
356 srcP3 = vec_perm(srcR2, srcR3, permP3);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
357 } break;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
358 case 13: {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
359 vec_u8 srcR3 = vec_ld(30, src);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
360 srcM2 = vec_perm(srcR1, srcR2, permM2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
361 srcM1 = vec_perm(srcR1, srcR2, permM1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
362 srcP0 = vec_perm(srcR1, srcR2, permP0);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
363 srcP1 = srcR2;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
364 srcP2 = vec_perm(srcR2, srcR3, permP2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
365 srcP3 = vec_perm(srcR2, srcR3, permP3);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
366 } break;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
367 case 14: {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
368 vec_u8 srcR3 = vec_ld(30, src);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
369 srcM2 = vec_perm(srcR1, srcR2, permM2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
370 srcM1 = vec_perm(srcR1, srcR2, permM1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
371 srcP0 = srcR2;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
372 srcP1 = vec_perm(srcR2, srcR3, permP1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
373 srcP2 = vec_perm(srcR2, srcR3, permP2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
374 srcP3 = vec_perm(srcR2, srcR3, permP3);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
375 } break;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
376 case 15: {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
377 vec_u8 srcR3 = vec_ld(30, src);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
378 srcM2 = vec_perm(srcR1, srcR2, permM2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
379 srcM1 = srcR2;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
380 srcP0 = vec_perm(srcR2, srcR3, permP0);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
381 srcP1 = vec_perm(srcR2, srcR3, permP1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
382 srcP2 = vec_perm(srcR2, srcR3, permP2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
383 srcP3 = vec_perm(srcR2, srcR3, permP3);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
384 } break;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
385 }
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
386
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
387 srcP0A = (vec_s16) vec_mergeh(zero_u8v, srcP0);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
388 srcP0B = (vec_s16) vec_mergel(zero_u8v, srcP0);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
389 srcP1A = (vec_s16) vec_mergeh(zero_u8v, srcP1);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
390 srcP1B = (vec_s16) vec_mergel(zero_u8v, srcP1);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
391
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
392 srcP2A = (vec_s16) vec_mergeh(zero_u8v, srcP2);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
393 srcP2B = (vec_s16) vec_mergel(zero_u8v, srcP2);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
394 srcP3A = (vec_s16) vec_mergeh(zero_u8v, srcP3);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
395 srcP3B = (vec_s16) vec_mergel(zero_u8v, srcP3);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
396
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
397 srcM1A = (vec_s16) vec_mergeh(zero_u8v, srcM1);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
398 srcM1B = (vec_s16) vec_mergel(zero_u8v, srcM1);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
399 srcM2A = (vec_s16) vec_mergeh(zero_u8v, srcM2);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
400 srcM2B = (vec_s16) vec_mergel(zero_u8v, srcM2);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
401
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
402 sum1A = vec_adds(srcP0A, srcP1A);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
403 sum1B = vec_adds(srcP0B, srcP1B);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
404 sum2A = vec_adds(srcM1A, srcP2A);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
405 sum2B = vec_adds(srcM1B, srcP2B);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
406 sum3A = vec_adds(srcM2A, srcP3A);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
407 sum3B = vec_adds(srcM2B, srcP3B);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
408
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
409 pp1A = vec_mladd(sum1A, v20ss, v16ss);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
410 pp1B = vec_mladd(sum1B, v20ss, v16ss);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
411
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
412 pp2A = vec_mladd(sum2A, v5ss, zero_s16v);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
413 pp2B = vec_mladd(sum2B, v5ss, zero_s16v);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
414
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
415 pp3A = vec_add(sum3A, pp1A);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
416 pp3B = vec_add(sum3B, pp1B);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
417
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
418 psumA = vec_sub(pp3A, pp2A);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
419 psumB = vec_sub(pp3B, pp2B);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
420
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
421 sumA = vec_sra(psumA, v5us);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
422 sumB = vec_sra(psumB, v5us);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
423
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
424 sum = vec_packsu(sumA, sumB);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
425
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
426 ASSERT_ALIGNED(dst);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
427 vdst = vec_ld(0, dst);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
428
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
429 OP_U8_ALTIVEC(fsum, sum, vdst);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
430
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
431 vec_st(fsum, 0, dst);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
432
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
433 src += srcStride;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
434 dst += dstStride;
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
435 }
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
436 POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
437 }
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
438
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
439 /* this code assumes stride % 16 == 0 */
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
440 static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
441 POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_v_lowpass_num, 1);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
442
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
443 register int i;
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
444
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
445 LOAD_ZERO;
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
446 const vec_u8 perm = vec_lvsl(0, src);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
447 const vec_s16 v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
448 const vec_u16 v5us = vec_splat_u16(5);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
449 const vec_s16 v5ss = vec_splat_s16(5);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
450 const vec_s16 v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
451
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
452 uint8_t *srcbis = src - (srcStride * 2);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
453
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
454 const vec_u8 srcM2a = vec_ld(0, srcbis);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
455 const vec_u8 srcM2b = vec_ld(16, srcbis);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
456 const vec_u8 srcM2 = vec_perm(srcM2a, srcM2b, perm);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
457 //srcbis += srcStride;
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
458 const vec_u8 srcM1a = vec_ld(0, srcbis += srcStride);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
459 const vec_u8 srcM1b = vec_ld(16, srcbis);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
460 const vec_u8 srcM1 = vec_perm(srcM1a, srcM1b, perm);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
461 //srcbis += srcStride;
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
462 const vec_u8 srcP0a = vec_ld(0, srcbis += srcStride);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
463 const vec_u8 srcP0b = vec_ld(16, srcbis);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
464 const vec_u8 srcP0 = vec_perm(srcP0a, srcP0b, perm);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
465 //srcbis += srcStride;
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
466 const vec_u8 srcP1a = vec_ld(0, srcbis += srcStride);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
467 const vec_u8 srcP1b = vec_ld(16, srcbis);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
468 const vec_u8 srcP1 = vec_perm(srcP1a, srcP1b, perm);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
469 //srcbis += srcStride;
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
470 const vec_u8 srcP2a = vec_ld(0, srcbis += srcStride);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
471 const vec_u8 srcP2b = vec_ld(16, srcbis);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
472 const vec_u8 srcP2 = vec_perm(srcP2a, srcP2b, perm);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
473 //srcbis += srcStride;
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
474
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
475 vec_s16 srcM2ssA = (vec_s16) vec_mergeh(zero_u8v, srcM2);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
476 vec_s16 srcM2ssB = (vec_s16) vec_mergel(zero_u8v, srcM2);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
477 vec_s16 srcM1ssA = (vec_s16) vec_mergeh(zero_u8v, srcM1);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
478 vec_s16 srcM1ssB = (vec_s16) vec_mergel(zero_u8v, srcM1);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
479 vec_s16 srcP0ssA = (vec_s16) vec_mergeh(zero_u8v, srcP0);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
480 vec_s16 srcP0ssB = (vec_s16) vec_mergel(zero_u8v, srcP0);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
481 vec_s16 srcP1ssA = (vec_s16) vec_mergeh(zero_u8v, srcP1);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
482 vec_s16 srcP1ssB = (vec_s16) vec_mergel(zero_u8v, srcP1);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
483 vec_s16 srcP2ssA = (vec_s16) vec_mergeh(zero_u8v, srcP2);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
484 vec_s16 srcP2ssB = (vec_s16) vec_mergel(zero_u8v, srcP2);
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
485
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
486 vec_s16 pp1A, pp1B, pp2A, pp2B, pp3A, pp3B,
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
487 psumA, psumB, sumA, sumB,
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
488 srcP3ssA, srcP3ssB,
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
489 sum1A, sum1B, sum2A, sum2B, sum3A, sum3B;
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
490
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
491 vec_u8 sum, vdst, fsum, srcP3a, srcP3b, srcP3;
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
492
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
493 POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
494
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
495 for (i = 0 ; i < 16 ; i++) {
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
496 srcP3a = vec_ld(0, srcbis += srcStride);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
497 srcP3b = vec_ld(16, srcbis);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
498 srcP3 = vec_perm(srcP3a, srcP3b, perm);
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
499 srcP3ssA = (vec_s16) vec_mergeh(zero_u8v, srcP3);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
500 srcP3ssB = (vec_s16) vec_mergel(zero_u8v, srcP3);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
501 //srcbis += srcStride;
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
502
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
503 sum1A = vec_adds(srcP0ssA, srcP1ssA);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
504 sum1B = vec_adds(srcP0ssB, srcP1ssB);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
505 sum2A = vec_adds(srcM1ssA, srcP2ssA);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
506 sum2B = vec_adds(srcM1ssB, srcP2ssB);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
507 sum3A = vec_adds(srcM2ssA, srcP3ssA);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
508 sum3B = vec_adds(srcM2ssB, srcP3ssB);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
509
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
510 srcM2ssA = srcM1ssA;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
511 srcM2ssB = srcM1ssB;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
512 srcM1ssA = srcP0ssA;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
513 srcM1ssB = srcP0ssB;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
514 srcP0ssA = srcP1ssA;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
515 srcP0ssB = srcP1ssB;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
516 srcP1ssA = srcP2ssA;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
517 srcP1ssB = srcP2ssB;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
518 srcP2ssA = srcP3ssA;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
519 srcP2ssB = srcP3ssB;
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
520
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
521 pp1A = vec_mladd(sum1A, v20ss, v16ss);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
522 pp1B = vec_mladd(sum1B, v20ss, v16ss);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
523
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
524 pp2A = vec_mladd(sum2A, v5ss, zero_s16v);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
525 pp2B = vec_mladd(sum2B, v5ss, zero_s16v);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
526
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
527 pp3A = vec_add(sum3A, pp1A);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
528 pp3B = vec_add(sum3B, pp1B);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
529
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
530 psumA = vec_sub(pp3A, pp2A);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
531 psumB = vec_sub(pp3B, pp2B);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
532
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
533 sumA = vec_sra(psumA, v5us);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
534 sumB = vec_sra(psumB, v5us);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
535
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
536 sum = vec_packsu(sumA, sumB);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
537
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
538 ASSERT_ALIGNED(dst);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
539 vdst = vec_ld(0, dst);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
540
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
541 OP_U8_ALTIVEC(fsum, sum, vdst);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
542
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
543 vec_st(fsum, 0, dst);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
544
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
545 dst += dstStride;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
546 }
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
547 POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
548 }
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
549
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
550 /* this code assumes stride % 16 == 0 *and* tmp is properly aligned */
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
551 static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp, uint8_t * src, int dstStride, int tmpStride, int srcStride) {
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
552 POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_hv_lowpass_num, 1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
553 register int i;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
554 LOAD_ZERO;
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
555 const vec_u8 permM2 = vec_lvsl(-2, src);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
556 const vec_u8 permM1 = vec_lvsl(-1, src);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
557 const vec_u8 permP0 = vec_lvsl(+0, src);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
558 const vec_u8 permP1 = vec_lvsl(+1, src);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
559 const vec_u8 permP2 = vec_lvsl(+2, src);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
560 const vec_u8 permP3 = vec_lvsl(+3, src);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
561 const vec_s16 v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
562 const vec_u32 v10ui = vec_splat_u32(10);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
563 const vec_s16 v5ss = vec_splat_s16(5);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
564 const vec_s16 v1ss = vec_splat_s16(1);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
565 const vec_s32 v512si = vec_sl(vec_splat_s32(1),vec_splat_u32(9));
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
566 const vec_u32 v16ui = vec_sl(vec_splat_u32(1),vec_splat_u32(4));
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
567
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
568 register int align = ((((unsigned long)src) - 2) % 16);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
569
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
570 vec_s16 srcP0A, srcP0B, srcP1A, srcP1B,
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
571 srcP2A, srcP2B, srcP3A, srcP3B,
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
572 srcM1A, srcM1B, srcM2A, srcM2B,
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
573 sum1A, sum1B, sum2A, sum2B, sum3A, sum3B,
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
574 pp1A, pp1B, pp2A, pp2B, psumA, psumB;
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
575
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
576 const vec_u8 mperm = (const vec_u8)
7373
266d4949aa15 Remove AltiVec vector declaration compiler compatibility macros.
diego
parents: 7333
diff changeset
577 {0x00, 0x08, 0x01, 0x09, 0x02, 0x0A, 0x03, 0x0B,
266d4949aa15 Remove AltiVec vector declaration compiler compatibility macros.
diego
parents: 7333
diff changeset
578 0x04, 0x0C, 0x05, 0x0D, 0x06, 0x0E, 0x07, 0x0F};
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
579 int16_t *tmpbis = tmp;
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
580
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
581 vec_s16 tmpM1ssA, tmpM1ssB, tmpM2ssA, tmpM2ssB,
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
582 tmpP0ssA, tmpP0ssB, tmpP1ssA, tmpP1ssB,
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
583 tmpP2ssA, tmpP2ssB;
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
584
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
585 vec_s32 pp1Ae, pp1Ao, pp1Be, pp1Bo, pp2Ae, pp2Ao, pp2Be, pp2Bo,
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
586 pp3Ae, pp3Ao, pp3Be, pp3Bo, pp1cAe, pp1cAo, pp1cBe, pp1cBo,
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
587 pp32Ae, pp32Ao, pp32Be, pp32Bo, sumAe, sumAo, sumBe, sumBo,
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
588 ssumAe, ssumAo, ssumBe, ssumBo;
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
589 vec_u8 fsum, sumv, sum, vdst;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
590 vec_s16 ssume, ssumo;
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
591
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
592 POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_hv_lowpass_num, 1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
593 src -= (2 * srcStride);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
594 for (i = 0 ; i < 21 ; i ++) {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
595 vec_u8 srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
596 vec_u8 srcR1 = vec_ld(-2, src);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
597 vec_u8 srcR2 = vec_ld(14, src);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
598
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
599 switch (align) {
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
600 default: {
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
601 srcM2 = vec_perm(srcR1, srcR2, permM2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
602 srcM1 = vec_perm(srcR1, srcR2, permM1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
603 srcP0 = vec_perm(srcR1, srcR2, permP0);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
604 srcP1 = vec_perm(srcR1, srcR2, permP1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
605 srcP2 = vec_perm(srcR1, srcR2, permP2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
606 srcP3 = vec_perm(srcR1, srcR2, permP3);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
607 } break;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
608 case 11: {
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
609 srcM2 = vec_perm(srcR1, srcR2, permM2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
610 srcM1 = vec_perm(srcR1, srcR2, permM1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
611 srcP0 = vec_perm(srcR1, srcR2, permP0);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
612 srcP1 = vec_perm(srcR1, srcR2, permP1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
613 srcP2 = vec_perm(srcR1, srcR2, permP2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
614 srcP3 = srcR2;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
615 } break;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
616 case 12: {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
617 vec_u8 srcR3 = vec_ld(30, src);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
618 srcM2 = vec_perm(srcR1, srcR2, permM2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
619 srcM1 = vec_perm(srcR1, srcR2, permM1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
620 srcP0 = vec_perm(srcR1, srcR2, permP0);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
621 srcP1 = vec_perm(srcR1, srcR2, permP1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
622 srcP2 = srcR2;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
623 srcP3 = vec_perm(srcR2, srcR3, permP3);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
624 } break;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
625 case 13: {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
626 vec_u8 srcR3 = vec_ld(30, src);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
627 srcM2 = vec_perm(srcR1, srcR2, permM2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
628 srcM1 = vec_perm(srcR1, srcR2, permM1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
629 srcP0 = vec_perm(srcR1, srcR2, permP0);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
630 srcP1 = srcR2;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
631 srcP2 = vec_perm(srcR2, srcR3, permP2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
632 srcP3 = vec_perm(srcR2, srcR3, permP3);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
633 } break;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
634 case 14: {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
635 vec_u8 srcR3 = vec_ld(30, src);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
636 srcM2 = vec_perm(srcR1, srcR2, permM2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
637 srcM1 = vec_perm(srcR1, srcR2, permM1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
638 srcP0 = srcR2;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
639 srcP1 = vec_perm(srcR2, srcR3, permP1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
640 srcP2 = vec_perm(srcR2, srcR3, permP2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
641 srcP3 = vec_perm(srcR2, srcR3, permP3);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
642 } break;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
643 case 15: {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
644 vec_u8 srcR3 = vec_ld(30, src);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
645 srcM2 = vec_perm(srcR1, srcR2, permM2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
646 srcM1 = srcR2;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
647 srcP0 = vec_perm(srcR2, srcR3, permP0);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
648 srcP1 = vec_perm(srcR2, srcR3, permP1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
649 srcP2 = vec_perm(srcR2, srcR3, permP2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
650 srcP3 = vec_perm(srcR2, srcR3, permP3);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
651 } break;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
652 }
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
653
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
654 srcP0A = (vec_s16) vec_mergeh(zero_u8v, srcP0);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
655 srcP0B = (vec_s16) vec_mergel(zero_u8v, srcP0);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
656 srcP1A = (vec_s16) vec_mergeh(zero_u8v, srcP1);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
657 srcP1B = (vec_s16) vec_mergel(zero_u8v, srcP1);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
658
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
659 srcP2A = (vec_s16) vec_mergeh(zero_u8v, srcP2);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
660 srcP2B = (vec_s16) vec_mergel(zero_u8v, srcP2);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
661 srcP3A = (vec_s16) vec_mergeh(zero_u8v, srcP3);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
662 srcP3B = (vec_s16) vec_mergel(zero_u8v, srcP3);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
663
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
664 srcM1A = (vec_s16) vec_mergeh(zero_u8v, srcM1);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
665 srcM1B = (vec_s16) vec_mergel(zero_u8v, srcM1);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
666 srcM2A = (vec_s16) vec_mergeh(zero_u8v, srcM2);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
667 srcM2B = (vec_s16) vec_mergel(zero_u8v, srcM2);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
668
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
669 sum1A = vec_adds(srcP0A, srcP1A);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
670 sum1B = vec_adds(srcP0B, srcP1B);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
671 sum2A = vec_adds(srcM1A, srcP2A);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
672 sum2B = vec_adds(srcM1B, srcP2B);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
673 sum3A = vec_adds(srcM2A, srcP3A);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
674 sum3B = vec_adds(srcM2B, srcP3B);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
675
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
676 pp1A = vec_mladd(sum1A, v20ss, sum3A);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
677 pp1B = vec_mladd(sum1B, v20ss, sum3B);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
678
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
679 pp2A = vec_mladd(sum2A, v5ss, zero_s16v);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
680 pp2B = vec_mladd(sum2B, v5ss, zero_s16v);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
681
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
682 psumA = vec_sub(pp1A, pp2A);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
683 psumB = vec_sub(pp1B, pp2B);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
684
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
685 vec_st(psumA, 0, tmp);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
686 vec_st(psumB, 16, tmp);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
687
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
688 src += srcStride;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
689 tmp += tmpStride; /* int16_t*, and stride is 16, so it's OK here */
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
690 }
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
691
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
692 tmpM2ssA = vec_ld(0, tmpbis);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
693 tmpM2ssB = vec_ld(16, tmpbis);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
694 tmpbis += tmpStride;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
695 tmpM1ssA = vec_ld(0, tmpbis);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
696 tmpM1ssB = vec_ld(16, tmpbis);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
697 tmpbis += tmpStride;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
698 tmpP0ssA = vec_ld(0, tmpbis);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
699 tmpP0ssB = vec_ld(16, tmpbis);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
700 tmpbis += tmpStride;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
701 tmpP1ssA = vec_ld(0, tmpbis);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
702 tmpP1ssB = vec_ld(16, tmpbis);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
703 tmpbis += tmpStride;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
704 tmpP2ssA = vec_ld(0, tmpbis);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
705 tmpP2ssB = vec_ld(16, tmpbis);
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
706 tmpbis += tmpStride;
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
707
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
708 for (i = 0 ; i < 16 ; i++) {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
709 const vec_s16 tmpP3ssA = vec_ld(0, tmpbis);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
710 const vec_s16 tmpP3ssB = vec_ld(16, tmpbis);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
711
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
712 const vec_s16 sum1A = vec_adds(tmpP0ssA, tmpP1ssA);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
713 const vec_s16 sum1B = vec_adds(tmpP0ssB, tmpP1ssB);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
714 const vec_s16 sum2A = vec_adds(tmpM1ssA, tmpP2ssA);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
715 const vec_s16 sum2B = vec_adds(tmpM1ssB, tmpP2ssB);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
716 const vec_s16 sum3A = vec_adds(tmpM2ssA, tmpP3ssA);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
717 const vec_s16 sum3B = vec_adds(tmpM2ssB, tmpP3ssB);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
718
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
719 tmpbis += tmpStride;
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
720
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
721 tmpM2ssA = tmpM1ssA;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
722 tmpM2ssB = tmpM1ssB;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
723 tmpM1ssA = tmpP0ssA;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
724 tmpM1ssB = tmpP0ssB;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
725 tmpP0ssA = tmpP1ssA;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
726 tmpP0ssB = tmpP1ssB;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
727 tmpP1ssA = tmpP2ssA;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
728 tmpP1ssB = tmpP2ssB;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
729 tmpP2ssA = tmpP3ssA;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
730 tmpP2ssB = tmpP3ssB;
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
731
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
732 pp1Ae = vec_mule(sum1A, v20ss);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
733 pp1Ao = vec_mulo(sum1A, v20ss);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
734 pp1Be = vec_mule(sum1B, v20ss);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
735 pp1Bo = vec_mulo(sum1B, v20ss);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
736
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
737 pp2Ae = vec_mule(sum2A, v5ss);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
738 pp2Ao = vec_mulo(sum2A, v5ss);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
739 pp2Be = vec_mule(sum2B, v5ss);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
740 pp2Bo = vec_mulo(sum2B, v5ss);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
741
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
742 pp3Ae = vec_sra((vec_s32)sum3A, v16ui);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
743 pp3Ao = vec_mulo(sum3A, v1ss);
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
744 pp3Be = vec_sra((vec_s32)sum3B, v16ui);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
745 pp3Bo = vec_mulo(sum3B, v1ss);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
746
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
747 pp1cAe = vec_add(pp1Ae, v512si);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
748 pp1cAo = vec_add(pp1Ao, v512si);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
749 pp1cBe = vec_add(pp1Be, v512si);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
750 pp1cBo = vec_add(pp1Bo, v512si);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
751
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
752 pp32Ae = vec_sub(pp3Ae, pp2Ae);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
753 pp32Ao = vec_sub(pp3Ao, pp2Ao);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
754 pp32Be = vec_sub(pp3Be, pp2Be);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
755 pp32Bo = vec_sub(pp3Bo, pp2Bo);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
756
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
757 sumAe = vec_add(pp1cAe, pp32Ae);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
758 sumAo = vec_add(pp1cAo, pp32Ao);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
759 sumBe = vec_add(pp1cBe, pp32Be);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
760 sumBo = vec_add(pp1cBo, pp32Bo);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
761
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
762 ssumAe = vec_sra(sumAe, v10ui);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
763 ssumAo = vec_sra(sumAo, v10ui);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
764 ssumBe = vec_sra(sumBe, v10ui);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
765 ssumBo = vec_sra(sumBo, v10ui);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
766
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
767 ssume = vec_packs(ssumAe, ssumBe);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
768 ssumo = vec_packs(ssumAo, ssumBo);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
769
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
770 sumv = vec_packsu(ssume, ssumo);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
771 sum = vec_perm(sumv, sumv, mperm);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
772
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
773 ASSERT_ALIGNED(dst);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
774 vdst = vec_ld(0, dst);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
775
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
776 OP_U8_ALTIVEC(fsum, sum, vdst);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
777
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
778 vec_st(fsum, 0, dst);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
779
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
780 dst += dstStride;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
781 }
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
782 POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_hv_lowpass_num, 1);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
783 }