annotate ppc/h264_template_altivec.c @ 12530:63edd10ad4bc libavcodec tip

Try to fix crashes introduced by r25218. r25218 made assumptions about the existence of past reference frames that weren't necessarily true.
author darkshikari
date Tue, 28 Sep 2010 09:06:22 +0000
parents 3cd4cd0509cd
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
1 /*
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
2 * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
3 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3577
diff changeset
4 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3577
diff changeset
5 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3577
diff changeset
6 * FFmpeg is free software; you can redistribute it and/or
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
7 * modify it under the terms of the GNU Lesser General Public
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
8 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3577
diff changeset
9 * version 2.1 of the License, or (at your option) any later version.
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
10 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3577
diff changeset
11 * FFmpeg is distributed in the hope that it will be useful,
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
14 * Lesser General Public License for more details.
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
15 *
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
16 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3577
diff changeset
17 * License along with FFmpeg; if not, write to the Free Software
3036
0b546eab515d Update licensing information: The FSF changed postal address.
diego
parents: 2967
diff changeset
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
19 */
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
20
5603
861eb234e6ba remove alignment correction of the destination pointers in luma_16x6
gpoirier
parents: 5530
diff changeset
21 //#define DEBUG_ALIGNMENT
861eb234e6ba remove alignment correction of the destination pointers in luma_16x6
gpoirier
parents: 5530
diff changeset
22 #ifdef DEBUG_ALIGNMENT
861eb234e6ba remove alignment correction of the destination pointers in luma_16x6
gpoirier
parents: 5530
diff changeset
23 #define ASSERT_ALIGNED(ptr) assert(!((unsigned long)(ptr) & 0x0000000F)); /* fails when ptr is not 16-byte aligned (low four address bits must be zero) */
861eb234e6ba remove alignment correction of the destination pointers in luma_16x6
gpoirier
parents: 5530
diff changeset
24 #else
861eb234e6ba remove alignment correction of the destination pointers in luma_16x6
gpoirier
parents: 5530
diff changeset
25 #define ASSERT_ALIGNED(ptr) ; /* expands to an empty statement when DEBUG_ALIGNMENT is not defined */
861eb234e6ba remove alignment correction of the destination pointers in luma_16x6
gpoirier
parents: 5530
diff changeset
26 #endif
861eb234e6ba remove alignment correction of the destination pointers in luma_16x6
gpoirier
parents: 5530
diff changeset
27
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
28 /* this code assumes that stride % 16 == 0 */
6059
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
29
9444
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
/*
 * Per-row body for the four-tap case. It expands inside a function that
 * declares every variable named here (vA..vD, vsrc*uc, vsrc*ssH, psum,
 * v6us, zero_u8v, fperm, vdst, ppsum, vfdst, fsum, dst, src, stride).
 *  - interleaves zero_u8v with vsrc2uc/vsrc3uc (vec_mergeh) and
 *    reinterprets the result as vec_s16
 *  - psum = vA*vsrc0ssH + vB*vsrc1ssH + vC*vsrc2ssH + vD*vsrc3ssH + BIAS1,
 *    then BIAS2(psum), then a right shift by v6us
 *  - packs psum to bytes, blends it with the vector loaded from dst via
 *    fperm, passes it through OP_U8_ALTIVEC (defined outside this section)
 *    and stores the result back to dst
 *  - keeps vsrc2ssH/vsrc3ssH as the next row's vsrc0ssH/vsrc1ssH and
 *    advances dst and src by stride
 */
30 #define CHROMA_MC8_ALTIVEC_CORE(BIAS1, BIAS2) \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
31 vsrc2ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc2uc);\
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
32 vsrc3ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc3uc);\
6059
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
33 \
9444
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
34 psum = vec_mladd(vA, vsrc0ssH, BIAS1);\
6059
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
35 psum = vec_mladd(vB, vsrc1ssH, psum);\
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
36 psum = vec_mladd(vC, vsrc2ssH, psum);\
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
37 psum = vec_mladd(vD, vsrc3ssH, psum);\
9444
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
38 psum = BIAS2(psum);\
6059
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
39 psum = vec_sr(psum, v6us);\
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
40 \
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
41 vdst = vec_ld(0, dst);\
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
42 ppsum = (vec_u8)vec_pack(psum, psum);\
6059
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
43 vfdst = vec_perm(vdst, ppsum, fperm);\
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
44 \
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
45 OP_U8_ALTIVEC(fsum, vfdst, vdst);\
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
46 \
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
47 vec_st(fsum, 0, dst);\
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
48 \
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
49 vsrc0ssH = vsrc2ssH;\
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
50 vsrc1ssH = vsrc3ssH;\
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
51 \
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
52 dst += stride;\
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
53 src += stride;
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
54
6062
9d1590a4df90 Partially address issue299, no performance change apparently
lu_zero
parents: 6061
diff changeset
/*
 * Per-row body for the two-tap case: the expanding function folds the
 * B and C weights into vE (vE = vB + vC) before using this macro.
 *  - interleaves zero_u8v with vsrc0uc/vsrc1uc (vec_mergeh) and
 *    reinterprets the result as vec_s16
 *  - psum = vA*vsrc0ssH + vE*vsrc1ssH + v32ss, shifted right by v6us
 *  - packs psum to bytes, blends it with the vector loaded from dst via
 *    fperm, passes it through OP_U8_ALTIVEC (defined outside this section)
 *    and stores the result back to dst
 *  - advances dst and src by stride
 * vsrc0uc and vsrc1uc must be set by the caller before each expansion;
 * all other names also come from the expanding function.
 */
55 #define CHROMA_MC8_ALTIVEC_CORE_SIMPLE \
6064
62d040333d51 Add C/B == 0 cases, 2% slower on CELL but should address Issue299 eventually
lu_zero
parents: 6063
diff changeset
56 \
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
57 vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc0uc);\
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
58 vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc1uc);\
6062
9d1590a4df90 Partially address issue299, no performance change apparently
lu_zero
parents: 6061
diff changeset
59 \
9d1590a4df90 Partially address issue299, no performance change apparently
lu_zero
parents: 6061
diff changeset
60 psum = vec_mladd(vA, vsrc0ssH, v32ss);\
6064
62d040333d51 Add C/B == 0 cases, 2% slower on CELL but should address Issue299 eventually
lu_zero
parents: 6063
diff changeset
61 psum = vec_mladd(vE, vsrc1ssH, psum);\
6062
9d1590a4df90 Partially address issue299, no performance change apparently
lu_zero
parents: 6061
diff changeset
62 psum = vec_sr(psum, v6us);\
9d1590a4df90 Partially address issue299, no performance change apparently
lu_zero
parents: 6061
diff changeset
63 \
9d1590a4df90 Partially address issue299, no performance change apparently
lu_zero
parents: 6061
diff changeset
64 vdst = vec_ld(0, dst);\
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
65 ppsum = (vec_u8)vec_pack(psum, psum);\
6062
9d1590a4df90 Partially address issue299, no performance change apparently
lu_zero
parents: 6061
diff changeset
66 vfdst = vec_perm(vdst, ppsum, fperm);\
9d1590a4df90 Partially address issue299, no performance change apparently
lu_zero
parents: 6061
diff changeset
67 \
9d1590a4df90 Partially address issue299, no performance change apparently
lu_zero
parents: 6061
diff changeset
68 OP_U8_ALTIVEC(fsum, vfdst, vdst);\
9d1590a4df90 Partially address issue299, no performance change apparently
lu_zero
parents: 6061
diff changeset
69 \
9d1590a4df90 Partially address issue299, no performance change apparently
lu_zero
parents: 6061
diff changeset
70 vec_st(fsum, 0, dst);\
9d1590a4df90 Partially address issue299, no performance change apparently
lu_zero
parents: 6061
diff changeset
71 \
9d1590a4df90 Partially address issue299, no performance change apparently
lu_zero
parents: 6061
diff changeset
72 dst += stride;\
9d1590a4df90 Partially address issue299, no performance change apparently
lu_zero
parents: 6061
diff changeset
73 src += stride;
6059
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
74
9444
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
75 #define noop(a) a /* identity; passed as BIAS2 to CHROMA_MC8_ALTIVEC_CORE when the sum needs no extra bias */
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
76 #define add28(a) vec_add(v28ss, a) /* adds v28ss, which the expanding function must declare; passed as BIAS2 to CHROMA_MC8_ALTIVEC_CORE */
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
77
11382
50415a8f1451 PPC: move prototypes to headers and make some functions static
mru
parents: 11369
diff changeset
/*
 * Chroma motion compensation on an 8-byte-wide block, h rows tall.
 *
 * dst, src: destination and source rows; both advance by stride per row
 *           (the note above says stride % 16 == 0 is assumed)
 * x, y:     fractional offsets; the four weights stored in ABCD[0..3] are
 *           A = (8-x)(8-y), B = x(8-y), C = (8-x)y and D = xy
 *
 * fperm decides which half of the 16-byte vector loaded from dst receives
 * the 8 result bytes: the first half when dst % 16 == 0, otherwise the
 * second half. The other half keeps the bytes loaded from dst.
 *
 * Three paths are selected from the weights:
 *   ABCD[3] != 0                four-tap CHROMA_MC8_ALTIVEC_CORE with a
 *                               bias of v32ss (1 << 5) before the shift
 *   ABCD[3] == 0, ABCD[2] != 0  two taps: current row and next row
 *   ABCD[3] == 0, ABCD[2] == 0  two taps: src and src + 1 on the same row
 * Each path has a variant with one vector load per row (!loadSecond) and
 * a variant with two vector loads per row.
 *
 * NOTE(review): OP_U8_ALTIVEC and the PREFIX_ name are not defined in this
 * section - presumably supplied by the including file; confirm there.
 */
78 static void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src,
6063
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
79 int stride, int h, int x, int y) {
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
80 DECLARE_ALIGNED(16, signed int, ABCD)[4] =
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
81 {((8 - x) * (8 - y)),
6058
93089aed00cb Cosmetics
lu_zero
parents: 5603
diff changeset
82 (( x) * (8 - y)),
93089aed00cb Cosmetics
lu_zero
parents: 5603
diff changeset
83 ((8 - x) * ( y)),
93089aed00cb Cosmetics
lu_zero
parents: 5603
diff changeset
84 (( x) * ( y))};
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
85 register int i;
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
86 vec_u8 fperm;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
87 const vec_s32 vABCD = vec_ld(0, ABCD);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
88 const vec_s16 vA = vec_splat((vec_s16)vABCD, 1);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
89 const vec_s16 vB = vec_splat((vec_s16)vABCD, 3);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
90 const vec_s16 vC = vec_splat((vec_s16)vABCD, 5);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
91 const vec_s16 vD = vec_splat((vec_s16)vABCD, 7);
5530
cd266411b11a use shorter types vec_"type" instead of the too long vector "type"
gpoirier
parents: 5019
diff changeset
92 LOAD_ZERO;
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
93 const vec_s16 v32ss = vec_sl(vec_splat_s16(1),vec_splat_u16(5));
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
94 const vec_u16 v6us = vec_splat_u16(6);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
95 register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1; /* 1 when src % 16 > 7: a second vector load per row is then performed */
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
96 register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0; /* src % 16 == 15: the src + 1 row is taken straight from the second vector instead of via vec_perm */
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
97
9166
40738baaafc2 Add av_uninit to vsrcBuc variable to work around some
diego
parents: 8494
diff changeset
98 vec_u8 vsrcAuc, av_uninit(vsrcBuc), vsrcperm0, vsrcperm1;
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
99 vec_u8 vsrc0uc, vsrc1uc;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
100 vec_s16 vsrc0ssH, vsrc1ssH;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
101 vec_u8 vsrcCuc, vsrc2uc, vsrc3uc;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
102 vec_s16 vsrc2ssH, vsrc3ssH, psum;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
103 vec_u8 vdst, ppsum, vfdst, fsum;
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
104
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
105 if (((unsigned long)dst) % 16 == 0) {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
106 fperm = (vec_u8){0x10, 0x11, 0x12, 0x13,
9167
fd8b4aa6e493 prettyprinting cosmetics
diego
parents: 9166
diff changeset
107 0x14, 0x15, 0x16, 0x17,
fd8b4aa6e493 prettyprinting cosmetics
diego
parents: 9166
diff changeset
108 0x08, 0x09, 0x0A, 0x0B,
fd8b4aa6e493 prettyprinting cosmetics
diego
parents: 9166
diff changeset
109 0x0C, 0x0D, 0x0E, 0x0F};
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
110 } else {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
111 fperm = (vec_u8){0x00, 0x01, 0x02, 0x03,
9167
fd8b4aa6e493 prettyprinting cosmetics
diego
parents: 9166
diff changeset
112 0x04, 0x05, 0x06, 0x07,
fd8b4aa6e493 prettyprinting cosmetics
diego
parents: 9166
diff changeset
113 0x18, 0x19, 0x1A, 0x1B,
fd8b4aa6e493 prettyprinting cosmetics
diego
parents: 9166
diff changeset
114 0x1C, 0x1D, 0x1E, 0x1F};
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
115 }
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
116
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
117 vsrcAuc = vec_ld(0, src);
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
118
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
119 if (loadSecond)
6060
b1e3368c0d5e Reindent
lu_zero
parents: 6059
diff changeset
120 vsrcBuc = vec_ld(16, src);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
121 vsrcperm0 = vec_lvsl(0, src);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
122 vsrcperm1 = vec_lvsl(1, src);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
123
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
124 vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
125 if (reallyBadAlign)
6060
b1e3368c0d5e Reindent
lu_zero
parents: 6059
diff changeset
126 vsrc1uc = vsrcBuc;
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
127 else
6060
b1e3368c0d5e Reindent
lu_zero
parents: 6059
diff changeset
128 vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
129
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
130 vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc0uc);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
131 vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc1uc);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
132
6062
9d1590a4df90 Partially address issue299, no performance change apparently
lu_zero
parents: 6061
diff changeset
133 if (ABCD[3]) {
6063
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
134 if (!loadSecond) {// -> !reallyBadAlign
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
135 for (i = 0 ; i < h ; i++) {
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
136 vsrcCuc = vec_ld(stride + 0, src);
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
137 vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
138 vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
139
9444
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
140 CHROMA_MC8_ALTIVEC_CORE(v32ss, noop)
6063
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
141 }
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
142 } else {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
143 vec_u8 vsrcDuc;
6063
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
144 for (i = 0 ; i < h ; i++) {
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
145 vsrcCuc = vec_ld(stride + 0, src);
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
146 vsrcDuc = vec_ld(stride + 16, src);
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
147 vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
148 if (reallyBadAlign)
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
149 vsrc3uc = vsrcDuc;
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
150 else
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
151 vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
152
9444
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
153 CHROMA_MC8_ALTIVEC_CORE(v32ss, noop)
6063
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
154 }
6060
b1e3368c0d5e Reindent
lu_zero
parents: 6059
diff changeset
155 }
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
156 } else {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
157 const vec_s16 vE = vec_add(vB, vC);
6065
180976fd652e 10l do not load after the buffer...
lu_zero
parents: 6064
diff changeset
158 if (ABCD[2]) { // x == 0 B == 0
6067
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
159 if (!loadSecond) {// -> !reallyBadAlign
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
160 for (i = 0 ; i < h ; i++) {
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
161 vsrcCuc = vec_ld(stride + 0, src);
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
162 vsrc1uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
163 CHROMA_MC8_ALTIVEC_CORE_SIMPLE
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
164
6067
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
165 vsrc0uc = vsrc1uc;
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
166 }
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
167 } else {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
168 vec_u8 vsrcDuc;
6067
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
169 for (i = 0 ; i < h ; i++) {
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
170 vsrcCuc = vec_ld(stride + 0, src);
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
171 vsrcDuc = vec_ld(stride + 15, src);
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
172 vsrc1uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
173 CHROMA_MC8_ALTIVEC_CORE_SIMPLE
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
174
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
175 vsrc0uc = vsrc1uc;
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
176 }
6063
47ed1b9610b1 Reindent
lu_zero
parents: 6062
diff changeset
177 }
6065
180976fd652e 10l do not load after the buffer...
lu_zero
parents: 6064
diff changeset
178 } else { // y == 0 C == 0
6067
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
179 if (!loadSecond) {// -> !reallyBadAlign
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
180 for (i = 0 ; i < h ; i++) {
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
181 vsrcCuc = vec_ld(0, src);
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
182 vsrc0uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
183 vsrc1uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);
6065
180976fd652e 10l do not load after the buffer...
lu_zero
parents: 6064
diff changeset
184
6067
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
185 CHROMA_MC8_ALTIVEC_CORE_SIMPLE
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
186 }
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
187 } else {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
188 vec_u8 vsrcDuc;
6067
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
189 for (i = 0 ; i < h ; i++) {
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
190 vsrcCuc = vec_ld(0, src);
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
191 vsrcDuc = vec_ld(15, src);
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
192 vsrc0uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
193 if (reallyBadAlign)
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
194 vsrc1uc = vsrcDuc;
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
195 else
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
196 vsrc1uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
197
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
198 CHROMA_MC8_ALTIVEC_CORE_SIMPLE
292269939c50 Reindent
lu_zero
parents: 6066
diff changeset
199 }
6064
62d040333d51 Add C/B == 0 cases, 2% slower on CELL but should address Issue299 eventually
lu_zero
parents: 6063
diff changeset
200 }
62d040333d51 Add C/B == 0 cases, 2% slower on CELL but should address Issue299 eventually
lu_zero
parents: 6063
diff changeset
201 }
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
202 }
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
203 }
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
204
9444
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
205 /* this code assumes that stride % 16 == 0 */
11382
50415a8f1451 PPC: move prototypes to headers and make some functions static
mru
parents: 11369
diff changeset
206 static void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
207 DECLARE_ALIGNED(16, signed int, ABCD)[4] =
9444
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
208 {((8 - x) * (8 - y)),
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
209 (( x) * (8 - y)),
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
210 ((8 - x) * ( y)),
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
211 (( x) * ( y))};
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
212 register int i;
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
213 vec_u8 fperm;
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
214 const vec_s32 vABCD = vec_ld(0, ABCD);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
215 const vec_s16 vA = vec_splat((vec_s16)vABCD, 1);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
216 const vec_s16 vB = vec_splat((vec_s16)vABCD, 3);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
217 const vec_s16 vC = vec_splat((vec_s16)vABCD, 5);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
218 const vec_s16 vD = vec_splat((vec_s16)vABCD, 7);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
219 LOAD_ZERO;
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
220 const vec_s16 v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4));
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
221 const vec_u16 v6us = vec_splat_u16(6);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
222 register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
223 register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
224
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
225 vec_u8 vsrcAuc, av_uninit(vsrcBuc), vsrcperm0, vsrcperm1;
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
226 vec_u8 vsrc0uc, vsrc1uc;
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
227 vec_s16 vsrc0ssH, vsrc1ssH;
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
228 vec_u8 vsrcCuc, vsrc2uc, vsrc3uc;
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
229 vec_s16 vsrc2ssH, vsrc3ssH, psum;
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
230 vec_u8 vdst, ppsum, vfdst, fsum;
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
231
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
232 if (((unsigned long)dst) % 16 == 0) {
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
233 fperm = (vec_u8){0x10, 0x11, 0x12, 0x13,
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
234 0x14, 0x15, 0x16, 0x17,
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
235 0x08, 0x09, 0x0A, 0x0B,
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
236 0x0C, 0x0D, 0x0E, 0x0F};
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
237 } else {
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
238 fperm = (vec_u8){0x00, 0x01, 0x02, 0x03,
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
239 0x04, 0x05, 0x06, 0x07,
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
240 0x18, 0x19, 0x1A, 0x1B,
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
241 0x1C, 0x1D, 0x1E, 0x1F};
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
242 }
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
243
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
244 vsrcAuc = vec_ld(0, src);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
245
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
246 if (loadSecond)
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
247 vsrcBuc = vec_ld(16, src);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
248 vsrcperm0 = vec_lvsl(0, src);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
249 vsrcperm1 = vec_lvsl(1, src);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
250
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
251 vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
252 if (reallyBadAlign)
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
253 vsrc1uc = vsrcBuc;
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
254 else
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
255 vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
256
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
257 vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc0uc);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
258 vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc1uc);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
259
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
260 if (!loadSecond) {// -> !reallyBadAlign
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
261 for (i = 0 ; i < h ; i++) {
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
262
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
263
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
264 vsrcCuc = vec_ld(stride + 0, src);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
265
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
266 vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
267 vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
268
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
269 CHROMA_MC8_ALTIVEC_CORE(vec_splat_s16(0), add28)
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
270 }
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
271 } else {
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
272 vec_u8 vsrcDuc;
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
273 for (i = 0 ; i < h ; i++) {
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
274 vsrcCuc = vec_ld(stride + 0, src);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
275 vsrcDuc = vec_ld(stride + 16, src);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
276
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
277 vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
278 if (reallyBadAlign)
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
279 vsrc3uc = vsrcDuc;
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
280 else
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
281 vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
282
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
283 CHROMA_MC8_ALTIVEC_CORE(vec_splat_s16(0), add28)
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
284 }
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
285 }
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
286 }
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
287
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
288 #undef noop
fe17033a79ed Altivec version of avg_no_rnd_vc1_chroma_mc8
conrad
parents: 9167
diff changeset
289 #undef add28
6059
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
290 #undef CHROMA_MC8_ALTIVEC_CORE
8c1a381bddb6 Factorize common code (almost cosmetic)
lu_zero
parents: 6058
diff changeset
291
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
292 /* this code assumes stride % 16 == 0 */
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
293 static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
294 register int i;
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
295
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
296 LOAD_ZERO;
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
297 const vec_u8 permM2 = vec_lvsl(-2, src);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
298 const vec_u8 permM1 = vec_lvsl(-1, src);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
299 const vec_u8 permP0 = vec_lvsl(+0, src);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
300 const vec_u8 permP1 = vec_lvsl(+1, src);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
301 const vec_u8 permP2 = vec_lvsl(+2, src);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
302 const vec_u8 permP3 = vec_lvsl(+3, src);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
303 const vec_s16 v5ss = vec_splat_s16(5);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
304 const vec_u16 v5us = vec_splat_u16(5);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
305 const vec_s16 v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
306 const vec_s16 v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
307
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
308 vec_u8 srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
309
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
310 register int align = ((((unsigned long)src) - 2) % 16);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
311
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
312 vec_s16 srcP0A, srcP0B, srcP1A, srcP1B,
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
313 srcP2A, srcP2B, srcP3A, srcP3B,
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
314 srcM1A, srcM1B, srcM2A, srcM2B,
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
315 sum1A, sum1B, sum2A, sum2B, sum3A, sum3B,
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
316 pp1A, pp1B, pp2A, pp2B, pp3A, pp3B,
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
317 psumA, psumB, sumA, sumB;
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
318
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
319 vec_u8 sum, vdst, fsum;
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
320
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
321 for (i = 0 ; i < 16 ; i ++) {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
322 vec_u8 srcR1 = vec_ld(-2, src);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
323 vec_u8 srcR2 = vec_ld(14, src);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
324
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
325 switch (align) {
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
326 default: {
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
327 srcM2 = vec_perm(srcR1, srcR2, permM2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
328 srcM1 = vec_perm(srcR1, srcR2, permM1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
329 srcP0 = vec_perm(srcR1, srcR2, permP0);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
330 srcP1 = vec_perm(srcR1, srcR2, permP1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
331 srcP2 = vec_perm(srcR1, srcR2, permP2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
332 srcP3 = vec_perm(srcR1, srcR2, permP3);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
333 } break;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
334 case 11: {
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
335 srcM2 = vec_perm(srcR1, srcR2, permM2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
336 srcM1 = vec_perm(srcR1, srcR2, permM1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
337 srcP0 = vec_perm(srcR1, srcR2, permP0);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
338 srcP1 = vec_perm(srcR1, srcR2, permP1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
339 srcP2 = vec_perm(srcR1, srcR2, permP2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
340 srcP3 = srcR2;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
341 } break;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
342 case 12: {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
343 vec_u8 srcR3 = vec_ld(30, src);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
344 srcM2 = vec_perm(srcR1, srcR2, permM2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
345 srcM1 = vec_perm(srcR1, srcR2, permM1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
346 srcP0 = vec_perm(srcR1, srcR2, permP0);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
347 srcP1 = vec_perm(srcR1, srcR2, permP1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
348 srcP2 = srcR2;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
349 srcP3 = vec_perm(srcR2, srcR3, permP3);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
350 } break;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
351 case 13: {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
352 vec_u8 srcR3 = vec_ld(30, src);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
353 srcM2 = vec_perm(srcR1, srcR2, permM2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
354 srcM1 = vec_perm(srcR1, srcR2, permM1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
355 srcP0 = vec_perm(srcR1, srcR2, permP0);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
356 srcP1 = srcR2;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
357 srcP2 = vec_perm(srcR2, srcR3, permP2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
358 srcP3 = vec_perm(srcR2, srcR3, permP3);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
359 } break;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
360 case 14: {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
361 vec_u8 srcR3 = vec_ld(30, src);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
362 srcM2 = vec_perm(srcR1, srcR2, permM2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
363 srcM1 = vec_perm(srcR1, srcR2, permM1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
364 srcP0 = srcR2;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
365 srcP1 = vec_perm(srcR2, srcR3, permP1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
366 srcP2 = vec_perm(srcR2, srcR3, permP2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
367 srcP3 = vec_perm(srcR2, srcR3, permP3);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
368 } break;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
369 case 15: {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
370 vec_u8 srcR3 = vec_ld(30, src);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
371 srcM2 = vec_perm(srcR1, srcR2, permM2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
372 srcM1 = srcR2;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
373 srcP0 = vec_perm(srcR2, srcR3, permP0);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
374 srcP1 = vec_perm(srcR2, srcR3, permP1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
375 srcP2 = vec_perm(srcR2, srcR3, permP2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
376 srcP3 = vec_perm(srcR2, srcR3, permP3);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
377 } break;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
378 }
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
379
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
380 srcP0A = (vec_s16) vec_mergeh(zero_u8v, srcP0);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
381 srcP0B = (vec_s16) vec_mergel(zero_u8v, srcP0);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
382 srcP1A = (vec_s16) vec_mergeh(zero_u8v, srcP1);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
383 srcP1B = (vec_s16) vec_mergel(zero_u8v, srcP1);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
384
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
385 srcP2A = (vec_s16) vec_mergeh(zero_u8v, srcP2);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
386 srcP2B = (vec_s16) vec_mergel(zero_u8v, srcP2);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
387 srcP3A = (vec_s16) vec_mergeh(zero_u8v, srcP3);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
388 srcP3B = (vec_s16) vec_mergel(zero_u8v, srcP3);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
389
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
390 srcM1A = (vec_s16) vec_mergeh(zero_u8v, srcM1);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
391 srcM1B = (vec_s16) vec_mergel(zero_u8v, srcM1);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
392 srcM2A = (vec_s16) vec_mergeh(zero_u8v, srcM2);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
393 srcM2B = (vec_s16) vec_mergel(zero_u8v, srcM2);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
394
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
395 sum1A = vec_adds(srcP0A, srcP1A);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
396 sum1B = vec_adds(srcP0B, srcP1B);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
397 sum2A = vec_adds(srcM1A, srcP2A);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
398 sum2B = vec_adds(srcM1B, srcP2B);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
399 sum3A = vec_adds(srcM2A, srcP3A);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
400 sum3B = vec_adds(srcM2B, srcP3B);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
401
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
402 pp1A = vec_mladd(sum1A, v20ss, v16ss);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
403 pp1B = vec_mladd(sum1B, v20ss, v16ss);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
404
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
405 pp2A = vec_mladd(sum2A, v5ss, zero_s16v);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
406 pp2B = vec_mladd(sum2B, v5ss, zero_s16v);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
407
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
408 pp3A = vec_add(sum3A, pp1A);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
409 pp3B = vec_add(sum3B, pp1B);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
410
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
411 psumA = vec_sub(pp3A, pp2A);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
412 psumB = vec_sub(pp3B, pp2B);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
413
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
414 sumA = vec_sra(psumA, v5us);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
415 sumB = vec_sra(psumB, v5us);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
416
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
417 sum = vec_packsu(sumA, sumB);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
418
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
419 ASSERT_ALIGNED(dst);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
420 vdst = vec_ld(0, dst);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
421
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
422 OP_U8_ALTIVEC(fsum, sum, vdst);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
423
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
424 vec_st(fsum, 0, dst);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
425
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
426 src += srcStride;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
427 dst += dstStride;
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
428 }
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
429 }
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
430
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
431 /* this code assumes stride % 16 == 0 */
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
432 static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
433 register int i;
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
434
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
435 LOAD_ZERO;
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
436 const vec_u8 perm = vec_lvsl(0, src);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
437 const vec_s16 v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
438 const vec_u16 v5us = vec_splat_u16(5);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
439 const vec_s16 v5ss = vec_splat_s16(5);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
440 const vec_s16 v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
441
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
442 uint8_t *srcbis = src - (srcStride * 2);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
443
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
444 const vec_u8 srcM2a = vec_ld(0, srcbis);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
445 const vec_u8 srcM2b = vec_ld(16, srcbis);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
446 const vec_u8 srcM2 = vec_perm(srcM2a, srcM2b, perm);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
447 //srcbis += srcStride;
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
448 const vec_u8 srcM1a = vec_ld(0, srcbis += srcStride);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
449 const vec_u8 srcM1b = vec_ld(16, srcbis);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
450 const vec_u8 srcM1 = vec_perm(srcM1a, srcM1b, perm);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
451 //srcbis += srcStride;
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
452 const vec_u8 srcP0a = vec_ld(0, srcbis += srcStride);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
453 const vec_u8 srcP0b = vec_ld(16, srcbis);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
454 const vec_u8 srcP0 = vec_perm(srcP0a, srcP0b, perm);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
455 //srcbis += srcStride;
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
456 const vec_u8 srcP1a = vec_ld(0, srcbis += srcStride);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
457 const vec_u8 srcP1b = vec_ld(16, srcbis);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
458 const vec_u8 srcP1 = vec_perm(srcP1a, srcP1b, perm);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
459 //srcbis += srcStride;
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
460 const vec_u8 srcP2a = vec_ld(0, srcbis += srcStride);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
461 const vec_u8 srcP2b = vec_ld(16, srcbis);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
462 const vec_u8 srcP2 = vec_perm(srcP2a, srcP2b, perm);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
463 //srcbis += srcStride;
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
464
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
465 vec_s16 srcM2ssA = (vec_s16) vec_mergeh(zero_u8v, srcM2);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
466 vec_s16 srcM2ssB = (vec_s16) vec_mergel(zero_u8v, srcM2);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
467 vec_s16 srcM1ssA = (vec_s16) vec_mergeh(zero_u8v, srcM1);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
468 vec_s16 srcM1ssB = (vec_s16) vec_mergel(zero_u8v, srcM1);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
469 vec_s16 srcP0ssA = (vec_s16) vec_mergeh(zero_u8v, srcP0);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
470 vec_s16 srcP0ssB = (vec_s16) vec_mergel(zero_u8v, srcP0);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
471 vec_s16 srcP1ssA = (vec_s16) vec_mergeh(zero_u8v, srcP1);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
472 vec_s16 srcP1ssB = (vec_s16) vec_mergel(zero_u8v, srcP1);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
473 vec_s16 srcP2ssA = (vec_s16) vec_mergeh(zero_u8v, srcP2);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
474 vec_s16 srcP2ssB = (vec_s16) vec_mergel(zero_u8v, srcP2);
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
475
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
476 vec_s16 pp1A, pp1B, pp2A, pp2B, pp3A, pp3B,
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
477 psumA, psumB, sumA, sumB,
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
478 srcP3ssA, srcP3ssB,
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
479 sum1A, sum1B, sum2A, sum2B, sum3A, sum3B;
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
480
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
481 vec_u8 sum, vdst, fsum, srcP3a, srcP3b, srcP3;
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
482
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
483 for (i = 0 ; i < 16 ; i++) {
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
484 srcP3a = vec_ld(0, srcbis += srcStride);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
485 srcP3b = vec_ld(16, srcbis);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
486 srcP3 = vec_perm(srcP3a, srcP3b, perm);
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
487 srcP3ssA = (vec_s16) vec_mergeh(zero_u8v, srcP3);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
488 srcP3ssB = (vec_s16) vec_mergel(zero_u8v, srcP3);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
489 //srcbis += srcStride;
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
490
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
491 sum1A = vec_adds(srcP0ssA, srcP1ssA);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
492 sum1B = vec_adds(srcP0ssB, srcP1ssB);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
493 sum2A = vec_adds(srcM1ssA, srcP2ssA);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
494 sum2B = vec_adds(srcM1ssB, srcP2ssB);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
495 sum3A = vec_adds(srcM2ssA, srcP3ssA);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
496 sum3B = vec_adds(srcM2ssB, srcP3ssB);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
497
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
498 srcM2ssA = srcM1ssA;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
499 srcM2ssB = srcM1ssB;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
500 srcM1ssA = srcP0ssA;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
501 srcM1ssB = srcP0ssB;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
502 srcP0ssA = srcP1ssA;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
503 srcP0ssB = srcP1ssB;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
504 srcP1ssA = srcP2ssA;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
505 srcP1ssB = srcP2ssB;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
506 srcP2ssA = srcP3ssA;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
507 srcP2ssB = srcP3ssB;
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
508
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
509 pp1A = vec_mladd(sum1A, v20ss, v16ss);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
510 pp1B = vec_mladd(sum1B, v20ss, v16ss);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
511
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
512 pp2A = vec_mladd(sum2A, v5ss, zero_s16v);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
513 pp2B = vec_mladd(sum2B, v5ss, zero_s16v);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
514
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
515 pp3A = vec_add(sum3A, pp1A);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
516 pp3B = vec_add(sum3B, pp1B);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
517
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
518 psumA = vec_sub(pp3A, pp2A);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
519 psumB = vec_sub(pp3B, pp2B);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
520
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
521 sumA = vec_sra(psumA, v5us);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
522 sumB = vec_sra(psumB, v5us);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
523
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
524 sum = vec_packsu(sumA, sumB);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
525
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
526 ASSERT_ALIGNED(dst);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
527 vdst = vec_ld(0, dst);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
528
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
529 OP_U8_ALTIVEC(fsum, sum, vdst);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
530
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
531 vec_st(fsum, 0, dst);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
532
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
533 dst += dstStride;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
534 }
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
535 }
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
536
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
537 /* this code assumes stride % 16 == 0 *and* that tmp is properly aligned */
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
538 static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp, uint8_t * src, int dstStride, int tmpStride, int srcStride) {
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
539 register int i;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
540 LOAD_ZERO;
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
541 const vec_u8 permM2 = vec_lvsl(-2, src);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
542 const vec_u8 permM1 = vec_lvsl(-1, src);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
543 const vec_u8 permP0 = vec_lvsl(+0, src);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
544 const vec_u8 permP1 = vec_lvsl(+1, src);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
545 const vec_u8 permP2 = vec_lvsl(+2, src);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
546 const vec_u8 permP3 = vec_lvsl(+3, src);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
547 const vec_s16 v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
548 const vec_u32 v10ui = vec_splat_u32(10);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
549 const vec_s16 v5ss = vec_splat_s16(5);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
550 const vec_s16 v1ss = vec_splat_s16(1);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
551 const vec_s32 v512si = vec_sl(vec_splat_s32(1),vec_splat_u32(9));
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
552 const vec_u32 v16ui = vec_sl(vec_splat_u32(1),vec_splat_u32(4));
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
553
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
554 register int align = ((((unsigned long)src) - 2) % 16);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
555
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
556 vec_s16 srcP0A, srcP0B, srcP1A, srcP1B,
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
557 srcP2A, srcP2B, srcP3A, srcP3B,
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
558 srcM1A, srcM1B, srcM2A, srcM2B,
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
559 sum1A, sum1B, sum2A, sum2B, sum3A, sum3B,
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
560 pp1A, pp1B, pp2A, pp2B, psumA, psumB;
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
561
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
562 const vec_u8 mperm = (const vec_u8)
7373
266d4949aa15 Remove AltiVec vector declaration compiler compatibility macros.
diego
parents: 7333
diff changeset
563 {0x00, 0x08, 0x01, 0x09, 0x02, 0x0A, 0x03, 0x0B,
266d4949aa15 Remove AltiVec vector declaration compiler compatibility macros.
diego
parents: 7333
diff changeset
564 0x04, 0x0C, 0x05, 0x0D, 0x06, 0x0E, 0x07, 0x0F};
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
565 int16_t *tmpbis = tmp;
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
566
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
567 vec_s16 tmpM1ssA, tmpM1ssB, tmpM2ssA, tmpM2ssB,
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
568 tmpP0ssA, tmpP0ssB, tmpP1ssA, tmpP1ssB,
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
569 tmpP2ssA, tmpP2ssB;
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
570
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
571 vec_s32 pp1Ae, pp1Ao, pp1Be, pp1Bo, pp2Ae, pp2Ao, pp2Be, pp2Bo,
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
572 pp3Ae, pp3Ao, pp3Be, pp3Bo, pp1cAe, pp1cAo, pp1cBe, pp1cBo,
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
573 pp32Ae, pp32Ao, pp32Be, pp32Bo, sumAe, sumAo, sumBe, sumBo,
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
574 ssumAe, ssumAo, ssumBe, ssumBo;
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
575 vec_u8 fsum, sumv, sum, vdst;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
576 vec_s16 ssume, ssumo;
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
577
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
578 src -= (2 * srcStride);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
579 for (i = 0 ; i < 21 ; i ++) {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
580 vec_u8 srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
581 vec_u8 srcR1 = vec_ld(-2, src);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
582 vec_u8 srcR2 = vec_ld(14, src);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
583
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
584 switch (align) {
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
585 default: {
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
586 srcM2 = vec_perm(srcR1, srcR2, permM2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
587 srcM1 = vec_perm(srcR1, srcR2, permM1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
588 srcP0 = vec_perm(srcR1, srcR2, permP0);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
589 srcP1 = vec_perm(srcR1, srcR2, permP1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
590 srcP2 = vec_perm(srcR1, srcR2, permP2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
591 srcP3 = vec_perm(srcR1, srcR2, permP3);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
592 } break;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
593 case 11: {
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
594 srcM2 = vec_perm(srcR1, srcR2, permM2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
595 srcM1 = vec_perm(srcR1, srcR2, permM1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
596 srcP0 = vec_perm(srcR1, srcR2, permP0);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
597 srcP1 = vec_perm(srcR1, srcR2, permP1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
598 srcP2 = vec_perm(srcR1, srcR2, permP2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
599 srcP3 = srcR2;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
600 } break;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
601 case 12: {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
602 vec_u8 srcR3 = vec_ld(30, src);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
603 srcM2 = vec_perm(srcR1, srcR2, permM2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
604 srcM1 = vec_perm(srcR1, srcR2, permM1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
605 srcP0 = vec_perm(srcR1, srcR2, permP0);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
606 srcP1 = vec_perm(srcR1, srcR2, permP1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
607 srcP2 = srcR2;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
608 srcP3 = vec_perm(srcR2, srcR3, permP3);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
609 } break;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
610 case 13: {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
611 vec_u8 srcR3 = vec_ld(30, src);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
612 srcM2 = vec_perm(srcR1, srcR2, permM2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
613 srcM1 = vec_perm(srcR1, srcR2, permM1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
614 srcP0 = vec_perm(srcR1, srcR2, permP0);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
615 srcP1 = srcR2;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
616 srcP2 = vec_perm(srcR2, srcR3, permP2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
617 srcP3 = vec_perm(srcR2, srcR3, permP3);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
618 } break;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
619 case 14: {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
620 vec_u8 srcR3 = vec_ld(30, src);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
621 srcM2 = vec_perm(srcR1, srcR2, permM2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
622 srcM1 = vec_perm(srcR1, srcR2, permM1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
623 srcP0 = srcR2;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
624 srcP1 = vec_perm(srcR2, srcR3, permP1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
625 srcP2 = vec_perm(srcR2, srcR3, permP2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
626 srcP3 = vec_perm(srcR2, srcR3, permP3);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
627 } break;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
628 case 15: {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
629 vec_u8 srcR3 = vec_ld(30, src);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
630 srcM2 = vec_perm(srcR1, srcR2, permM2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
631 srcM1 = srcR2;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
632 srcP0 = vec_perm(srcR2, srcR3, permP0);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
633 srcP1 = vec_perm(srcR2, srcR3, permP1);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
634 srcP2 = vec_perm(srcR2, srcR3, permP2);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
635 srcP3 = vec_perm(srcR2, srcR3, permP3);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
636 } break;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
637 }
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
638
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
639 srcP0A = (vec_s16) vec_mergeh(zero_u8v, srcP0);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
640 srcP0B = (vec_s16) vec_mergel(zero_u8v, srcP0);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
641 srcP1A = (vec_s16) vec_mergeh(zero_u8v, srcP1);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
642 srcP1B = (vec_s16) vec_mergel(zero_u8v, srcP1);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
643
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
644 srcP2A = (vec_s16) vec_mergeh(zero_u8v, srcP2);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
645 srcP2B = (vec_s16) vec_mergel(zero_u8v, srcP2);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
646 srcP3A = (vec_s16) vec_mergeh(zero_u8v, srcP3);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
647 srcP3B = (vec_s16) vec_mergel(zero_u8v, srcP3);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
648
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
649 srcM1A = (vec_s16) vec_mergeh(zero_u8v, srcM1);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
650 srcM1B = (vec_s16) vec_mergel(zero_u8v, srcM1);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
651 srcM2A = (vec_s16) vec_mergeh(zero_u8v, srcM2);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
652 srcM2B = (vec_s16) vec_mergel(zero_u8v, srcM2);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
653
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
654 sum1A = vec_adds(srcP0A, srcP1A);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
655 sum1B = vec_adds(srcP0B, srcP1B);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
656 sum2A = vec_adds(srcM1A, srcP2A);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
657 sum2B = vec_adds(srcM1B, srcP2B);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
658 sum3A = vec_adds(srcM2A, srcP3A);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
659 sum3B = vec_adds(srcM2B, srcP3B);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
660
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
661 pp1A = vec_mladd(sum1A, v20ss, sum3A);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
662 pp1B = vec_mladd(sum1B, v20ss, sum3B);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
663
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
664 pp2A = vec_mladd(sum2A, v5ss, zero_s16v);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
665 pp2B = vec_mladd(sum2B, v5ss, zero_s16v);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
666
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
667 psumA = vec_sub(pp1A, pp2A);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
668 psumB = vec_sub(pp1B, pp2B);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
669
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
670 vec_st(psumA, 0, tmp);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
671 vec_st(psumB, 16, tmp);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
672
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
673 src += srcStride;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
674 tmp += tmpStride; /* int16_t*, and stride is 16, so it's OK here */
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
675 }
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
676
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
677 tmpM2ssA = vec_ld(0, tmpbis);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
678 tmpM2ssB = vec_ld(16, tmpbis);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
679 tmpbis += tmpStride;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
680 tmpM1ssA = vec_ld(0, tmpbis);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
681 tmpM1ssB = vec_ld(16, tmpbis);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
682 tmpbis += tmpStride;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
683 tmpP0ssA = vec_ld(0, tmpbis);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
684 tmpP0ssB = vec_ld(16, tmpbis);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
685 tmpbis += tmpStride;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
686 tmpP1ssA = vec_ld(0, tmpbis);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
687 tmpP1ssB = vec_ld(16, tmpbis);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
688 tmpbis += tmpStride;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
689 tmpP2ssA = vec_ld(0, tmpbis);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
690 tmpP2ssB = vec_ld(16, tmpbis);
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
691 tmpbis += tmpStride;
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
692
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
693 for (i = 0 ; i < 16 ; i++) {
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
694 const vec_s16 tmpP3ssA = vec_ld(0, tmpbis);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
695 const vec_s16 tmpP3ssB = vec_ld(16, tmpbis);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
696
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
697 const vec_s16 sum1A = vec_adds(tmpP0ssA, tmpP1ssA);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
698 const vec_s16 sum1B = vec_adds(tmpP0ssB, tmpP1ssB);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
699 const vec_s16 sum2A = vec_adds(tmpM1ssA, tmpP2ssA);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
700 const vec_s16 sum2B = vec_adds(tmpM1ssB, tmpP2ssB);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
701 const vec_s16 sum3A = vec_adds(tmpM2ssA, tmpP3ssA);
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
702 const vec_s16 sum3B = vec_adds(tmpM2ssB, tmpP3ssB);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
703
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
704 tmpbis += tmpStride;
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
705
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
706 tmpM2ssA = tmpM1ssA;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
707 tmpM2ssB = tmpM1ssB;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
708 tmpM1ssA = tmpP0ssA;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
709 tmpM1ssB = tmpP0ssB;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
710 tmpP0ssA = tmpP1ssA;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
711 tmpP0ssB = tmpP1ssB;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
712 tmpP1ssA = tmpP2ssA;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
713 tmpP1ssB = tmpP2ssB;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
714 tmpP2ssA = tmpP3ssA;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
715 tmpP2ssB = tmpP3ssB;
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
716
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
717 pp1Ae = vec_mule(sum1A, v20ss);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
718 pp1Ao = vec_mulo(sum1A, v20ss);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
719 pp1Be = vec_mule(sum1B, v20ss);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
720 pp1Bo = vec_mulo(sum1B, v20ss);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
721
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
722 pp2Ae = vec_mule(sum2A, v5ss);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
723 pp2Ao = vec_mulo(sum2A, v5ss);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
724 pp2Be = vec_mule(sum2B, v5ss);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
725 pp2Bo = vec_mulo(sum2B, v5ss);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
726
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
727 pp3Ae = vec_sra((vec_s32)sum3A, v16ui);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
728 pp3Ao = vec_mulo(sum3A, v1ss);
8494
1615d6b75ada Cleanup _t types in libavcodec/ppc
lu_zero
parents: 7376
diff changeset
729 pp3Be = vec_sra((vec_s32)sum3B, v16ui);
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
730 pp3Bo = vec_mulo(sum3B, v1ss);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
731
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
732 pp1cAe = vec_add(pp1Ae, v512si);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
733 pp1cAo = vec_add(pp1Ao, v512si);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
734 pp1cBe = vec_add(pp1Be, v512si);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
735 pp1cBo = vec_add(pp1Bo, v512si);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
736
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
737 pp32Ae = vec_sub(pp3Ae, pp2Ae);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
738 pp32Ao = vec_sub(pp3Ao, pp2Ao);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
739 pp32Be = vec_sub(pp3Be, pp2Be);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
740 pp32Bo = vec_sub(pp3Bo, pp2Bo);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
741
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
742 sumAe = vec_add(pp1cAe, pp32Ae);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
743 sumAo = vec_add(pp1cAo, pp32Ao);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
744 sumBe = vec_add(pp1cBe, pp32Be);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
745 sumBo = vec_add(pp1cBo, pp32Bo);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
746
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
747 ssumAe = vec_sra(sumAe, v10ui);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
748 ssumAo = vec_sra(sumAo, v10ui);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
749 ssumBe = vec_sra(sumBe, v10ui);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
750 ssumBo = vec_sra(sumBo, v10ui);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
751
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
752 ssume = vec_packs(ssumAe, ssumBe);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
753 ssumo = vec_packs(ssumAo, ssumBo);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
754
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
755 sumv = vec_packsu(ssume, ssumo);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
756 sum = vec_perm(sumv, sumv, mperm);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
757
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
758 ASSERT_ALIGNED(dst);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
759 vdst = vec_ld(0, dst);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
760
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
761 OP_U8_ALTIVEC(fsum, sum, vdst);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
762
7333
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
763 vec_st(fsum, 0, dst);
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
764
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
765 dst += dstStride;
a8a79f5385f6 cosmetics: Reformat PPC code in libavcodec according to style guidelines.
diego
parents: 6067
diff changeset
766 }
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
767 }