annotate ppc/h264_template_altivec.c @ 4765:85298e8c55c4 libavcodec

bfin dsputils, basic pixel operations sads, diffs, motion compensation and standard IEEE 8x8 block transforms patch by Marc Hoffman, mmh pleasantst com
author diego
date Sun, 01 Apr 2007 22:28:45 +0000
parents c8c591fe26f8
children 41cabe79ba25
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
1 /*
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
2 * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
3 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3577
diff changeset
4 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3577
diff changeset
5 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3577
diff changeset
6 * FFmpeg is free software; you can redistribute it and/or
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
7 * modify it under the terms of the GNU Lesser General Public
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
8 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3577
diff changeset
9 * version 2.1 of the License, or (at your option) any later version.
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
10 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3577
diff changeset
11 * FFmpeg is distributed in the hope that it will be useful,
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
14 * Lesser General Public License for more details.
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
15 *
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
16 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3577
diff changeset
17 * License along with FFmpeg; if not, write to the Free Software
3036
0b546eab515d Update licensing information: The FSF changed postal address.
diego
parents: 2967
diff changeset
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
19 */
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
20
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
21 /* this code assumes that stride % 16 == 0 */
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
22 void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
23 POWERPC_PERF_DECLARE(PREFIX_h264_chroma_mc8_num, 1);
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
24 signed int ABCD[4] __attribute__((aligned(16))) =
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
25 {((8 - x) * (8 - y)),
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
26 ((x) * (8 - y)),
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
27 ((8 - x) * (y)),
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
28 ((x) * (y))};
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
29 register int i;
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
30 vector unsigned char fperm;
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
31 const vector signed int vABCD = vec_ld(0, ABCD);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
32 const vector signed short vA = vec_splat((vector signed short)vABCD, 1);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
33 const vector signed short vB = vec_splat((vector signed short)vABCD, 3);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
34 const vector signed short vC = vec_splat((vector signed short)vABCD, 5);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
35 const vector signed short vD = vec_splat((vector signed short)vABCD, 7);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
36 const vector signed int vzero = vec_splat_s32(0);
3137
876d6280faf7 Fix AltiVec H.264 decoding on FSF gcc.
diego
parents: 3036
diff changeset
37 const vector signed short v32ss = vec_sl(vec_splat_s16(1),vec_splat_u16(5));
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
38 const vector unsigned short v6us = vec_splat_u16(6);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
39 register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
40 register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
41
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
42 vector unsigned char vsrcAuc, vsrcBuc, vsrcperm0, vsrcperm1;
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
43 vector unsigned char vsrc0uc, vsrc1uc;
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
44 vector signed short vsrc0ssH, vsrc1ssH;
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
45 vector unsigned char vsrcCuc, vsrc2uc, vsrc3uc;
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
46 vector signed short vsrc2ssH, vsrc3ssH, psum;
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
47 vector unsigned char vdst, ppsum, vfdst, fsum;
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
48
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
49 POWERPC_PERF_START_COUNT(PREFIX_h264_chroma_mc8_num, 1);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
50
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
51 if (((unsigned long)dst) % 16 == 0) {
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
52 fperm = (vector unsigned char)AVV(0x10, 0x11, 0x12, 0x13,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
53 0x14, 0x15, 0x16, 0x17,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
54 0x08, 0x09, 0x0A, 0x0B,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
55 0x0C, 0x0D, 0x0E, 0x0F);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
56 } else {
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
57 fperm = (vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
58 0x04, 0x05, 0x06, 0x07,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
59 0x18, 0x19, 0x1A, 0x1B,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
60 0x1C, 0x1D, 0x1E, 0x1F);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
61 }
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
62
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
63 vsrcAuc = vec_ld(0, src);
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
64
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
65 if (loadSecond)
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
66 vsrcBuc = vec_ld(16, src);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
67 vsrcperm0 = vec_lvsl(0, src);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
68 vsrcperm1 = vec_lvsl(1, src);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
69
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
70 vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
71 if (reallyBadAlign)
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
72 vsrc1uc = vsrcBuc;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
73 else
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
74 vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
75
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
76 vsrc0ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
77 (vector unsigned char)vsrc0uc);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
78 vsrc1ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
79 (vector unsigned char)vsrc1uc);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
80
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
81 if (!loadSecond) {// -> !reallyBadAlign
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
82 for (i = 0 ; i < h ; i++) {
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
83
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
84
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
85 vsrcCuc = vec_ld(stride + 0, src);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
86
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
87 vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
88 vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
89
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
90 vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
91 (vector unsigned char)vsrc2uc);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
92 vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
93 (vector unsigned char)vsrc3uc);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
94
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
95 psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
96 psum = vec_mladd(vB, vsrc1ssH, psum);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
97 psum = vec_mladd(vC, vsrc2ssH, psum);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
98 psum = vec_mladd(vD, vsrc3ssH, psum);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
99 psum = vec_add(v32ss, psum);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
100 psum = vec_sra(psum, v6us);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
101
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
102 vdst = vec_ld(0, dst);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
103 ppsum = (vector unsigned char)vec_packsu(psum, psum);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
104 vfdst = vec_perm(vdst, ppsum, fperm);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
105
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
106 OP_U8_ALTIVEC(fsum, vfdst, vdst);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
107
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
108 vec_st(fsum, 0, dst);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
109
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
110 vsrc0ssH = vsrc2ssH;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
111 vsrc1ssH = vsrc3ssH;
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
112
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
113 dst += stride;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
114 src += stride;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
115 }
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
116 } else {
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
117 vector unsigned char vsrcDuc;
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
118 for (i = 0 ; i < h ; i++) {
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
119 vsrcCuc = vec_ld(stride + 0, src);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
120 vsrcDuc = vec_ld(stride + 16, src);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
121
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
122 vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
123 if (reallyBadAlign)
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
124 vsrc3uc = vsrcDuc;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
125 else
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
126 vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
127
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
128 vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
129 (vector unsigned char)vsrc2uc);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
130 vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
131 (vector unsigned char)vsrc3uc);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
132
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
133 psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
134 psum = vec_mladd(vB, vsrc1ssH, psum);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
135 psum = vec_mladd(vC, vsrc2ssH, psum);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
136 psum = vec_mladd(vD, vsrc3ssH, psum);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
137 psum = vec_add(v32ss, psum);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
138 psum = vec_sr(psum, v6us);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
139
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
140 vdst = vec_ld(0, dst);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
141 ppsum = (vector unsigned char)vec_pack(psum, psum);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
142 vfdst = vec_perm(vdst, ppsum, fperm);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
143
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
144 OP_U8_ALTIVEC(fsum, vfdst, vdst);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
145
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
146 vec_st(fsum, 0, dst);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
147
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
148 vsrc0ssH = vsrc2ssH;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
149 vsrc1ssH = vsrc3ssH;
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
150
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
151 dst += stride;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
152 src += stride;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
153 }
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
154 }
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
155 POWERPC_PERF_STOP_COUNT(PREFIX_h264_chroma_mc8_num, 1);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
156 }
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
157
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
158 /* this code assumes that stride % 16 == 0 */
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
159 static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
160 POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_h_lowpass_num, 1);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
161 register int i;
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
162
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
163 const vector signed int vzero = vec_splat_s32(0);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
164 const vector unsigned char permM2 = vec_lvsl(-2, src);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
165 const vector unsigned char permM1 = vec_lvsl(-1, src);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
166 const vector unsigned char permP0 = vec_lvsl(+0, src);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
167 const vector unsigned char permP1 = vec_lvsl(+1, src);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
168 const vector unsigned char permP2 = vec_lvsl(+2, src);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
169 const vector unsigned char permP3 = vec_lvsl(+3, src);
3137
876d6280faf7 Fix AltiVec H.264 decoding on FSF gcc.
diego
parents: 3036
diff changeset
170 const vector signed short v5ss = vec_splat_s16(5);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
171 const vector unsigned short v5us = vec_splat_u16(5);
3137
876d6280faf7 Fix AltiVec H.264 decoding on FSF gcc.
diego
parents: 3036
diff changeset
172 const vector signed short v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
876d6280faf7 Fix AltiVec H.264 decoding on FSF gcc.
diego
parents: 3036
diff changeset
173 const vector signed short v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
174 const vector unsigned char dstperm = vec_lvsr(0, dst);
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
175 const vector unsigned char neg1 =
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
176 (const vector unsigned char) vec_splat_s8(-1);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
177
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
178 const vector unsigned char dstmask =
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
179 vec_perm((const vector unsigned char)vzero,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
180 neg1, dstperm);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
181
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
182 vector unsigned char srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
183
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
184 register int align = ((((unsigned long)src) - 2) % 16);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
185
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
186 vector signed short srcP0A, srcP0B, srcP1A, srcP1B,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
187 srcP2A, srcP2B, srcP3A, srcP3B,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
188 srcM1A, srcM1B, srcM2A, srcM2B,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
189 sum1A, sum1B, sum2A, sum2B, sum3A, sum3B,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
190 pp1A, pp1B, pp2A, pp2B, pp3A, pp3B,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
191 psumA, psumB, sumA, sumB;
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
192
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
193 vector unsigned char sum, dst1, dst2, vdst, fsum,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
194 rsum, fdst1, fdst2;
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
195
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
196 POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
197
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
198 for (i = 0 ; i < 16 ; i ++) {
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
199 vector unsigned char srcR1 = vec_ld(-2, src);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
200 vector unsigned char srcR2 = vec_ld(14, src);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
201
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
202 switch (align) {
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
203 default: {
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
204 srcM2 = vec_perm(srcR1, srcR2, permM2);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
205 srcM1 = vec_perm(srcR1, srcR2, permM1);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
206 srcP0 = vec_perm(srcR1, srcR2, permP0);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
207 srcP1 = vec_perm(srcR1, srcR2, permP1);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
208 srcP2 = vec_perm(srcR1, srcR2, permP2);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
209 srcP3 = vec_perm(srcR1, srcR2, permP3);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
210 } break;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
211 case 11: {
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
212 srcM2 = vec_perm(srcR1, srcR2, permM2);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
213 srcM1 = vec_perm(srcR1, srcR2, permM1);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
214 srcP0 = vec_perm(srcR1, srcR2, permP0);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
215 srcP1 = vec_perm(srcR1, srcR2, permP1);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
216 srcP2 = vec_perm(srcR1, srcR2, permP2);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
217 srcP3 = srcR2;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
218 } break;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
219 case 12: {
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
220 vector unsigned char srcR3 = vec_ld(30, src);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
221 srcM2 = vec_perm(srcR1, srcR2, permM2);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
222 srcM1 = vec_perm(srcR1, srcR2, permM1);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
223 srcP0 = vec_perm(srcR1, srcR2, permP0);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
224 srcP1 = vec_perm(srcR1, srcR2, permP1);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
225 srcP2 = srcR2;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
226 srcP3 = vec_perm(srcR2, srcR3, permP3);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
227 } break;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
228 case 13: {
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
229 vector unsigned char srcR3 = vec_ld(30, src);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
230 srcM2 = vec_perm(srcR1, srcR2, permM2);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
231 srcM1 = vec_perm(srcR1, srcR2, permM1);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
232 srcP0 = vec_perm(srcR1, srcR2, permP0);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
233 srcP1 = srcR2;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
234 srcP2 = vec_perm(srcR2, srcR3, permP2);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
235 srcP3 = vec_perm(srcR2, srcR3, permP3);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
236 } break;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
237 case 14: {
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
238 vector unsigned char srcR3 = vec_ld(30, src);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
239 srcM2 = vec_perm(srcR1, srcR2, permM2);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
240 srcM1 = vec_perm(srcR1, srcR2, permM1);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
241 srcP0 = srcR2;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
242 srcP1 = vec_perm(srcR2, srcR3, permP1);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
243 srcP2 = vec_perm(srcR2, srcR3, permP2);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
244 srcP3 = vec_perm(srcR2, srcR3, permP3);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
245 } break;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
246 case 15: {
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
247 vector unsigned char srcR3 = vec_ld(30, src);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
248 srcM2 = vec_perm(srcR1, srcR2, permM2);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
249 srcM1 = srcR2;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
250 srcP0 = vec_perm(srcR2, srcR3, permP0);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
251 srcP1 = vec_perm(srcR2, srcR3, permP1);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
252 srcP2 = vec_perm(srcR2, srcR3, permP2);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
253 srcP3 = vec_perm(srcR2, srcR3, permP3);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
254 } break;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
255 }
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
256
3350
eff63ac2b545 Make gcc-3 happy again, thanks to Olivier Castan <castan.o@free.fr> for pointing the issue
lu_zero
parents: 3346
diff changeset
257 srcP0A = (vector signed short)
eff63ac2b545 Make gcc-3 happy again, thanks to Olivier Castan <castan.o@free.fr> for pointing the issue
lu_zero
parents: 3346
diff changeset
258 vec_mergeh((vector unsigned char)vzero, srcP0);
eff63ac2b545 Make gcc-3 happy again, thanks to Olivier Castan <castan.o@free.fr> for pointing the issue
lu_zero
parents: 3346
diff changeset
259 srcP0B = (vector signed short)
eff63ac2b545 Make gcc-3 happy again, thanks to Olivier Castan <castan.o@free.fr> for pointing the issue
lu_zero
parents: 3346
diff changeset
260 vec_mergel((vector unsigned char)vzero, srcP0);
eff63ac2b545 Make gcc-3 happy again, thanks to Olivier Castan <castan.o@free.fr> for pointing the issue
lu_zero
parents: 3346
diff changeset
261 srcP1A = (vector signed short)
eff63ac2b545 Make gcc-3 happy again, thanks to Olivier Castan <castan.o@free.fr> for pointing the issue
lu_zero
parents: 3346
diff changeset
262 vec_mergeh((vector unsigned char)vzero, srcP1);
eff63ac2b545 Make gcc-3 happy again, thanks to Olivier Castan <castan.o@free.fr> for pointing the issue
lu_zero
parents: 3346
diff changeset
263 srcP1B = (vector signed short)
eff63ac2b545 Make gcc-3 happy again, thanks to Olivier Castan <castan.o@free.fr> for pointing the issue
lu_zero
parents: 3346
diff changeset
264 vec_mergel((vector unsigned char)vzero, srcP1);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
265
3350
eff63ac2b545 Make gcc-3 happy again, thanks to Olivier Castan <castan.o@free.fr> for pointing the issue
lu_zero
parents: 3346
diff changeset
266 srcP2A = (vector signed short)
eff63ac2b545 Make gcc-3 happy again, thanks to Olivier Castan <castan.o@free.fr> for pointing the issue
lu_zero
parents: 3346
diff changeset
267 vec_mergeh((vector unsigned char)vzero, srcP2);
eff63ac2b545 Make gcc-3 happy again, thanks to Olivier Castan <castan.o@free.fr> for pointing the issue
lu_zero
parents: 3346
diff changeset
268 srcP2B = (vector signed short)
eff63ac2b545 Make gcc-3 happy again, thanks to Olivier Castan <castan.o@free.fr> for pointing the issue
lu_zero
parents: 3346
diff changeset
269 vec_mergel((vector unsigned char)vzero, srcP2);
eff63ac2b545 Make gcc-3 happy again, thanks to Olivier Castan <castan.o@free.fr> for pointing the issue
lu_zero
parents: 3346
diff changeset
270 srcP3A = (vector signed short)
eff63ac2b545 Make gcc-3 happy again, thanks to Olivier Castan <castan.o@free.fr> for pointing the issue
lu_zero
parents: 3346
diff changeset
271 vec_mergeh((vector unsigned char)vzero, srcP3);
eff63ac2b545 Make gcc-3 happy again, thanks to Olivier Castan <castan.o@free.fr> for pointing the issue
lu_zero
parents: 3346
diff changeset
272 srcP3B = (vector signed short)
eff63ac2b545 Make gcc-3 happy again, thanks to Olivier Castan <castan.o@free.fr> for pointing the issue
lu_zero
parents: 3346
diff changeset
273 vec_mergel((vector unsigned char)vzero, srcP3);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
274
3350
eff63ac2b545 Make gcc-3 happy again, thanks to Olivier Castan <castan.o@free.fr> for pointing the issue
lu_zero
parents: 3346
diff changeset
275 srcM1A = (vector signed short)
eff63ac2b545 Make gcc-3 happy again, thanks to Olivier Castan <castan.o@free.fr> for pointing the issue
lu_zero
parents: 3346
diff changeset
276 vec_mergeh((vector unsigned char)vzero, srcM1);
eff63ac2b545 Make gcc-3 happy again, thanks to Olivier Castan <castan.o@free.fr> for pointing the issue
lu_zero
parents: 3346
diff changeset
277 srcM1B = (vector signed short)
eff63ac2b545 Make gcc-3 happy again, thanks to Olivier Castan <castan.o@free.fr> for pointing the issue
lu_zero
parents: 3346
diff changeset
278 vec_mergel((vector unsigned char)vzero, srcM1);
eff63ac2b545 Make gcc-3 happy again, thanks to Olivier Castan <castan.o@free.fr> for pointing the issue
lu_zero
parents: 3346
diff changeset
279 srcM2A = (vector signed short)
eff63ac2b545 Make gcc-3 happy again, thanks to Olivier Castan <castan.o@free.fr> for pointing the issue
lu_zero
parents: 3346
diff changeset
280 vec_mergeh((vector unsigned char)vzero, srcM2);
eff63ac2b545 Make gcc-3 happy again, thanks to Olivier Castan <castan.o@free.fr> for pointing the issue
lu_zero
parents: 3346
diff changeset
281 srcM2B = (vector signed short)
eff63ac2b545 Make gcc-3 happy again, thanks to Olivier Castan <castan.o@free.fr> for pointing the issue
lu_zero
parents: 3346
diff changeset
282 vec_mergel((vector unsigned char)vzero, srcM2);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
283
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
284 sum1A = vec_adds(srcP0A, srcP1A);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
285 sum1B = vec_adds(srcP0B, srcP1B);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
286 sum2A = vec_adds(srcM1A, srcP2A);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
287 sum2B = vec_adds(srcM1B, srcP2B);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
288 sum3A = vec_adds(srcM2A, srcP3A);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
289 sum3B = vec_adds(srcM2B, srcP3B);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
290
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
291 pp1A = vec_mladd(sum1A, v20ss, v16ss);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
292 pp1B = vec_mladd(sum1B, v20ss, v16ss);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
293
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
294 pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
295 pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
296
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
297 pp3A = vec_add(sum3A, pp1A);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
298 pp3B = vec_add(sum3B, pp1B);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
299
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
300 psumA = vec_sub(pp3A, pp2A);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
301 psumB = vec_sub(pp3B, pp2B);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
302
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
303 sumA = vec_sra(psumA, v5us);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
304 sumB = vec_sra(psumB, v5us);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
305
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
306 sum = vec_packsu(sumA, sumB);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
307
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
308 dst1 = vec_ld(0, dst);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
309 dst2 = vec_ld(16, dst);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
310 vdst = vec_perm(dst1, dst2, vec_lvsl(0, dst));
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
311
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
312 OP_U8_ALTIVEC(fsum, sum, vdst);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
313
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
314 rsum = vec_perm(fsum, fsum, dstperm);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
315 fdst1 = vec_sel(dst1, rsum, dstmask);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
316 fdst2 = vec_sel(rsum, dst2, dstmask);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
317
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
318 vec_st(fdst1, 0, dst);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
319 vec_st(fdst2, 16, dst);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
320
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
321 src += srcStride;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
322 dst += dstStride;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
323 }
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
324 POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
325 }
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
326
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
327 /* This code assumes stride % 16 == 0: the alignment permute vector is computed once from src and reused for every row. */
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
328 static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
329 POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_v_lowpass_num, 1);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
330
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
331 register int i;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
332
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
333 const vector signed int vzero = vec_splat_s32(0);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
334 const vector unsigned char perm = vec_lvsl(0, src);
3137
876d6280faf7 Fix AltiVec H.264 decoding on FSF gcc.
diego
parents: 3036
diff changeset
335 const vector signed short v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
336 const vector unsigned short v5us = vec_splat_u16(5);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
337 const vector signed short v5ss = vec_splat_s16(5);
3137
876d6280faf7 Fix AltiVec H.264 decoding on FSF gcc.
diego
parents: 3036
diff changeset
338 const vector signed short v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
339 const vector unsigned char dstperm = vec_lvsr(0, dst);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
340 const vector unsigned char neg1 = (const vector unsigned char)vec_splat_s8(-1);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
341 const vector unsigned char dstmask = vec_perm((const vector unsigned char)vzero, neg1, dstperm);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
342
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
343 uint8_t *srcbis = src - (srcStride * 2);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
344
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
345 const vector unsigned char srcM2a = vec_ld(0, srcbis);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
346 const vector unsigned char srcM2b = vec_ld(16, srcbis);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
347 const vector unsigned char srcM2 = vec_perm(srcM2a, srcM2b, perm);
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
348 // srcbis += srcStride;
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
349 const vector unsigned char srcM1a = vec_ld(0, srcbis += srcStride);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
350 const vector unsigned char srcM1b = vec_ld(16, srcbis);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
351 const vector unsigned char srcM1 = vec_perm(srcM1a, srcM1b, perm);
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
352 // srcbis += srcStride;
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
353 const vector unsigned char srcP0a = vec_ld(0, srcbis += srcStride);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
354 const vector unsigned char srcP0b = vec_ld(16, srcbis);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
355 const vector unsigned char srcP0 = vec_perm(srcP0a, srcP0b, perm);
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
356 // srcbis += srcStride;
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
357 const vector unsigned char srcP1a = vec_ld(0, srcbis += srcStride);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
358 const vector unsigned char srcP1b = vec_ld(16, srcbis);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
359 const vector unsigned char srcP1 = vec_perm(srcP1a, srcP1b, perm);
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
360 // srcbis += srcStride;
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
361 const vector unsigned char srcP2a = vec_ld(0, srcbis += srcStride);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
362 const vector unsigned char srcP2b = vec_ld(16, srcbis);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
363 const vector unsigned char srcP2 = vec_perm(srcP2a, srcP2b, perm);
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
364 // srcbis += srcStride;
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
365
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
366 vector signed short srcM2ssA = (vector signed short)
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
367 vec_mergeh((vector unsigned char)vzero, srcM2);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
368 vector signed short srcM2ssB = (vector signed short)
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
369 vec_mergel((vector unsigned char)vzero, srcM2);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
370 vector signed short srcM1ssA = (vector signed short)
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
371 vec_mergeh((vector unsigned char)vzero, srcM1);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
372 vector signed short srcM1ssB = (vector signed short)
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
373 vec_mergel((vector unsigned char)vzero, srcM1);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
374 vector signed short srcP0ssA = (vector signed short)
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
375 vec_mergeh((vector unsigned char)vzero, srcP0);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
376 vector signed short srcP0ssB = (vector signed short)
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
377 vec_mergel((vector unsigned char)vzero, srcP0);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
378 vector signed short srcP1ssA = (vector signed short)
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
379 vec_mergeh((vector unsigned char)vzero, srcP1);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
380 vector signed short srcP1ssB = (vector signed short)
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
381 vec_mergel((vector unsigned char)vzero, srcP1);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
382 vector signed short srcP2ssA = (vector signed short)
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
383 vec_mergeh((vector unsigned char)vzero, srcP2);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
384 vector signed short srcP2ssB = (vector signed short)
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
385 vec_mergel((vector unsigned char)vzero, srcP2);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
386
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
387 vector signed short pp1A, pp1B, pp2A, pp2B, pp3A, pp3B,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
388 psumA, psumB, sumA, sumB,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
389 srcP3ssA, srcP3ssB,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
390 sum1A, sum1B, sum2A, sum2B, sum3A, sum3B;
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
391
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
392 vector unsigned char sum, dst1, dst2, vdst, fsum, rsum, fdst1, fdst2,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
393 srcP3a, srcP3b, srcP3;
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
394
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
395 POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
396
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
397 for (i = 0 ; i < 16 ; i++) {
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
398 srcP3a = vec_ld(0, srcbis += srcStride);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
399 srcP3b = vec_ld(16, srcbis);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
400 srcP3 = vec_perm(srcP3a, srcP3b, perm);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
401 srcP3ssA = (vector signed short)
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
402 vec_mergeh((vector unsigned char)vzero, srcP3);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
403 srcP3ssB = (vector signed short)
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
404 vec_mergel((vector unsigned char)vzero, srcP3);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
405 // srcbis += srcStride;
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
406
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
407 sum1A = vec_adds(srcP0ssA, srcP1ssA);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
408 sum1B = vec_adds(srcP0ssB, srcP1ssB);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
409 sum2A = vec_adds(srcM1ssA, srcP2ssA);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
410 sum2B = vec_adds(srcM1ssB, srcP2ssB);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
411 sum3A = vec_adds(srcM2ssA, srcP3ssA);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
412 sum3B = vec_adds(srcM2ssB, srcP3ssB);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
413
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
414 srcM2ssA = srcM1ssA;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
415 srcM2ssB = srcM1ssB;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
416 srcM1ssA = srcP0ssA;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
417 srcM1ssB = srcP0ssB;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
418 srcP0ssA = srcP1ssA;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
419 srcP0ssB = srcP1ssB;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
420 srcP1ssA = srcP2ssA;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
421 srcP1ssB = srcP2ssB;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
422 srcP2ssA = srcP3ssA;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
423 srcP2ssB = srcP3ssB;
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
424
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
425 pp1A = vec_mladd(sum1A, v20ss, v16ss);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
426 pp1B = vec_mladd(sum1B, v20ss, v16ss);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
427
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
428 pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
429 pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
430
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
431 pp3A = vec_add(sum3A, pp1A);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
432 pp3B = vec_add(sum3B, pp1B);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
433
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
434 psumA = vec_sub(pp3A, pp2A);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
435 psumB = vec_sub(pp3B, pp2B);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
436
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
437 sumA = vec_sra(psumA, v5us);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
438 sumB = vec_sra(psumB, v5us);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
439
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
440 sum = vec_packsu(sumA, sumB);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
441
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
442 dst1 = vec_ld(0, dst);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
443 dst2 = vec_ld(16, dst);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
444 vdst = vec_perm(dst1, dst2, vec_lvsl(0, dst));
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
445
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
446 OP_U8_ALTIVEC(fsum, sum, vdst);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
447
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
448 rsum = vec_perm(fsum, fsum, dstperm);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
449 fdst1 = vec_sel(dst1, rsum, dstmask);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
450 fdst2 = vec_sel(rsum, dst2, dstmask);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
451
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
452 vec_st(fdst1, 0, dst);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
453 vec_st(fdst2, 16, dst);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
454
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
455 dst += dstStride;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
456 }
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
457 POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
458 }
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
459
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
460 /* this code assumes stride % 16 == 0 *and* that tmp is properly aligned */
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
461 static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp, uint8_t * src, int dstStride, int tmpStride, int srcStride) {
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
462 POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_hv_lowpass_num, 1);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
463 register int i;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
464 const vector signed int vzero = vec_splat_s32(0);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
465 const vector unsigned char permM2 = vec_lvsl(-2, src);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
466 const vector unsigned char permM1 = vec_lvsl(-1, src);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
467 const vector unsigned char permP0 = vec_lvsl(+0, src);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
468 const vector unsigned char permP1 = vec_lvsl(+1, src);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
469 const vector unsigned char permP2 = vec_lvsl(+2, src);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
470 const vector unsigned char permP3 = vec_lvsl(+3, src);
3137
876d6280faf7 Fix AltiVec H.264 decoding on FSF gcc.
diego
parents: 3036
diff changeset
471 const vector signed short v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
472 const vector unsigned int v10ui = vec_splat_u32(10);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
473 const vector signed short v5ss = vec_splat_s16(5);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
474 const vector signed short v1ss = vec_splat_s16(1);
3137
876d6280faf7 Fix AltiVec H.264 decoding on FSF gcc.
diego
parents: 3036
diff changeset
475 const vector signed int v512si = vec_sl(vec_splat_s32(1),vec_splat_u32(9));
3153
7aa01243b4d3 use vec_splat_u32 to initialise a v16ui (patch by Likai Liu < liulk _at_ cs.bu.edu >)
aurel
parents: 3137
diff changeset
476 const vector unsigned int v16ui = vec_sl(vec_splat_u32(1),vec_splat_u32(4));
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
477
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
478 register int align = ((((unsigned long)src) - 2) % 16);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
479
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
480 const vector unsigned char neg1 = (const vector unsigned char)
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
481 vec_splat_s8(-1);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
482
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
483 vector signed short srcP0A, srcP0B, srcP1A, srcP1B,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
484 srcP2A, srcP2B, srcP3A, srcP3B,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
485 srcM1A, srcM1B, srcM2A, srcM2B,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
486 sum1A, sum1B, sum2A, sum2B, sum3A, sum3B,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
487 pp1A, pp1B, pp2A, pp2B, psumA, psumB;
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
488
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
489 const vector unsigned char dstperm = vec_lvsr(0, dst);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
490
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
491 const vector unsigned char dstmask = vec_perm((const vector unsigned char)vzero, neg1, dstperm);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
492
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
493 const vector unsigned char mperm = (const vector unsigned char)
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
494 AVV(0x00, 0x08, 0x01, 0x09, 0x02, 0x0A, 0x03, 0x0B,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
495 0x04, 0x0C, 0x05, 0x0D, 0x06, 0x0E, 0x07, 0x0F);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
496 int16_t *tmpbis = tmp;
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
497
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
498 vector signed short tmpM1ssA, tmpM1ssB, tmpM2ssA, tmpM2ssB,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
499 tmpP0ssA, tmpP0ssB, tmpP1ssA, tmpP1ssB,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
500 tmpP2ssA, tmpP2ssB;
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
501
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
502 vector signed int pp1Ae, pp1Ao, pp1Be, pp1Bo, pp2Ae, pp2Ao, pp2Be, pp2Bo,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
503 pp3Ae, pp3Ao, pp3Be, pp3Bo, pp1cAe, pp1cAo, pp1cBe, pp1cBo,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
504 pp32Ae, pp32Ao, pp32Be, pp32Bo, sumAe, sumAo, sumBe, sumBo,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
505 ssumAe, ssumAo, ssumBe, ssumBo;
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
506 vector unsigned char fsum, sumv, sum, dst1, dst2, vdst,
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
507 rsum, fdst1, fdst2;
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
508 vector signed short ssume, ssumo;
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
509
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
510 POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_hv_lowpass_num, 1);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
511 src -= (2 * srcStride);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
512 for (i = 0 ; i < 21 ; i ++) {
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
513 vector unsigned char srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
514 vector unsigned char srcR1 = vec_ld(-2, src);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
515 vector unsigned char srcR2 = vec_ld(14, src);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
516
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
517 switch (align) {
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
518 default: {
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
519 srcM2 = vec_perm(srcR1, srcR2, permM2);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
520 srcM1 = vec_perm(srcR1, srcR2, permM1);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
521 srcP0 = vec_perm(srcR1, srcR2, permP0);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
522 srcP1 = vec_perm(srcR1, srcR2, permP1);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
523 srcP2 = vec_perm(srcR1, srcR2, permP2);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
524 srcP3 = vec_perm(srcR1, srcR2, permP3);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
525 } break;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
526 case 11: {
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
527 srcM2 = vec_perm(srcR1, srcR2, permM2);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
528 srcM1 = vec_perm(srcR1, srcR2, permM1);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
529 srcP0 = vec_perm(srcR1, srcR2, permP0);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
530 srcP1 = vec_perm(srcR1, srcR2, permP1);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
531 srcP2 = vec_perm(srcR1, srcR2, permP2);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
532 srcP3 = srcR2;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
533 } break;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
534 case 12: {
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
535 vector unsigned char srcR3 = vec_ld(30, src);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
536 srcM2 = vec_perm(srcR1, srcR2, permM2);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
537 srcM1 = vec_perm(srcR1, srcR2, permM1);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
538 srcP0 = vec_perm(srcR1, srcR2, permP0);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
539 srcP1 = vec_perm(srcR1, srcR2, permP1);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
540 srcP2 = srcR2;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
541 srcP3 = vec_perm(srcR2, srcR3, permP3);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
542 } break;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
543 case 13: {
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
544 vector unsigned char srcR3 = vec_ld(30, src);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
545 srcM2 = vec_perm(srcR1, srcR2, permM2);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
546 srcM1 = vec_perm(srcR1, srcR2, permM1);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
547 srcP0 = vec_perm(srcR1, srcR2, permP0);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
548 srcP1 = srcR2;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
549 srcP2 = vec_perm(srcR2, srcR3, permP2);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
550 srcP3 = vec_perm(srcR2, srcR3, permP3);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
551 } break;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
552 case 14: {
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
553 vector unsigned char srcR3 = vec_ld(30, src);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
554 srcM2 = vec_perm(srcR1, srcR2, permM2);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
555 srcM1 = vec_perm(srcR1, srcR2, permM1);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
556 srcP0 = srcR2;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
557 srcP1 = vec_perm(srcR2, srcR3, permP1);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
558 srcP2 = vec_perm(srcR2, srcR3, permP2);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
559 srcP3 = vec_perm(srcR2, srcR3, permP3);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
560 } break;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
561 case 15: {
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
562 vector unsigned char srcR3 = vec_ld(30, src);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
563 srcM2 = vec_perm(srcR1, srcR2, permM2);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
564 srcM1 = srcR2;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
565 srcP0 = vec_perm(srcR2, srcR3, permP0);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
566 srcP1 = vec_perm(srcR2, srcR3, permP1);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
567 srcP2 = vec_perm(srcR2, srcR3, permP2);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
568 srcP3 = vec_perm(srcR2, srcR3, permP3);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
569 } break;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
570 }
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
571
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
572 srcP0A = (vector signed short)
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
573 vec_mergeh((vector unsigned char)vzero, srcP0);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
574 srcP0B = (vector signed short)
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
575 vec_mergel((vector unsigned char)vzero, srcP0);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
576 srcP1A = (vector signed short)
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
577 vec_mergeh((vector unsigned char)vzero, srcP1);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
578 srcP1B = (vector signed short)
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
579 vec_mergel((vector unsigned char)vzero, srcP1);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
580
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
581 srcP2A = (vector signed short)
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
582 vec_mergeh((vector unsigned char)vzero, srcP2);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
583 srcP2B = (vector signed short)
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
584 vec_mergel((vector unsigned char)vzero, srcP2);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
585 srcP3A = (vector signed short)
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
586 vec_mergeh((vector unsigned char)vzero, srcP3);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
587 srcP3B = (vector signed short)
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
588 vec_mergel((vector unsigned char)vzero, srcP3);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
589
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
590 srcM1A = (vector signed short)
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
591 vec_mergeh((vector unsigned char)vzero, srcM1);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
592 srcM1B = (vector signed short)
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
593 vec_mergel((vector unsigned char)vzero, srcM1);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
594 srcM2A = (vector signed short)
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
595 vec_mergeh((vector unsigned char)vzero, srcM2);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
596 srcM2B = (vector signed short)
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
597 vec_mergel((vector unsigned char)vzero, srcM2);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
598
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
599 sum1A = vec_adds(srcP0A, srcP1A);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
600 sum1B = vec_adds(srcP0B, srcP1B);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
601 sum2A = vec_adds(srcM1A, srcP2A);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
602 sum2B = vec_adds(srcM1B, srcP2B);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
603 sum3A = vec_adds(srcM2A, srcP3A);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
604 sum3B = vec_adds(srcM2B, srcP3B);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
605
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
606 pp1A = vec_mladd(sum1A, v20ss, sum3A);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
607 pp1B = vec_mladd(sum1B, v20ss, sum3B);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
608
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
609 pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
610 pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
611
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
612 psumA = vec_sub(pp1A, pp2A);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
613 psumB = vec_sub(pp1B, pp2B);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
614
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
615 vec_st(psumA, 0, tmp);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
616 vec_st(psumB, 16, tmp);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
617
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
618 src += srcStride;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
619 tmp += tmpStride; /* int16_t*, and stride is 16, so it's OK here */
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
620 }
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
621
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
622 tmpM2ssA = vec_ld(0, tmpbis);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
623 tmpM2ssB = vec_ld(16, tmpbis);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
624 tmpbis += tmpStride;
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
625 tmpM1ssA = vec_ld(0, tmpbis);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
626 tmpM1ssB = vec_ld(16, tmpbis);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
627 tmpbis += tmpStride;
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
628 tmpP0ssA = vec_ld(0, tmpbis);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
629 tmpP0ssB = vec_ld(16, tmpbis);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
630 tmpbis += tmpStride;
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
631 tmpP1ssA = vec_ld(0, tmpbis);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
632 tmpP1ssB = vec_ld(16, tmpbis);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
633 tmpbis += tmpStride;
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
634 tmpP2ssA = vec_ld(0, tmpbis);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
635 tmpP2ssB = vec_ld(16, tmpbis);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
636 tmpbis += tmpStride;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
637
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
638 for (i = 0 ; i < 16 ; i++) {
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
639 const vector signed short tmpP3ssA = vec_ld(0, tmpbis);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
640 const vector signed short tmpP3ssB = vec_ld(16, tmpbis);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
641
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
642 const vector signed short sum1A = vec_adds(tmpP0ssA, tmpP1ssA);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
643 const vector signed short sum1B = vec_adds(tmpP0ssB, tmpP1ssB);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
644 const vector signed short sum2A = vec_adds(tmpM1ssA, tmpP2ssA);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
645 const vector signed short sum2B = vec_adds(tmpM1ssB, tmpP2ssB);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
646 const vector signed short sum3A = vec_adds(tmpM2ssA, tmpP3ssA);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
647 const vector signed short sum3B = vec_adds(tmpM2ssB, tmpP3ssB);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
648
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
649 tmpbis += tmpStride;
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
650
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
651 tmpM2ssA = tmpM1ssA;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
652 tmpM2ssB = tmpM1ssB;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
653 tmpM1ssA = tmpP0ssA;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
654 tmpM1ssB = tmpP0ssB;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
655 tmpP0ssA = tmpP1ssA;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
656 tmpP0ssB = tmpP1ssB;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
657 tmpP1ssA = tmpP2ssA;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
658 tmpP1ssB = tmpP2ssB;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
659 tmpP2ssA = tmpP3ssA;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
660 tmpP2ssB = tmpP3ssB;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
661
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
662 pp1Ae = vec_mule(sum1A, v20ss);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
663 pp1Ao = vec_mulo(sum1A, v20ss);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
664 pp1Be = vec_mule(sum1B, v20ss);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
665 pp1Bo = vec_mulo(sum1B, v20ss);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
666
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
667 pp2Ae = vec_mule(sum2A, v5ss);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
668 pp2Ao = vec_mulo(sum2A, v5ss);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
669 pp2Be = vec_mule(sum2B, v5ss);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
670 pp2Bo = vec_mulo(sum2B, v5ss);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
671
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
672 pp3Ae = vec_sra((vector signed int)sum3A, v16ui);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
673 pp3Ao = vec_mulo(sum3A, v1ss);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
674 pp3Be = vec_sra((vector signed int)sum3B, v16ui);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
675 pp3Bo = vec_mulo(sum3B, v1ss);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
676
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
677 pp1cAe = vec_add(pp1Ae, v512si);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
678 pp1cAo = vec_add(pp1Ao, v512si);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
679 pp1cBe = vec_add(pp1Be, v512si);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
680 pp1cBo = vec_add(pp1Bo, v512si);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
681
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
682 pp32Ae = vec_sub(pp3Ae, pp2Ae);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
683 pp32Ao = vec_sub(pp3Ao, pp2Ao);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
684 pp32Be = vec_sub(pp3Be, pp2Be);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
685 pp32Bo = vec_sub(pp3Bo, pp2Bo);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
686
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
687 sumAe = vec_add(pp1cAe, pp32Ae);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
688 sumAo = vec_add(pp1cAo, pp32Ao);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
689 sumBe = vec_add(pp1cBe, pp32Be);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
690 sumBo = vec_add(pp1cBo, pp32Bo);
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2236
diff changeset
691
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
692 ssumAe = vec_sra(sumAe, v10ui);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
693 ssumAo = vec_sra(sumAo, v10ui);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
694 ssumBe = vec_sra(sumBe, v10ui);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
695 ssumBo = vec_sra(sumBo, v10ui);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
696
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
697 ssume = vec_packs(ssumAe, ssumBe);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
698 ssumo = vec_packs(ssumAo, ssumBo);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
699
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
700 sumv = vec_packsu(ssume, ssumo);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
701 sum = vec_perm(sumv, sumv, mperm);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
702
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
703 dst1 = vec_ld(0, dst);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
704 dst2 = vec_ld(16, dst);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
705 vdst = vec_perm(dst1, dst2, vec_lvsl(0, dst));
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
706
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
707 OP_U8_ALTIVEC(fsum, sum, vdst);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
708
3346
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
709 rsum = vec_perm(fsum, fsum, dstperm);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
710 fdst1 = vec_sel(dst1, rsum, dstmask);
052765f11f1c Cosmetics: should not hurt performance, scream if are
lu_zero
parents: 3153
diff changeset
711 fdst2 = vec_sel(rsum, dst2, dstmask);
2236
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
712
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
713 vec_st(fdst1, 0, dst);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
714 vec_st(fdst2, 16, dst);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
715
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
716 dst += dstStride;
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
717 }
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
718 POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_hv_lowpass_num, 1);
b0102ea621dd h264 qpel mc, size 16 patch by (Romain Dolbeau <dolbeau at caps-entreprise dot com>)
michael
parents:
diff changeset
719 }