annotate ppc/vp8dsp_altivec.c @ 12530:63edd10ad4bc libavcodec tip

Try to fix crashes introduced by r25218. r25218 made assumptions about the existence of past reference frames that weren't necessarily true.
author darkshikari
date Tue, 28 Sep 2010 09:06:22 +0000
parents 9fef0a8ddd63
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
12011
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
1 /**
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
2 * VP8 compatible video decoder
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
3 *
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
4 * Copyright (C) 2010 David Conrad
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
5 *
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
6 * This file is part of FFmpeg.
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
7 *
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
8 * FFmpeg is free software; you can redistribute it and/or
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
9 * modify it under the terms of the GNU Lesser General Public
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
10 * License as published by the Free Software Foundation; either
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
11 * version 2.1 of the License, or (at your option) any later version.
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
12 *
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
13 * FFmpeg is distributed in the hope that it will be useful,
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
16 * Lesser General Public License for more details.
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
17 *
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
18 * You should have received a copy of the GNU Lesser General Public
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
19 * License along with FFmpeg; if not, write to the Free Software
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
21 */
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
22
12475
9fef0a8ddd63 Move mm_support() from libavcodec to libavutil, make it a public
stefano
parents: 12473
diff changeset
23 #include "libavutil/cpu.h"
12011
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
24 #include "libavcodec/vp8dsp.h"
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
25 #include "dsputil_altivec.h"
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
26 #include "types_altivec.h"
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
27 #include "util_altivec.h"
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
28
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
/* Expand to a brace-enclosed initializer holding four copies of the argument list. */
#define REPT4(...)                  \
    { __VA_ARGS__, __VA_ARGS__,     \
      __VA_ARGS__, __VA_ARGS__ }
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
30
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
31 // h subpel filter uses msum to multiply+add 4 pixel taps at once
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
32 static const vec_s8 h_subpel_filters_inner[7] =
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
33 {
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
34 REPT4( -6, 123, 12, -1),
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
35 REPT4(-11, 108, 36, -8),
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
36 REPT4( -9, 93, 50, -6),
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
37 REPT4(-16, 77, 77, -16),
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
38 REPT4( -6, 50, 93, -9),
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
39 REPT4( -8, 36, 108, -11),
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
40 REPT4( -1, 12, 123, -6),
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
41 };
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
42
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
43 // for 6tap filters, these are the outer two taps
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
44 // The zeros mask off pixels 4-7 when filtering 0-3
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
45 // and vice-versa
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
46 static const vec_s8 h_subpel_filters_outer[3] =
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
47 {
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
48 REPT4(0, 0, 2, 1),
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
49 REPT4(0, 0, 3, 3),
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
50 REPT4(0, 0, 1, 2),
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
51 };
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
52
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
/*
 * Declare the tap vectors for horizontal filter index i in the current scope:
 *   filter_inner  - the four inner taps
 *   filter_outerh - the two outer taps, laid out for pixels 0-3
 *   filter_outerl - filter_outerh rotated left by two bytes, for pixels 4-7
 * The expansion ends without a semicolon; the caller supplies it.
 */
#define LOAD_H_SUBPEL_FILTER(i)                                          \
    vec_s8 filter_inner  = h_subpel_filters_inner[(i)];                  \
    vec_s8 filter_outerh = h_subpel_filters_outer[(i) >> 1];             \
    vec_s8 filter_outerl = vec_sld(filter_outerh, filter_outerh, 2)
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
57
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
/*
 * Horizontally filter eight pixels of the row at src, starting at byte
 * offset `off`, and leave the rounded, >>7-shifted 16-bit results in dstv.
 * `off` must be a literal for which permh<off>, perml<off> and
 * perm_6tap<off> exist in the calling scope (it is token-pasted).
 *
 * Also expected in the calling scope: src, w, is6tap, the tap vectors
 * declared by LOAD_H_SUBPEL_FILTER, the constants c64 and c7, and the
 * temporaries a, b, pixh, pixl, outer, filth and filtl.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement even when used as the body of an unbraced if/else.
 */
#define FILTER_H(dstv, off) \
do { \
    /* two overlapping aligned loads cover the unaligned span from src+off-2 */ \
    a = vec_ld((off)-2,    src); \
    b = vec_ld((off)-2+15, src); \
\
    pixh = vec_perm(a, b, permh##off); \
    pixl = vec_perm(a, b, perml##off); \
    /* inner taps; c64 seeds the accumulator with the rounding term */ \
    filth = vec_msum(filter_inner, pixh, c64); \
    filtl = vec_msum(filter_inner, pixl, c64); \
\
    if (is6tap) { \
        outer = vec_perm(a, b, perm_6tap##off); \
        filth = vec_msum(filter_outerh, outer, filth); \
        filtl = vec_msum(filter_outerl, outer, filtl); \
    } \
    if (w == 4) \
        filtl = filth; /* discard pixels 4-7 */ \
    dstv = vec_packs(filth, filtl); \
    dstv = vec_sra(dstv, c7); \
} while (0)
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
76
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
77 static av_always_inline
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
78 void put_vp8_epel_h_altivec_core(uint8_t *dst, int dst_stride,
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
79 uint8_t *src, int src_stride,
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
80 int h, int mx, int w, int is6tap)
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
81 {
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
82 LOAD_H_SUBPEL_FILTER(mx-1);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
83 vec_u8 align_vec0, align_vec8, permh0, permh8, filt;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
84 vec_u8 perm_6tap0, perm_6tap8, perml0, perml8;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
85 vec_u8 a, b, pixh, pixl, outer;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
86 vec_s16 f16h, f16l;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
87 vec_s32 filth, filtl;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
88
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
89 vec_u8 perm_inner = { 1,2,3,4, 2,3,4,5, 3,4,5,6, 4,5,6,7 };
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
90 vec_u8 perm_outer = { 4,9, 0,5, 5,10, 1,6, 6,11, 2,7, 7,12, 3,8 };
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
91 vec_s32 c64 = vec_sl(vec_splat_s32(1), vec_splat_u32(6));
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
92 vec_u16 c7 = vec_splat_u16(7);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
93
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
94 align_vec0 = vec_lvsl( -2, src);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
95 align_vec8 = vec_lvsl(8-2, src);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
96
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
97 permh0 = vec_perm(align_vec0, align_vec0, perm_inner);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
98 permh8 = vec_perm(align_vec8, align_vec8, perm_inner);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
99 perm_inner = vec_add(perm_inner, vec_splat_u8(4));
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
100 perml0 = vec_perm(align_vec0, align_vec0, perm_inner);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
101 perml8 = vec_perm(align_vec8, align_vec8, perm_inner);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
102 perm_6tap0 = vec_perm(align_vec0, align_vec0, perm_outer);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
103 perm_6tap8 = vec_perm(align_vec8, align_vec8, perm_outer);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
104
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
105 while (h --> 0) {
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
106 FILTER_H(f16h, 0);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
107
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
108 if (w == 16) {
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
109 FILTER_H(f16l, 8);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
110 filt = vec_packsu(f16h, f16l);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
111 vec_st(filt, 0, dst);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
112 } else {
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
113 filt = vec_packsu(f16h, f16h);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
114 vec_ste((vec_u32)filt, 0, (uint32_t*)dst);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
115 if (w == 8)
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
116 vec_ste((vec_u32)filt, 4, (uint32_t*)dst);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
117 }
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
118 src += src_stride;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
119 dst += dst_stride;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
120 }
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
121 }
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
122
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
123 // v subpel filter does a simple vertical multiply + add
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
124 static const vec_u8 v_subpel_filters[7] =
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
125 {
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
126 { 0, 6, 123, 12, 1, 0 },
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
127 { 2, 11, 108, 36, 8, 1 },
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
128 { 0, 9, 93, 50, 6, 0 },
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
129 { 3, 16, 77, 77, 16, 3 },
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
130 { 0, 6, 50, 93, 9, 0 },
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
131 { 1, 8, 36, 108, 11, 2 },
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
132 { 0, 1, 12, 123, 6, 0 },
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
133 };
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
134
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
/*
 * Declare subpel_filter (the table entry for vertical filter index i) and
 * f0..f5, each holding one of its six taps splatted across a whole vector.
 * The expansion ends without a semicolon; the caller supplies it.
 */
#define LOAD_V_SUBPEL_FILTER(i)                          \
    vec_u8 subpel_filter = v_subpel_filters[(i)];        \
    vec_u8 f0 = vec_splat(subpel_filter, 0);             \
    vec_u8 f1 = vec_splat(subpel_filter, 1);             \
    vec_u8 f2 = vec_splat(subpel_filter, 2);             \
    vec_u8 f3 = vec_splat(subpel_filter, 3);             \
    vec_u8 f4 = vec_splat(subpel_filter, 4);             \
    vec_u8 f5 = vec_splat(subpel_filter, 5)
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
143
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
/*
 * Vertically filter the rows held in s0..s5 and leave the rounded,
 * >>7-shifted 16-bit results in dstv. vec_mul names the widening multiply
 * to use (the callers in this file pass vec_mule or vec_mulo), which
 * selects the even or the odd byte lanes of the source rows.
 *
 * Taps 1 and 4 are subtracted and the others added, all with saturating
 * 16-bit arithmetic; s0 and s5 are only read when is6tap is non-zero.
 *
 * Expected in the calling scope: s0..s5, f0..f5, the temporaries s0f..s5f,
 * the constants c64 and c7, and is6tap.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement even when used as the body of an unbraced if/else.
 */
#define FILTER_V(dstv, vec_mul) \
do { \
    s1f = (vec_s16)vec_mul(s1, f1); \
    s2f = (vec_s16)vec_mul(s2, f2); \
    s3f = (vec_s16)vec_mul(s3, f3); \
    s4f = (vec_s16)vec_mul(s4, f4); \
    s2f = vec_subs(s2f, s1f); \
    s3f = vec_subs(s3f, s4f); \
    if (is6tap) { \
        s0f = (vec_s16)vec_mul(s0, f0); \
        s5f = (vec_s16)vec_mul(s5, f5); \
        s2f = vec_adds(s2f, s0f); \
        s3f = vec_adds(s3f, s5f); \
    } \
    dstv = vec_adds(s2f, s3f); \
    dstv = vec_adds(dstv, c64); /* rounding term for the shift below */ \
    dstv = vec_sra(dstv, c7); \
} while (0)
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
160
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
161 static av_always_inline
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
162 void put_vp8_epel_v_altivec_core(uint8_t *dst, int dst_stride,
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
163 uint8_t *src, int src_stride,
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
164 int h, int my, int w, int is6tap)
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
165 {
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
166 LOAD_V_SUBPEL_FILTER(my-1);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
167 vec_u8 s0, s1, s2, s3, s4, s5, filt, align_vech, perm_vec, align_vecl;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
168 vec_s16 s0f, s1f, s2f, s3f, s4f, s5f, f16h, f16l;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
169 vec_s16 c64 = vec_sl(vec_splat_s16(1), vec_splat_u16(6));
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
170 vec_u16 c7 = vec_splat_u16(7);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
171
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
172 // we want pixels 0-7 to be in the even positions and 8-15 in the odd,
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
173 // so combine this permute with the alignment permute vector
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
174 align_vech = vec_lvsl(0, src);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
175 align_vecl = vec_sld(align_vech, align_vech, 8);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
176 if (w ==16)
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
177 perm_vec = vec_mergeh(align_vech, align_vecl);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
178 else
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
179 perm_vec = vec_mergeh(align_vech, align_vech);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
180
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
181 if (is6tap)
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
182 s0 = load_with_perm_vec(-2*src_stride, src, perm_vec);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
183 s1 = load_with_perm_vec(-1*src_stride, src, perm_vec);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
184 s2 = load_with_perm_vec( 0*src_stride, src, perm_vec);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
185 s3 = load_with_perm_vec( 1*src_stride, src, perm_vec);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
186 if (is6tap)
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
187 s4 = load_with_perm_vec( 2*src_stride, src, perm_vec);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
188
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
189 src += (2+is6tap)*src_stride;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
190
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
191 while (h --> 0) {
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
192 if (is6tap)
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
193 s5 = load_with_perm_vec(0, src, perm_vec);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
194 else
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
195 s4 = load_with_perm_vec(0, src, perm_vec);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
196
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
197 FILTER_V(f16h, vec_mule);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
198
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
199 if (w == 16) {
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
200 FILTER_V(f16l, vec_mulo);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
201 filt = vec_packsu(f16h, f16l);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
202 vec_st(filt, 0, dst);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
203 } else {
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
204 filt = vec_packsu(f16h, f16h);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
205 if (w == 4)
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
206 filt = (vec_u8)vec_splat((vec_u32)filt, 0);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
207 else
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
208 vec_ste((vec_u32)filt, 4, (uint32_t*)dst);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
209 vec_ste((vec_u32)filt, 0, (uint32_t*)dst);
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
210 }
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
211
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
212 if (is6tap)
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
213 s0 = s1;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
214 s1 = s2;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
215 s2 = s3;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
216 s3 = s4;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
217 if (is6tap)
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
218 s4 = s5;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
219
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
220 dst += dst_stride;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
221 src += src_stride;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
222 }
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
223 }
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
224
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
/*
 * Define the out-of-line horizontal-only and vertical-only filter functions
 * for one block width / tap count pair. Each forwards to the matching
 * always-inline core with WIDTH and (TAPS == 6) as constants; the
 * horizontal function ignores my and the vertical one ignores mx.
 */
#define EPEL_FUNCS(WIDTH, TAPS) \
static av_noinline \
void put_vp8_epel ## WIDTH ## _h ## TAPS ## _altivec(uint8_t *dst, int dst_stride, \
                                                     uint8_t *src, int src_stride, \
                                                     int h, int mx, int my) \
{ \
    put_vp8_epel_h_altivec_core(dst, dst_stride, src, src_stride, \
                                h, mx, WIDTH, TAPS == 6); \
} \
\
static av_noinline \
void put_vp8_epel ## WIDTH ## _v ## TAPS ## _altivec(uint8_t *dst, int dst_stride, \
                                                     uint8_t *src, int src_stride, \
                                                     int h, int mx, int my) \
{ \
    put_vp8_epel_v_altivec_core(dst, dst_stride, src, src_stride, \
                                h, my, WIDTH, TAPS == 6); \
}
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
237
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
/*
 * Define the two-pass (horizontal, then vertical) filter function for one
 * block width and pair of tap counts.
 *
 * The horizontal pass filters h+5 rows, starting two rows above src, into
 * the 16-byte-pitch scratch buffer tmp; the vertical pass then filters tmp
 * into dst, starting at tmp's third row. tmp holds 2*WIDTH+5 rows, so h
 * must not exceed 2*WIDTH.
 *
 * The source is walked with its own pitch s; stride is used for dst only.
 */
#define EPEL_HV(WIDTH, HTAPS, VTAPS) \
static void put_vp8_epel ## WIDTH ## _h ## HTAPS ## v ## VTAPS ## _altivec(uint8_t *dst, int stride, uint8_t *src, int s, int h, int mx, int my) \
{ \
    DECLARE_ALIGNED(16, uint8_t, tmp)[(2*WIDTH+5)*16]; \
    put_vp8_epel ## WIDTH ## _h ## HTAPS ## _altivec(tmp, 16,     src-2*s,  s,  h+5, mx, my); \
    put_vp8_epel ## WIDTH ## _v ## VTAPS ## _altivec(dst, stride, tmp+2*16, 16, h,   mx, my); \
}
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
245
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
246 EPEL_FUNCS(16,6)
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
247 EPEL_FUNCS(8, 6)
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
248 EPEL_FUNCS(8, 4)
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
249 EPEL_FUNCS(4, 6)
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
250 EPEL_FUNCS(4, 4)
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
251
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
252 EPEL_HV(16, 6,6)
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
253 EPEL_HV(8, 6,6)
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
254 EPEL_HV(8, 4,6)
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
255 EPEL_HV(8, 6,4)
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
256 EPEL_HV(8, 4,4)
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
257 EPEL_HV(4, 6,6)
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
258 EPEL_HV(4, 4,6)
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
259 EPEL_HV(4, 6,4)
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
260 EPEL_HV(4, 4,4)
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
261
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
/*
 * Copy a 16-pixel-wide block of h rows from src to dst (no filtering;
 * mx and my are ignored).
 *
 * dst, stride  destination and its row pitch
 * src, s       source and its row pitch
 *
 * put_pixels16_altivec takes a single pitch for both buffers, so it is used
 * only when the two pitches agree. Otherwise each row is copied with an
 * unaligned load and a vec_st store, stepping src and dst by their own
 * pitches.
 */
static void put_vp8_pixels16_altivec(uint8_t *dst, int stride, uint8_t *src, int s, int h, int mx, int my)
{
    if (s == stride) {
        put_pixels16_altivec(dst, src, stride, h);
        return;
    }

    for (; h > 0; h--) {
        /* the permute is recomputed per row: s need not preserve alignment */
        vec_st(load_with_perm_vec(0, src, vec_lvsl(0, src)), 0, dst);
        src += s;
        dst += stride;
    }
}
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
266
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
267 av_cold void ff_vp8dsp_init_altivec(VP8DSPContext *c)
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
268 {
12475
9fef0a8ddd63 Move mm_support() from libavcodec to libavutil, make it a public
stefano
parents: 12473
diff changeset
269 if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
12011
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
270 return;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
271
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
272 c->put_vp8_epel_pixels_tab[0][0][0] = put_vp8_pixels16_altivec;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
273 c->put_vp8_epel_pixels_tab[0][0][2] = put_vp8_epel16_h6_altivec;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
274 c->put_vp8_epel_pixels_tab[0][2][0] = put_vp8_epel16_v6_altivec;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
275 c->put_vp8_epel_pixels_tab[0][2][2] = put_vp8_epel16_h6v6_altivec;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
276
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
277 c->put_vp8_epel_pixels_tab[1][0][2] = put_vp8_epel8_h6_altivec;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
278 c->put_vp8_epel_pixels_tab[1][2][0] = put_vp8_epel8_v6_altivec;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
279 c->put_vp8_epel_pixels_tab[1][0][1] = put_vp8_epel8_h4_altivec;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
280 c->put_vp8_epel_pixels_tab[1][1][0] = put_vp8_epel8_v4_altivec;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
281
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
282 c->put_vp8_epel_pixels_tab[1][2][2] = put_vp8_epel8_h6v6_altivec;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
283 c->put_vp8_epel_pixels_tab[1][1][1] = put_vp8_epel8_h4v4_altivec;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
284 c->put_vp8_epel_pixels_tab[1][1][2] = put_vp8_epel8_h6v4_altivec;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
285 c->put_vp8_epel_pixels_tab[1][2][1] = put_vp8_epel8_h4v6_altivec;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
286
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
287 c->put_vp8_epel_pixels_tab[2][0][2] = put_vp8_epel4_h6_altivec;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
288 c->put_vp8_epel_pixels_tab[2][2][0] = put_vp8_epel4_v6_altivec;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
289 c->put_vp8_epel_pixels_tab[2][0][1] = put_vp8_epel4_h4_altivec;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
290 c->put_vp8_epel_pixels_tab[2][1][0] = put_vp8_epel4_v4_altivec;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
291
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
292 c->put_vp8_epel_pixels_tab[2][2][2] = put_vp8_epel4_h6v6_altivec;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
293 c->put_vp8_epel_pixels_tab[2][1][1] = put_vp8_epel4_h4v4_altivec;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
294 c->put_vp8_epel_pixels_tab[2][1][2] = put_vp8_epel4_h6v4_altivec;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
295 c->put_vp8_epel_pixels_tab[2][2][1] = put_vp8_epel4_h4v6_altivec;
f96187e79438 Altivec VP8 MC functions
conrad
parents:
diff changeset
296 }