comparison x86/vp8dsp-init.c @ 12054:b8f80fe02861 libavcodec

SSSE3 versions of width4 VP8 6-tap MC functions. Also make some small changes to saturation order of 4-tap SSSE3 MC to fix a non-bitexactness bug. Patch mostly by Eli Friedman <eli.friedman AT gmail DOT com>.
author darkshikari
date Fri, 02 Jul 2010 05:27:41 +0000
parents dc4feabd4dab
children 8527154f6e81
comparison
equal deleted inserted replaced
12053:aa0b01031adf 12054:b8f80fe02861
52 int height, int mx, int my); 52 int height, int mx, int my);
53 extern void ff_put_vp8_epel8_v6_sse2 (uint8_t *dst, int dststride, 53 extern void ff_put_vp8_epel8_v6_sse2 (uint8_t *dst, int dststride,
54 uint8_t *src, int srcstride, 54 uint8_t *src, int srcstride,
55 int height, int mx, int my); 55 int height, int mx, int my);
56 56
57 extern void ff_put_vp8_epel4_h4_ssse3 (uint8_t *dst, int dststride,
58 uint8_t *src, int srcstride,
59 int height, int mx, int my);
60 extern void ff_put_vp8_epel4_h6_ssse3 (uint8_t *dst, int dststride,
61 uint8_t *src, int srcstride,
62 int height, int mx, int my);
63 extern void ff_put_vp8_epel4_v4_ssse3 (uint8_t *dst, int dststride,
64 uint8_t *src, int srcstride,
65 int height, int mx, int my);
66 extern void ff_put_vp8_epel4_v6_ssse3 (uint8_t *dst, int dststride,
67 uint8_t *src, int srcstride,
68 int height, int mx, int my);
57 extern void ff_put_vp8_epel8_h4_ssse3 (uint8_t *dst, int dststride, 69 extern void ff_put_vp8_epel8_h4_ssse3 (uint8_t *dst, int dststride,
58 uint8_t *src, int srcstride, 70 uint8_t *src, int srcstride,
59 int height, int mx, int my); 71 int height, int mx, int my);
60 extern void ff_put_vp8_epel8_h6_ssse3 (uint8_t *dst, int dststride, 72 extern void ff_put_vp8_epel8_h6_ssse3 (uint8_t *dst, int dststride,
61 uint8_t *src, int srcstride, 73 uint8_t *src, int srcstride,
171 HVTAPSSE2(4, 6, 8) 183 HVTAPSSE2(4, 6, 8)
172 HVTAPSSE2(6, 4, 8) 184 HVTAPSSE2(6, 4, 8)
173 HVTAPSSE2(6, 6, 8) 185 HVTAPSSE2(6, 6, 8)
174 HVTAPSSE2(6, 6, 16) 186 HVTAPSSE2(6, 6, 16)
175 187
188 HVTAP(ssse3, 16, 4, 4, 4, 8)
189 HVTAP(ssse3, 16, 4, 6, 4, 8)
190 HVTAP(ssse3, 16, 6, 4, 4, 8)
191 HVTAP(ssse3, 16, 6, 6, 4, 8)
192
176 #define HVBILIN(OPT, ALIGN, SIZE, MAXHEIGHT) \ 193 #define HVBILIN(OPT, ALIGN, SIZE, MAXHEIGHT) \
177 static void ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT( \ 194 static void ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT( \
178 uint8_t *dst, int dststride, uint8_t *src, \ 195 uint8_t *dst, int dststride, uint8_t *src, \
179 int srcstride, int height, int mx, int my) \ 196 int srcstride, int height, int mx, int my) \
180 { \ 197 { \
262 } 279 }
263 280
264 if (mm_flags & FF_MM_SSSE3) { 281 if (mm_flags & FF_MM_SSSE3) {
265 VP8_LUMA_MC_FUNC(0, 16, ssse3); 282 VP8_LUMA_MC_FUNC(0, 16, ssse3);
266 VP8_MC_FUNC(1, 8, ssse3); 283 VP8_MC_FUNC(1, 8, ssse3);
284 VP8_MC_FUNC(2, 4, ssse3);
267 VP8_BILINEAR_MC_FUNC(0, 16, ssse3); 285 VP8_BILINEAR_MC_FUNC(0, 16, ssse3);
268 VP8_BILINEAR_MC_FUNC(1, 8, ssse3); 286 VP8_BILINEAR_MC_FUNC(1, 8, ssse3);
269 } 287 }
270 288
271 if (mm_flags & FF_MM_SSE4) { 289 if (mm_flags & FF_MM_SSE4) {