comparison x86/vp8dsp-init.c @ 12204:563339ea87aa libavcodec

Chroma (width=8) inner loopfilter MMX/MMX2/SSE2 for VP8 decoder.
author rbultje
date Tue, 20 Jul 2010 22:04:18 +0000
parents 677570e65a75
children d38e8565ba05
comparison
equal deleted inserted replaced
12203:a2c993c7ae90 12204:563339ea87aa
240 int e, int i, int hvt); 240 int e, int i, int hvt);
241 extern void ff_vp8_h_loop_filter16y_inner_mmxext(uint8_t *dst, int stride, 241 extern void ff_vp8_h_loop_filter16y_inner_mmxext(uint8_t *dst, int stride,
242 int e, int i, int hvt); 242 int e, int i, int hvt);
243 extern void ff_vp8_h_loop_filter16y_inner_sse2 (uint8_t *dst, int stride, 243 extern void ff_vp8_h_loop_filter16y_inner_sse2 (uint8_t *dst, int stride,
244 int e, int i, int hvt); 244 int e, int i, int hvt);
245
246 extern void ff_vp8_v_loop_filter8uv_inner_mmx (uint8_t *dstU, uint8_t *dstV,
247 int s, int e, int i, int hvt);
248 extern void ff_vp8_v_loop_filter8uv_inner_mmxext(uint8_t *dstU, uint8_t *dstV,
249 int s, int e, int i, int hvt);
250 extern void ff_vp8_v_loop_filter8uv_inner_sse2 (uint8_t *dstU, uint8_t *dstV,
251 int s, int e, int i, int hvt);
252 extern void ff_vp8_h_loop_filter8uv_inner_mmx (uint8_t *dstU, uint8_t *dstV,
253 int s, int e, int i, int hvt);
254 extern void ff_vp8_h_loop_filter8uv_inner_mmxext(uint8_t *dstU, uint8_t *dstV,
255 int s, int e, int i, int hvt);
256 extern void ff_vp8_h_loop_filter8uv_inner_sse2 (uint8_t *dstU, uint8_t *dstV,
257 int s, int e, int i, int hvt);
245 #endif 258 #endif
246 259
247 #define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \ 260 #define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \
248 c->put_vp8_epel_pixels_tab[IDX][0][2] = ff_put_vp8_epel ## SIZE ## _h6_ ## OPT; \ 261 c->put_vp8_epel_pixels_tab[IDX][0][2] = ff_put_vp8_epel ## SIZE ## _h6_ ## OPT; \
249 c->put_vp8_epel_pixels_tab[IDX][2][0] = ff_put_vp8_epel ## SIZE ## _v6_ ## OPT; \ 262 c->put_vp8_epel_pixels_tab[IDX][2][0] = ff_put_vp8_epel ## SIZE ## _v6_ ## OPT; \
284 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx; 297 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx;
285 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx; 298 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx;
286 299
287 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx; 300 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx;
288 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx; 301 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx;
302 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx;
303 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx;
289 } 304 }
290 305
291 /* note that 4-tap width=16 functions are missing because w=16 306 /* note that 4-tap width=16 functions are missing because w=16
292 * is only used for luma, and luma is always a copy or sixtap. */ 307 * is only used for luma, and luma is always a copy or sixtap. */
293 if (mm_flags & FF_MM_MMX2) { 308 if (mm_flags & FF_MM_MMX2) {
302 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext; 317 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext;
303 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext; 318 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext;
304 319
305 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext; 320 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext;
306 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext; 321 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext;
322 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmxext;
323 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmxext;
307 } 324 }
308 325
309 if (mm_flags & FF_MM_SSE) { 326 if (mm_flags & FF_MM_SSE) {
310 c->put_vp8_epel_pixels_tab[0][0][0] = 327 c->put_vp8_epel_pixels_tab[0][0][0] =
311 c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse; 328 c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
319 336
320 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2; 337 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2;
321 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2; 338 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2;
322 339
323 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2; 340 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;
341 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2;
324 } 342 }
325 343
326 if (mm_flags & FF_MM_SSE2) { 344 if (mm_flags & FF_MM_SSE2) {
327 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2; 345 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2;
346 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2;
328 } 347 }
329 348
330 if (mm_flags & FF_MM_SSSE3) { 349 if (mm_flags & FF_MM_SSSE3) {
331 VP8_LUMA_MC_FUNC(0, 16, ssse3); 350 VP8_LUMA_MC_FUNC(0, 16, ssse3);
332 VP8_MC_FUNC(1, 8, ssse3); 351 VP8_MC_FUNC(1, 8, ssse3);