comparison x86/vp8dsp-init.c @ 12198:677570e65a75 libavcodec

Revert r24339 (it causes FATE failures on x86-64); I'll figure out what's wrong with it tomorrow or so, then re-submit.
author rbultje
date Mon, 19 Jul 2010 23:57:09 +0000
parents fbf4d5b1b664
children 563339ea87aa
comparison
equal deleted inserted replaced
12197:fbf4d5b1b664 12198:677570e65a75
240 int e, int i, int hvt); 240 int e, int i, int hvt);
241 extern void ff_vp8_h_loop_filter16y_inner_mmxext(uint8_t *dst, int stride, 241 extern void ff_vp8_h_loop_filter16y_inner_mmxext(uint8_t *dst, int stride,
242 int e, int i, int hvt); 242 int e, int i, int hvt);
243 extern void ff_vp8_h_loop_filter16y_inner_sse2 (uint8_t *dst, int stride, 243 extern void ff_vp8_h_loop_filter16y_inner_sse2 (uint8_t *dst, int stride,
244 int e, int i, int hvt); 244 int e, int i, int hvt);
245
246 extern void ff_vp8_v_loop_filter8uv_inner_mmx (uint8_t *dstU, uint8_t *dstV,
247 int s, int e, int i, int hvt);
248 extern void ff_vp8_v_loop_filter8uv_inner_mmxext(uint8_t *dstU, uint8_t *dstV,
249 int s, int e, int i, int hvt);
250 extern void ff_vp8_v_loop_filter8uv_inner_sse2 (uint8_t *dstU, uint8_t *dstV,
251 int s, int e, int i, int hvt);
252 extern void ff_vp8_h_loop_filter8uv_inner_mmx (uint8_t *dstU, uint8_t *dstV,
253 int s, int e, int i, int hvt);
254 extern void ff_vp8_h_loop_filter8uv_inner_mmxext(uint8_t *dstU, uint8_t *dstV,
255 int s, int e, int i, int hvt);
256 extern void ff_vp8_h_loop_filter8uv_inner_sse2 (uint8_t *dstU, uint8_t *dstV,
257 int s, int e, int i, int hvt);
258 #endif 245 #endif
259 246
260 #define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \ 247 #define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \
261 c->put_vp8_epel_pixels_tab[IDX][0][2] = ff_put_vp8_epel ## SIZE ## _h6_ ## OPT; \ 248 c->put_vp8_epel_pixels_tab[IDX][0][2] = ff_put_vp8_epel ## SIZE ## _h6_ ## OPT; \
262 c->put_vp8_epel_pixels_tab[IDX][2][0] = ff_put_vp8_epel ## SIZE ## _v6_ ## OPT; \ 249 c->put_vp8_epel_pixels_tab[IDX][2][0] = ff_put_vp8_epel ## SIZE ## _v6_ ## OPT; \
297 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx; 284 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx;
298 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx; 285 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx;
299 286
300 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx; 287 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx;
301 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx; 288 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx;
302 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx;
303 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx;
304 } 289 }
305 290
306 /* note that 4-tap width=16 functions are missing because w=16 291 /* note that 4-tap width=16 functions are missing because w=16
307 * is only used for luma, and luma is always a copy or sixtap. */ 292 * is only used for luma, and luma is always a copy or sixtap. */
308 if (mm_flags & FF_MM_MMX2) { 293 if (mm_flags & FF_MM_MMX2) {
317 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext; 302 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext;
318 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext; 303 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext;
319 304
320 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext; 305 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext;
321 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext; 306 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext;
322 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmxext;
323 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmxext;
324 } 307 }
325 308
326 if (mm_flags & FF_MM_SSE) { 309 if (mm_flags & FF_MM_SSE) {
327 c->put_vp8_epel_pixels_tab[0][0][0] = 310 c->put_vp8_epel_pixels_tab[0][0][0] =
328 c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse; 311 c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
336 319
337 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2; 320 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2;
338 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2; 321 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2;
339 322
340 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2; 323 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;
341 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2;
342 } 324 }
343 325
344 if (mm_flags & FF_MM_SSE2) { 326 if (mm_flags & FF_MM_SSE2) {
345 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2; 327 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2;
346 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2;
347 } 328 }
348 329
349 if (mm_flags & FF_MM_SSSE3) { 330 if (mm_flags & FF_MM_SSSE3) {
350 VP8_LUMA_MC_FUNC(0, 16, ssse3); 331 VP8_LUMA_MC_FUNC(0, 16, ssse3);
351 VP8_MC_FUNC(1, 8, ssse3); 332 VP8_MC_FUNC(1, 8, ssse3);