comparison x86/vp8dsp-init.c @ 12196:552c7c10bc73 libavcodec

Implement the chroma (width=8) inner loop filter functions for MMX, MMX2 and SSE2.
author rbultje
date Mon, 19 Jul 2010 21:53:28 +0000
parents 80b142c2e9f7
children fbf4d5b1b664
comparison
Legend: equal | deleted | inserted | replaced
12195:e7847fcff0f4 12196:552c7c10bc73
240 int e, int i, int hvt); 240 int e, int i, int hvt);
241 extern void ff_vp8_h_loop_filter16y_inner_mmxext(uint8_t *dst, int stride, 241 extern void ff_vp8_h_loop_filter16y_inner_mmxext(uint8_t *dst, int stride,
242 int e, int i, int hvt); 242 int e, int i, int hvt);
243 extern void ff_vp8_h_loop_filter16y_inner_sse2 (uint8_t *dst, int stride, 243 extern void ff_vp8_h_loop_filter16y_inner_sse2 (uint8_t *dst, int stride,
244 int e, int i, int hvt); 244 int e, int i, int hvt);
245
246 extern void ff_vp8_v_loop_filter8uv_inner_mmx (uint8_t *dstU, uint8_t *dstV,
247 int s, int e, int i, int hvt);
248 extern void ff_vp8_v_loop_filter8uv_inner_mmxext(uint8_t *dstU, uint8_t *dstV,
249 int s, int e, int i, int hvt);
250 extern void ff_vp8_v_loop_filter8uv_inner_sse2 (uint8_t *dstU, uint8_t *dstV,
251 int s, int e, int i, int hvt);
252 extern void ff_vp8_h_loop_filter8uv_inner_mmx (uint8_t *dstU, uint8_t *dstV,
253 int s, int e, int i, int hvt);
254 extern void ff_vp8_h_loop_filter8uv_inner_mmxext(uint8_t *dstU, uint8_t *dstV,
255 int s, int e, int i, int hvt);
256 extern void ff_vp8_h_loop_filter8uv_inner_sse2 (uint8_t *dstU, uint8_t *dstV,
257 int s, int e, int i, int hvt);
245 #endif 258 #endif
246 259
247 #define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \ 260 #define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \
248 c->put_vp8_epel_pixels_tab[IDX][0][2] = ff_put_vp8_epel ## SIZE ## _h6_ ## OPT; \ 261 c->put_vp8_epel_pixels_tab[IDX][0][2] = ff_put_vp8_epel ## SIZE ## _h6_ ## OPT; \
249 c->put_vp8_epel_pixels_tab[IDX][2][0] = ff_put_vp8_epel ## SIZE ## _v6_ ## OPT; \ 262 c->put_vp8_epel_pixels_tab[IDX][2][0] = ff_put_vp8_epel ## SIZE ## _v6_ ## OPT; \
284 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx; 297 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx;
285 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx; 298 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx;
286 299
287 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx; 300 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx;
288 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx; 301 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx;
302 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx;
303 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx;
289 } 304 }
290 305
291 /* note that 4-tap width=16 functions are missing because w=16 306 /* note that 4-tap width=16 functions are missing because w=16
292 * is only used for luma, and luma is always a copy or sixtap. */ 307 * is only used for luma, and luma is always a copy or sixtap. */
293 if (mm_flags & FF_MM_MMX2) { 308 if (mm_flags & FF_MM_MMX2) {
302 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext; 317 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext;
303 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext; 318 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext;
304 319
305 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext; 320 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext;
306 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext; 321 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext;
322 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmxext;
323 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmxext;
307 } 324 }
308 325
309 if (mm_flags & FF_MM_SSE) { 326 if (mm_flags & FF_MM_SSE) {
310 c->put_vp8_epel_pixels_tab[0][0][0] = 327 c->put_vp8_epel_pixels_tab[0][0][0] =
311 c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse; 328 c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
320 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2; 337 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2;
321 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2; 338 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2;
322 339
323 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2; 340 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;
324 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2; 341 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2;
342 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2;
343 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2;
325 } 344 }
326 345
327 if (mm_flags & FF_MM_SSSE3) { 346 if (mm_flags & FF_MM_SSSE3) {
328 VP8_LUMA_MC_FUNC(0, 16, ssse3); 347 VP8_LUMA_MC_FUNC(0, 16, ssse3);
329 VP8_MC_FUNC(1, 8, ssse3); 348 VP8_MC_FUNC(1, 8, ssse3);