Mercurial > libavcodec.hg
comparison x86/vp8dsp-init.c @ 12196:552c7c10bc73 libavcodec
Implement chroma (width=8) inner loopfilter MMX/MMX2/SSE2 functions.
author | rbultje |
---|---|
date | Mon, 19 Jul 2010 21:53:28 +0000 |
parents | 80b142c2e9f7 |
children | fbf4d5b1b664 |
comparison legend: equal | deleted | inserted | replaced
12195:e7847fcff0f4 | 12196:552c7c10bc73 |
---|---|
240 int e, int i, int hvt); | 240 int e, int i, int hvt); |
241 extern void ff_vp8_h_loop_filter16y_inner_mmxext(uint8_t *dst, int stride, | 241 extern void ff_vp8_h_loop_filter16y_inner_mmxext(uint8_t *dst, int stride, |
242 int e, int i, int hvt); | 242 int e, int i, int hvt); |
243 extern void ff_vp8_h_loop_filter16y_inner_sse2 (uint8_t *dst, int stride, | 243 extern void ff_vp8_h_loop_filter16y_inner_sse2 (uint8_t *dst, int stride, |
244 int e, int i, int hvt); | 244 int e, int i, int hvt); |
245 | |
246 extern void ff_vp8_v_loop_filter8uv_inner_mmx (uint8_t *dstU, uint8_t *dstV, | |
247 int s, int e, int i, int hvt); | |
248 extern void ff_vp8_v_loop_filter8uv_inner_mmxext(uint8_t *dstU, uint8_t *dstV, | |
249 int s, int e, int i, int hvt); | |
250 extern void ff_vp8_v_loop_filter8uv_inner_sse2 (uint8_t *dstU, uint8_t *dstV, | |
251 int s, int e, int i, int hvt); | |
252 extern void ff_vp8_h_loop_filter8uv_inner_mmx (uint8_t *dstU, uint8_t *dstV, | |
253 int s, int e, int i, int hvt); | |
254 extern void ff_vp8_h_loop_filter8uv_inner_mmxext(uint8_t *dstU, uint8_t *dstV, | |
255 int s, int e, int i, int hvt); | |
256 extern void ff_vp8_h_loop_filter8uv_inner_sse2 (uint8_t *dstU, uint8_t *dstV, | |
257 int s, int e, int i, int hvt); | |
245 #endif | 258 #endif |
246 | 259 |
247 #define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \ | 260 #define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \ |
248 c->put_vp8_epel_pixels_tab[IDX][0][2] = ff_put_vp8_epel ## SIZE ## _h6_ ## OPT; \ | 261 c->put_vp8_epel_pixels_tab[IDX][0][2] = ff_put_vp8_epel ## SIZE ## _h6_ ## OPT; \ |
249 c->put_vp8_epel_pixels_tab[IDX][2][0] = ff_put_vp8_epel ## SIZE ## _v6_ ## OPT; \ | 262 c->put_vp8_epel_pixels_tab[IDX][2][0] = ff_put_vp8_epel ## SIZE ## _v6_ ## OPT; \ |
284 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx; | 297 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx; |
285 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx; | 298 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx; |
286 | 299 |
287 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx; | 300 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx; |
288 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx; | 301 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx; |
302 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx; | |
303 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx; | |
289 } | 304 } |
290 | 305 |
291 /* note that 4-tap width=16 functions are missing because w=16 | 306 /* note that 4-tap width=16 functions are missing because w=16 |
292 * is only used for luma, and luma is always a copy or sixtap. */ | 307 * is only used for luma, and luma is always a copy or sixtap. */ |
293 if (mm_flags & FF_MM_MMX2) { | 308 if (mm_flags & FF_MM_MMX2) { |
302 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext; | 317 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext; |
303 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext; | 318 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext; |
304 | 319 |
305 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext; | 320 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext; |
306 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext; | 321 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext; |
322 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmxext; | |
323 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmxext; | |
307 } | 324 } |
308 | 325 |
309 if (mm_flags & FF_MM_SSE) { | 326 if (mm_flags & FF_MM_SSE) { |
310 c->put_vp8_epel_pixels_tab[0][0][0] = | 327 c->put_vp8_epel_pixels_tab[0][0][0] = |
311 c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse; | 328 c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse; |
320 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2; | 337 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2; |
321 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2; | 338 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2; |
322 | 339 |
323 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2; | 340 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2; |
324 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2; | 341 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2; |
342 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2; | |
343 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2; | |
325 } | 344 } |
326 | 345 |
327 if (mm_flags & FF_MM_SSSE3) { | 346 if (mm_flags & FF_MM_SSSE3) { |
328 VP8_LUMA_MC_FUNC(0, 16, ssse3); | 347 VP8_LUMA_MC_FUNC(0, 16, ssse3); |
329 VP8_MC_FUNC(1, 8, ssse3); | 348 VP8_MC_FUNC(1, 8, ssse3); |