comparison x86/dsputil_mmx.c @ 10964:abb3b23bda35 libavcodec

Implement an SSE version of scalarproduct_float().
author alexc
date Fri, 22 Jan 2010 23:07:58 +0000
parents 34a65026fa06
children 001eb7e3e2d3
comparison
equal deleted inserted replaced
10963:81033a080136 10964:abb3b23bda35
2508 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); 2508 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
2509 void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, 2509 void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2510 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); 2510 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
2511 2511
2512 2512
2513 float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
2514
2513 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) 2515 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2514 { 2516 {
2515 mm_flags = mm_support(); 2517 mm_flags = mm_support();
2516 2518
2517 if (avctx->dsp_mask) { 2519 if (avctx->dsp_mask) {
2963 c->vector_fmul_window = vector_fmul_window_sse; 2965 c->vector_fmul_window = vector_fmul_window_sse;
2964 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse; 2966 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse;
2965 c->vector_clipf = vector_clipf_sse; 2967 c->vector_clipf = vector_clipf_sse;
2966 c->float_to_int16 = float_to_int16_sse; 2968 c->float_to_int16 = float_to_int16_sse;
2967 c->float_to_int16_interleave = float_to_int16_interleave_sse; 2969 c->float_to_int16_interleave = float_to_int16_interleave_sse;
2970 #if HAVE_YASM
2971 c->scalarproduct_float = ff_scalarproduct_float_sse;
2972 #endif
2968 } 2973 }
2969 if(mm_flags & FF_MM_3DNOW) 2974 if(mm_flags & FF_MM_3DNOW)
2970 c->vector_fmul_add = vector_fmul_add_3dnow; // faster than sse 2975 c->vector_fmul_add = vector_fmul_add_3dnow; // faster than sse
2971 if(mm_flags & FF_MM_SSE2){ 2976 if(mm_flags & FF_MM_SSE2){
2972 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse2; 2977 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse2;