Mercurial > libavcodec.hg
comparison i386/dsputil_mmx.c @ 4197:bbe0bc387a19 libavcodec
revert bad checkin
field | value |
---|---|
author | mru |
date | Tue, 14 Nov 2006 03:18:09 +0000 |
parents | fbac0859753d |
children | d3e389536b0a |
comparison
equal
deleted
inserted
replaced
4196:fbac0859753d | 4197:bbe0bc387a19 |
---|---|
32 //#include <assert.h> | 32 //#include <assert.h> |
33 | 33 |
34 extern void ff_idct_xvid_mmx(short *block); | 34 extern void ff_idct_xvid_mmx(short *block); |
35 extern void ff_idct_xvid_mmx2(short *block); | 35 extern void ff_idct_xvid_mmx2(short *block); |
36 | 36 |
37 int ff_mm_flags; /* multimedia extension flags */ | 37 int mm_flags; /* multimedia extension flags */ |
38 | 38 |
39 /* pixel operations */ | 39 /* pixel operations */ |
40 static const uint64_t mm_bone attribute_used __attribute__ ((aligned(8))) = 0x0101010101010101ULL; | 40 static const uint64_t mm_bone attribute_used __attribute__ ((aligned(8))) = 0x0101010101010101ULL; |
41 static const uint64_t mm_wone attribute_used __attribute__ ((aligned(8))) = 0x0001000100010001ULL; | 41 static const uint64_t mm_wone attribute_used __attribute__ ((aligned(8))) = 0x0001000100010001ULL; |
42 static const uint64_t mm_wtwo attribute_used __attribute__ ((aligned(8))) = 0x0002000200020002ULL; | 42 static const uint64_t mm_wtwo attribute_used __attribute__ ((aligned(8))) = 0x0002000200020002ULL; |
3048 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); | 3048 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); |
3049 #endif | 3049 #endif |
3050 | 3050 |
3051 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) | 3051 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) |
3052 { | 3052 { |
3053 ff_mm_flags = mm_support(); | 3053 mm_flags = mm_support(); |
3054 | 3054 |
3055 if (avctx->dsp_mask) { | 3055 if (avctx->dsp_mask) { |
3056 if (avctx->dsp_mask & FF_MM_FORCE) | 3056 if (avctx->dsp_mask & FF_MM_FORCE) |
3057 ff_mm_flags |= (avctx->dsp_mask & 0xffff); | 3057 mm_flags |= (avctx->dsp_mask & 0xffff); |
3058 else | 3058 else |
3059 ff_mm_flags &= ~(avctx->dsp_mask & 0xffff); | 3059 mm_flags &= ~(avctx->dsp_mask & 0xffff); |
3060 } | 3060 } |
3061 | 3061 |
3062 #if 0 | 3062 #if 0 |
3063 av_log(avctx, AV_LOG_INFO, "libavcodec: CPU flags:"); | 3063 av_log(avctx, AV_LOG_INFO, "libavcodec: CPU flags:"); |
3064 if (ff_mm_flags & MM_MMX) | 3064 if (mm_flags & MM_MMX) |
3065 av_log(avctx, AV_LOG_INFO, " mmx"); | 3065 av_log(avctx, AV_LOG_INFO, " mmx"); |
3066 if (ff_mm_flags & MM_MMXEXT) | 3066 if (mm_flags & MM_MMXEXT) |
3067 av_log(avctx, AV_LOG_INFO, " mmxext"); | 3067 av_log(avctx, AV_LOG_INFO, " mmxext"); |
3068 if (ff_mm_flags & MM_3DNOW) | 3068 if (mm_flags & MM_3DNOW) |
3069 av_log(avctx, AV_LOG_INFO, " 3dnow"); | 3069 av_log(avctx, AV_LOG_INFO, " 3dnow"); |
3070 if (ff_mm_flags & MM_SSE) | 3070 if (mm_flags & MM_SSE) |
3071 av_log(avctx, AV_LOG_INFO, " sse"); | 3071 av_log(avctx, AV_LOG_INFO, " sse"); |
3072 if (ff_mm_flags & MM_SSE2) | 3072 if (mm_flags & MM_SSE2) |
3073 av_log(avctx, AV_LOG_INFO, " sse2"); | 3073 av_log(avctx, AV_LOG_INFO, " sse2"); |
3074 av_log(avctx, AV_LOG_INFO, "\n"); | 3074 av_log(avctx, AV_LOG_INFO, "\n"); |
3075 #endif | 3075 #endif |
3076 | 3076 |
3077 if (ff_mm_flags & MM_MMX) { | 3077 if (mm_flags & MM_MMX) { |
3078 const int idct_algo= avctx->idct_algo; | 3078 const int idct_algo= avctx->idct_algo; |
3079 | 3079 |
3080 #ifdef CONFIG_ENCODERS | 3080 #ifdef CONFIG_ENCODERS |
3081 const int dct_algo = avctx->dct_algo; | 3081 const int dct_algo = avctx->dct_algo; |
3082 if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){ | 3082 if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){ |
3083 if(ff_mm_flags & MM_SSE2){ | 3083 if(mm_flags & MM_SSE2){ |
3084 c->fdct = ff_fdct_sse2; | 3084 c->fdct = ff_fdct_sse2; |
3085 }else if(ff_mm_flags & MM_MMXEXT){ | 3085 }else if(mm_flags & MM_MMXEXT){ |
3086 c->fdct = ff_fdct_mmx2; | 3086 c->fdct = ff_fdct_mmx2; |
3087 }else{ | 3087 }else{ |
3088 c->fdct = ff_fdct_mmx; | 3088 c->fdct = ff_fdct_mmx; |
3089 } | 3089 } |
3090 } | 3090 } |
3095 c->idct_add= ff_simple_idct_add_mmx; | 3095 c->idct_add= ff_simple_idct_add_mmx; |
3096 c->idct = ff_simple_idct_mmx; | 3096 c->idct = ff_simple_idct_mmx; |
3097 c->idct_permutation_type= FF_SIMPLE_IDCT_PERM; | 3097 c->idct_permutation_type= FF_SIMPLE_IDCT_PERM; |
3098 #ifdef CONFIG_GPL | 3098 #ifdef CONFIG_GPL |
3099 }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){ | 3099 }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){ |
3100 if(ff_mm_flags & MM_MMXEXT){ | 3100 if(mm_flags & MM_MMXEXT){ |
3101 c->idct_put= ff_libmpeg2mmx2_idct_put; | 3101 c->idct_put= ff_libmpeg2mmx2_idct_put; |
3102 c->idct_add= ff_libmpeg2mmx2_idct_add; | 3102 c->idct_add= ff_libmpeg2mmx2_idct_add; |
3103 c->idct = ff_mmxext_idct; | 3103 c->idct = ff_mmxext_idct; |
3104 }else{ | 3104 }else{ |
3105 c->idct_put= ff_libmpeg2mmx_idct_put; | 3105 c->idct_put= ff_libmpeg2mmx_idct_put; |
3109 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; | 3109 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; |
3110 #endif | 3110 #endif |
3111 }else if(idct_algo==FF_IDCT_VP3 && | 3111 }else if(idct_algo==FF_IDCT_VP3 && |
3112 avctx->codec->id!=CODEC_ID_THEORA && | 3112 avctx->codec->id!=CODEC_ID_THEORA && |
3113 !(avctx->flags & CODEC_FLAG_BITEXACT)){ | 3113 !(avctx->flags & CODEC_FLAG_BITEXACT)){ |
3114 if(ff_mm_flags & MM_SSE2){ | 3114 if(mm_flags & MM_SSE2){ |
3115 c->idct_put= ff_vp3_idct_put_sse2; | 3115 c->idct_put= ff_vp3_idct_put_sse2; |
3116 c->idct_add= ff_vp3_idct_add_sse2; | 3116 c->idct_add= ff_vp3_idct_add_sse2; |
3117 c->idct = ff_vp3_idct_sse2; | 3117 c->idct = ff_vp3_idct_sse2; |
3118 c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM; | 3118 c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM; |
3119 }else{ | 3119 }else{ |
3124 c->idct_permutation_type= FF_PARTTRANS_IDCT_PERM; | 3124 c->idct_permutation_type= FF_PARTTRANS_IDCT_PERM; |
3125 } | 3125 } |
3126 }else if(idct_algo==FF_IDCT_CAVS){ | 3126 }else if(idct_algo==FF_IDCT_CAVS){ |
3127 c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM; | 3127 c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM; |
3128 }else if(idct_algo==FF_IDCT_XVIDMMX){ | 3128 }else if(idct_algo==FF_IDCT_XVIDMMX){ |
3129 if(ff_mm_flags & MM_MMXEXT){ | 3129 if(mm_flags & MM_MMXEXT){ |
3130 c->idct_put= ff_idct_xvid_mmx2_put; | 3130 c->idct_put= ff_idct_xvid_mmx2_put; |
3131 c->idct_add= ff_idct_xvid_mmx2_add; | 3131 c->idct_add= ff_idct_xvid_mmx2_add; |
3132 c->idct = ff_idct_xvid_mmx2; | 3132 c->idct = ff_idct_xvid_mmx2; |
3133 }else{ | 3133 }else{ |
3134 c->idct_put= ff_idct_xvid_mmx_put; | 3134 c->idct_put= ff_idct_xvid_mmx_put; |
3198 | 3198 |
3199 c->hadamard8_diff[0]= hadamard8_diff16_mmx; | 3199 c->hadamard8_diff[0]= hadamard8_diff16_mmx; |
3200 c->hadamard8_diff[1]= hadamard8_diff_mmx; | 3200 c->hadamard8_diff[1]= hadamard8_diff_mmx; |
3201 | 3201 |
3202 c->pix_norm1 = pix_norm1_mmx; | 3202 c->pix_norm1 = pix_norm1_mmx; |
3203 c->sse[0] = (ff_mm_flags & MM_SSE2) ? sse16_sse2 : sse16_mmx; | 3203 c->sse[0] = (mm_flags & MM_SSE2) ? sse16_sse2 : sse16_mmx; |
3204 c->sse[1] = sse8_mmx; | 3204 c->sse[1] = sse8_mmx; |
3205 c->vsad[4]= vsad_intra16_mmx; | 3205 c->vsad[4]= vsad_intra16_mmx; |
3206 | 3206 |
3207 c->nsse[0] = nsse16_mmx; | 3207 c->nsse[0] = nsse16_mmx; |
3208 c->nsse[1] = nsse8_mmx; | 3208 c->nsse[1] = nsse8_mmx; |
3225 c->h264_idct_dc_add= | 3225 c->h264_idct_dc_add= |
3226 c->h264_idct_add= ff_h264_idct_add_mmx; | 3226 c->h264_idct_add= ff_h264_idct_add_mmx; |
3227 c->h264_idct8_dc_add= | 3227 c->h264_idct8_dc_add= |
3228 c->h264_idct8_add= ff_h264_idct8_add_mmx; | 3228 c->h264_idct8_add= ff_h264_idct8_add_mmx; |
3229 | 3229 |
3230 if (ff_mm_flags & MM_MMXEXT) { | 3230 if (mm_flags & MM_MMXEXT) { |
3231 c->prefetch = prefetch_mmx2; | 3231 c->prefetch = prefetch_mmx2; |
3232 | 3232 |
3233 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; | 3233 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; |
3234 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; | 3234 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; |
3235 | 3235 |
3367 #endif | 3367 #endif |
3368 | 3368 |
3369 #ifdef CONFIG_ENCODERS | 3369 #ifdef CONFIG_ENCODERS |
3370 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2; | 3370 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2; |
3371 #endif //CONFIG_ENCODERS | 3371 #endif //CONFIG_ENCODERS |
3372 } else if (ff_mm_flags & MM_3DNOW) { | 3372 } else if (mm_flags & MM_3DNOW) { |
3373 c->prefetch = prefetch_3dnow; | 3373 c->prefetch = prefetch_3dnow; |
3374 | 3374 |
3375 c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; | 3375 c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; |
3376 c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; | 3376 c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; |
3377 | 3377 |
3461 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_3dnow; | 3461 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_3dnow; |
3462 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow; | 3462 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow; |
3463 } | 3463 } |
3464 | 3464 |
3465 #ifdef CONFIG_SNOW_ENCODER | 3465 #ifdef CONFIG_SNOW_ENCODER |
3466 if(ff_mm_flags & MM_SSE2){ | 3466 if(mm_flags & MM_SSE2){ |
3467 c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2; | 3467 c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2; |
3468 c->vertical_compose97i = ff_snow_vertical_compose97i_sse2; | 3468 c->vertical_compose97i = ff_snow_vertical_compose97i_sse2; |
3469 c->inner_add_yblock = ff_snow_inner_add_yblock_sse2; | 3469 c->inner_add_yblock = ff_snow_inner_add_yblock_sse2; |
3470 } | 3470 } |
3471 else{ | 3471 else{ |
3473 c->vertical_compose97i = ff_snow_vertical_compose97i_mmx; | 3473 c->vertical_compose97i = ff_snow_vertical_compose97i_mmx; |
3474 c->inner_add_yblock = ff_snow_inner_add_yblock_mmx; | 3474 c->inner_add_yblock = ff_snow_inner_add_yblock_mmx; |
3475 } | 3475 } |
3476 #endif | 3476 #endif |
3477 | 3477 |
3478 if(ff_mm_flags & MM_3DNOW){ | 3478 if(mm_flags & MM_3DNOW){ |
3479 c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow; | 3479 c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow; |
3480 c->vector_fmul = vector_fmul_3dnow; | 3480 c->vector_fmul = vector_fmul_3dnow; |
3481 if(!(avctx->flags & CODEC_FLAG_BITEXACT)) | 3481 if(!(avctx->flags & CODEC_FLAG_BITEXACT)) |
3482 c->float_to_int16 = float_to_int16_3dnow; | 3482 c->float_to_int16 = float_to_int16_3dnow; |
3483 } | 3483 } |
3484 if(ff_mm_flags & MM_3DNOWEXT) | 3484 if(mm_flags & MM_3DNOWEXT) |
3485 c->vector_fmul_reverse = vector_fmul_reverse_3dnow2; | 3485 c->vector_fmul_reverse = vector_fmul_reverse_3dnow2; |
3486 if(ff_mm_flags & MM_SSE){ | 3486 if(mm_flags & MM_SSE){ |
3487 c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse; | 3487 c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse; |
3488 c->vector_fmul = vector_fmul_sse; | 3488 c->vector_fmul = vector_fmul_sse; |
3489 c->float_to_int16 = float_to_int16_sse; | 3489 c->float_to_int16 = float_to_int16_sse; |
3490 c->vector_fmul_reverse = vector_fmul_reverse_sse; | 3490 c->vector_fmul_reverse = vector_fmul_reverse_sse; |
3491 c->vector_fmul_add_add = vector_fmul_add_add_sse; | 3491 c->vector_fmul_add_add = vector_fmul_add_add_sse; |
3492 } | 3492 } |
3493 if(ff_mm_flags & MM_3DNOW) | 3493 if(mm_flags & MM_3DNOW) |
3494 c->vector_fmul_add_add = vector_fmul_add_add_3dnow; // faster than sse | 3494 c->vector_fmul_add_add = vector_fmul_add_add_3dnow; // faster than sse |
3495 } | 3495 } |
3496 | 3496 |
3497 #ifdef CONFIG_ENCODERS | 3497 #ifdef CONFIG_ENCODERS |
3498 dsputil_init_pix_mmx(c, avctx); | 3498 dsputil_init_pix_mmx(c, avctx); |