comparison i386/dsputil_mmx.c @ 4197:bbe0bc387a19 libavcodec

revert bad checkin
author mru
date Tue, 14 Nov 2006 03:18:09 +0000
parents fbac0859753d
children d3e389536b0a
comparison
equal deleted inserted replaced
4196:fbac0859753d 4197:bbe0bc387a19
32 //#include <assert.h> 32 //#include <assert.h>
33 33
34 extern void ff_idct_xvid_mmx(short *block); 34 extern void ff_idct_xvid_mmx(short *block);
35 extern void ff_idct_xvid_mmx2(short *block); 35 extern void ff_idct_xvid_mmx2(short *block);
36 36
37 int ff_mm_flags; /* multimedia extension flags */ 37 int mm_flags; /* multimedia extension flags */
38 38
39 /* pixel operations */ 39 /* pixel operations */
40 static const uint64_t mm_bone attribute_used __attribute__ ((aligned(8))) = 0x0101010101010101ULL; 40 static const uint64_t mm_bone attribute_used __attribute__ ((aligned(8))) = 0x0101010101010101ULL;
41 static const uint64_t mm_wone attribute_used __attribute__ ((aligned(8))) = 0x0001000100010001ULL; 41 static const uint64_t mm_wone attribute_used __attribute__ ((aligned(8))) = 0x0001000100010001ULL;
42 static const uint64_t mm_wtwo attribute_used __attribute__ ((aligned(8))) = 0x0002000200020002ULL; 42 static const uint64_t mm_wtwo attribute_used __attribute__ ((aligned(8))) = 0x0002000200020002ULL;
3048 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); 3048 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
3049 #endif 3049 #endif
3050 3050
3051 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) 3051 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
3052 { 3052 {
3053 ff_mm_flags = mm_support(); 3053 mm_flags = mm_support();
3054 3054
3055 if (avctx->dsp_mask) { 3055 if (avctx->dsp_mask) {
3056 if (avctx->dsp_mask & FF_MM_FORCE) 3056 if (avctx->dsp_mask & FF_MM_FORCE)
3057 ff_mm_flags |= (avctx->dsp_mask & 0xffff); 3057 mm_flags |= (avctx->dsp_mask & 0xffff);
3058 else 3058 else
3059 ff_mm_flags &= ~(avctx->dsp_mask & 0xffff); 3059 mm_flags &= ~(avctx->dsp_mask & 0xffff);
3060 } 3060 }
3061 3061
3062 #if 0 3062 #if 0
3063 av_log(avctx, AV_LOG_INFO, "libavcodec: CPU flags:"); 3063 av_log(avctx, AV_LOG_INFO, "libavcodec: CPU flags:");
3064 if (ff_mm_flags & MM_MMX) 3064 if (mm_flags & MM_MMX)
3065 av_log(avctx, AV_LOG_INFO, " mmx"); 3065 av_log(avctx, AV_LOG_INFO, " mmx");
3066 if (ff_mm_flags & MM_MMXEXT) 3066 if (mm_flags & MM_MMXEXT)
3067 av_log(avctx, AV_LOG_INFO, " mmxext"); 3067 av_log(avctx, AV_LOG_INFO, " mmxext");
3068 if (ff_mm_flags & MM_3DNOW) 3068 if (mm_flags & MM_3DNOW)
3069 av_log(avctx, AV_LOG_INFO, " 3dnow"); 3069 av_log(avctx, AV_LOG_INFO, " 3dnow");
3070 if (ff_mm_flags & MM_SSE) 3070 if (mm_flags & MM_SSE)
3071 av_log(avctx, AV_LOG_INFO, " sse"); 3071 av_log(avctx, AV_LOG_INFO, " sse");
3072 if (ff_mm_flags & MM_SSE2) 3072 if (mm_flags & MM_SSE2)
3073 av_log(avctx, AV_LOG_INFO, " sse2"); 3073 av_log(avctx, AV_LOG_INFO, " sse2");
3074 av_log(avctx, AV_LOG_INFO, "\n"); 3074 av_log(avctx, AV_LOG_INFO, "\n");
3075 #endif 3075 #endif
3076 3076
3077 if (ff_mm_flags & MM_MMX) { 3077 if (mm_flags & MM_MMX) {
3078 const int idct_algo= avctx->idct_algo; 3078 const int idct_algo= avctx->idct_algo;
3079 3079
3080 #ifdef CONFIG_ENCODERS 3080 #ifdef CONFIG_ENCODERS
3081 const int dct_algo = avctx->dct_algo; 3081 const int dct_algo = avctx->dct_algo;
3082 if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){ 3082 if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){
3083 if(ff_mm_flags & MM_SSE2){ 3083 if(mm_flags & MM_SSE2){
3084 c->fdct = ff_fdct_sse2; 3084 c->fdct = ff_fdct_sse2;
3085 }else if(ff_mm_flags & MM_MMXEXT){ 3085 }else if(mm_flags & MM_MMXEXT){
3086 c->fdct = ff_fdct_mmx2; 3086 c->fdct = ff_fdct_mmx2;
3087 }else{ 3087 }else{
3088 c->fdct = ff_fdct_mmx; 3088 c->fdct = ff_fdct_mmx;
3089 } 3089 }
3090 } 3090 }
3095 c->idct_add= ff_simple_idct_add_mmx; 3095 c->idct_add= ff_simple_idct_add_mmx;
3096 c->idct = ff_simple_idct_mmx; 3096 c->idct = ff_simple_idct_mmx;
3097 c->idct_permutation_type= FF_SIMPLE_IDCT_PERM; 3097 c->idct_permutation_type= FF_SIMPLE_IDCT_PERM;
3098 #ifdef CONFIG_GPL 3098 #ifdef CONFIG_GPL
3099 }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){ 3099 }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){
3100 if(ff_mm_flags & MM_MMXEXT){ 3100 if(mm_flags & MM_MMXEXT){
3101 c->idct_put= ff_libmpeg2mmx2_idct_put; 3101 c->idct_put= ff_libmpeg2mmx2_idct_put;
3102 c->idct_add= ff_libmpeg2mmx2_idct_add; 3102 c->idct_add= ff_libmpeg2mmx2_idct_add;
3103 c->idct = ff_mmxext_idct; 3103 c->idct = ff_mmxext_idct;
3104 }else{ 3104 }else{
3105 c->idct_put= ff_libmpeg2mmx_idct_put; 3105 c->idct_put= ff_libmpeg2mmx_idct_put;
3109 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; 3109 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
3110 #endif 3110 #endif
3111 }else if(idct_algo==FF_IDCT_VP3 && 3111 }else if(idct_algo==FF_IDCT_VP3 &&
3112 avctx->codec->id!=CODEC_ID_THEORA && 3112 avctx->codec->id!=CODEC_ID_THEORA &&
3113 !(avctx->flags & CODEC_FLAG_BITEXACT)){ 3113 !(avctx->flags & CODEC_FLAG_BITEXACT)){
3114 if(ff_mm_flags & MM_SSE2){ 3114 if(mm_flags & MM_SSE2){
3115 c->idct_put= ff_vp3_idct_put_sse2; 3115 c->idct_put= ff_vp3_idct_put_sse2;
3116 c->idct_add= ff_vp3_idct_add_sse2; 3116 c->idct_add= ff_vp3_idct_add_sse2;
3117 c->idct = ff_vp3_idct_sse2; 3117 c->idct = ff_vp3_idct_sse2;
3118 c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM; 3118 c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM;
3119 }else{ 3119 }else{
3124 c->idct_permutation_type= FF_PARTTRANS_IDCT_PERM; 3124 c->idct_permutation_type= FF_PARTTRANS_IDCT_PERM;
3125 } 3125 }
3126 }else if(idct_algo==FF_IDCT_CAVS){ 3126 }else if(idct_algo==FF_IDCT_CAVS){
3127 c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM; 3127 c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM;
3128 }else if(idct_algo==FF_IDCT_XVIDMMX){ 3128 }else if(idct_algo==FF_IDCT_XVIDMMX){
3129 if(ff_mm_flags & MM_MMXEXT){ 3129 if(mm_flags & MM_MMXEXT){
3130 c->idct_put= ff_idct_xvid_mmx2_put; 3130 c->idct_put= ff_idct_xvid_mmx2_put;
3131 c->idct_add= ff_idct_xvid_mmx2_add; 3131 c->idct_add= ff_idct_xvid_mmx2_add;
3132 c->idct = ff_idct_xvid_mmx2; 3132 c->idct = ff_idct_xvid_mmx2;
3133 }else{ 3133 }else{
3134 c->idct_put= ff_idct_xvid_mmx_put; 3134 c->idct_put= ff_idct_xvid_mmx_put;
3198 3198
3199 c->hadamard8_diff[0]= hadamard8_diff16_mmx; 3199 c->hadamard8_diff[0]= hadamard8_diff16_mmx;
3200 c->hadamard8_diff[1]= hadamard8_diff_mmx; 3200 c->hadamard8_diff[1]= hadamard8_diff_mmx;
3201 3201
3202 c->pix_norm1 = pix_norm1_mmx; 3202 c->pix_norm1 = pix_norm1_mmx;
3203 c->sse[0] = (ff_mm_flags & MM_SSE2) ? sse16_sse2 : sse16_mmx; 3203 c->sse[0] = (mm_flags & MM_SSE2) ? sse16_sse2 : sse16_mmx;
3204 c->sse[1] = sse8_mmx; 3204 c->sse[1] = sse8_mmx;
3205 c->vsad[4]= vsad_intra16_mmx; 3205 c->vsad[4]= vsad_intra16_mmx;
3206 3206
3207 c->nsse[0] = nsse16_mmx; 3207 c->nsse[0] = nsse16_mmx;
3208 c->nsse[1] = nsse8_mmx; 3208 c->nsse[1] = nsse8_mmx;
3225 c->h264_idct_dc_add= 3225 c->h264_idct_dc_add=
3226 c->h264_idct_add= ff_h264_idct_add_mmx; 3226 c->h264_idct_add= ff_h264_idct_add_mmx;
3227 c->h264_idct8_dc_add= 3227 c->h264_idct8_dc_add=
3228 c->h264_idct8_add= ff_h264_idct8_add_mmx; 3228 c->h264_idct8_add= ff_h264_idct8_add_mmx;
3229 3229
3230 if (ff_mm_flags & MM_MMXEXT) { 3230 if (mm_flags & MM_MMXEXT) {
3231 c->prefetch = prefetch_mmx2; 3231 c->prefetch = prefetch_mmx2;
3232 3232
3233 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; 3233 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
3234 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; 3234 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
3235 3235
3367 #endif 3367 #endif
3368 3368
3369 #ifdef CONFIG_ENCODERS 3369 #ifdef CONFIG_ENCODERS
3370 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2; 3370 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2;
3371 #endif //CONFIG_ENCODERS 3371 #endif //CONFIG_ENCODERS
3372 } else if (ff_mm_flags & MM_3DNOW) { 3372 } else if (mm_flags & MM_3DNOW) {
3373 c->prefetch = prefetch_3dnow; 3373 c->prefetch = prefetch_3dnow;
3374 3374
3375 c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; 3375 c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
3376 c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; 3376 c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
3377 3377
3461 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_3dnow; 3461 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_3dnow;
3462 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow; 3462 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow;
3463 } 3463 }
3464 3464
3465 #ifdef CONFIG_SNOW_ENCODER 3465 #ifdef CONFIG_SNOW_ENCODER
3466 if(ff_mm_flags & MM_SSE2){ 3466 if(mm_flags & MM_SSE2){
3467 c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2; 3467 c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2;
3468 c->vertical_compose97i = ff_snow_vertical_compose97i_sse2; 3468 c->vertical_compose97i = ff_snow_vertical_compose97i_sse2;
3469 c->inner_add_yblock = ff_snow_inner_add_yblock_sse2; 3469 c->inner_add_yblock = ff_snow_inner_add_yblock_sse2;
3470 } 3470 }
3471 else{ 3471 else{
3473 c->vertical_compose97i = ff_snow_vertical_compose97i_mmx; 3473 c->vertical_compose97i = ff_snow_vertical_compose97i_mmx;
3474 c->inner_add_yblock = ff_snow_inner_add_yblock_mmx; 3474 c->inner_add_yblock = ff_snow_inner_add_yblock_mmx;
3475 } 3475 }
3476 #endif 3476 #endif
3477 3477
3478 if(ff_mm_flags & MM_3DNOW){ 3478 if(mm_flags & MM_3DNOW){
3479 c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow; 3479 c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
3480 c->vector_fmul = vector_fmul_3dnow; 3480 c->vector_fmul = vector_fmul_3dnow;
3481 if(!(avctx->flags & CODEC_FLAG_BITEXACT)) 3481 if(!(avctx->flags & CODEC_FLAG_BITEXACT))
3482 c->float_to_int16 = float_to_int16_3dnow; 3482 c->float_to_int16 = float_to_int16_3dnow;
3483 } 3483 }
3484 if(ff_mm_flags & MM_3DNOWEXT) 3484 if(mm_flags & MM_3DNOWEXT)
3485 c->vector_fmul_reverse = vector_fmul_reverse_3dnow2; 3485 c->vector_fmul_reverse = vector_fmul_reverse_3dnow2;
3486 if(ff_mm_flags & MM_SSE){ 3486 if(mm_flags & MM_SSE){
3487 c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse; 3487 c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
3488 c->vector_fmul = vector_fmul_sse; 3488 c->vector_fmul = vector_fmul_sse;
3489 c->float_to_int16 = float_to_int16_sse; 3489 c->float_to_int16 = float_to_int16_sse;
3490 c->vector_fmul_reverse = vector_fmul_reverse_sse; 3490 c->vector_fmul_reverse = vector_fmul_reverse_sse;
3491 c->vector_fmul_add_add = vector_fmul_add_add_sse; 3491 c->vector_fmul_add_add = vector_fmul_add_add_sse;
3492 } 3492 }
3493 if(ff_mm_flags & MM_3DNOW) 3493 if(mm_flags & MM_3DNOW)
3494 c->vector_fmul_add_add = vector_fmul_add_add_3dnow; // faster than sse 3494 c->vector_fmul_add_add = vector_fmul_add_add_3dnow; // faster than sse
3495 } 3495 }
3496 3496
3497 #ifdef CONFIG_ENCODERS 3497 #ifdef CONFIG_ENCODERS
3498 dsputil_init_pix_mmx(c, avctx); 3498 dsputil_init_pix_mmx(c, avctx);