Mercurial > libavcodec.hg
comparison wmadec.c @ 4301:b43bd0c56eaa libavcodec
Bug fix for crashes when SSE is used on unaligned arrays.
No measurable change in speed. This gave random crashes on Win32
and BeOS. The cause of this bug is that gcc doesn't align the
stack frame. Linux and glibc always ensure that it is aligned, so
this never affected Linux.
author | banan |
---|---|
date | Thu, 14 Dec 2006 17:50:23 +0000 |
parents | c8c591fe26f8 |
children | 0efc832d9102 |
comparison
equal
deleted
inserted
replaced
4300:95044f594170 | 4301:b43bd0c56eaa |
---|---|
113 uint8_t channel_coded[MAX_CHANNELS]; /* true if channel is coded */ | 113 uint8_t channel_coded[MAX_CHANNELS]; /* true if channel is coded */ |
114 DECLARE_ALIGNED_16(float, exponents[MAX_CHANNELS][BLOCK_MAX_SIZE]); | 114 DECLARE_ALIGNED_16(float, exponents[MAX_CHANNELS][BLOCK_MAX_SIZE]); |
115 float max_exponent[MAX_CHANNELS]; | 115 float max_exponent[MAX_CHANNELS]; |
116 int16_t coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE]; | 116 int16_t coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE]; |
117 DECLARE_ALIGNED_16(float, coefs[MAX_CHANNELS][BLOCK_MAX_SIZE]); | 117 DECLARE_ALIGNED_16(float, coefs[MAX_CHANNELS][BLOCK_MAX_SIZE]); |
118 DECLARE_ALIGNED_16(FFTSample, output[BLOCK_MAX_SIZE * 2]); | |
119 DECLARE_ALIGNED_16(float, window[BLOCK_MAX_SIZE * 2]); | |
118 MDCTContext mdct_ctx[BLOCK_NB_SIZES]; | 120 MDCTContext mdct_ctx[BLOCK_NB_SIZES]; |
119 float *windows[BLOCK_NB_SIZES]; | 121 float *windows[BLOCK_NB_SIZES]; |
120 DECLARE_ALIGNED_16(FFTSample, mdct_tmp[BLOCK_MAX_SIZE]); /* temporary storage for imdct */ | 122 DECLARE_ALIGNED_16(FFTSample, mdct_tmp[BLOCK_MAX_SIZE]); /* temporary storage for imdct */ |
121 /* output buffer for one frame and the last for IMDCT windowing */ | 123 /* output buffer for one frame and the last for IMDCT windowing */ |
122 DECLARE_ALIGNED_16(float, frame_out[MAX_CHANNELS][BLOCK_MAX_SIZE * 2]); | 124 DECLARE_ALIGNED_16(float, frame_out[MAX_CHANNELS][BLOCK_MAX_SIZE * 2]); |
715 unrecorrable error. */ | 717 unrecorrable error. */ |
716 static int wma_decode_block(WMADecodeContext *s) | 718 static int wma_decode_block(WMADecodeContext *s) |
717 { | 719 { |
718 int n, v, a, ch, code, bsize; | 720 int n, v, a, ch, code, bsize; |
719 int coef_nb_bits, total_gain, parse_exponents; | 721 int coef_nb_bits, total_gain, parse_exponents; |
720 DECLARE_ALIGNED_16(float, window[BLOCK_MAX_SIZE * 2]); | |
721 int nb_coefs[MAX_CHANNELS]; | 722 int nb_coefs[MAX_CHANNELS]; |
722 float mdct_norm; | 723 float mdct_norm; |
723 | 724 |
724 #ifdef TRACE | 725 #ifdef TRACE |
725 tprintf("***decode_block: %d:%d\n", s->frame_count - 1, s->block_num); | 726 tprintf("***decode_block: %d:%d\n", s->frame_count - 1, s->block_num); |
1070 block_len = s->block_len; | 1071 block_len = s->block_len; |
1071 prev_block_len = 1 << s->prev_block_len_bits; | 1072 prev_block_len = 1 << s->prev_block_len_bits; |
1072 next_block_len = 1 << s->next_block_len_bits; | 1073 next_block_len = 1 << s->next_block_len_bits; |
1073 | 1074 |
1074 /* right part */ | 1075 /* right part */ |
1075 wptr = window + block_len; | 1076 wptr = s->window + block_len; |
1076 if (block_len <= next_block_len) { | 1077 if (block_len <= next_block_len) { |
1077 for(i=0;i<block_len;i++) | 1078 for(i=0;i<block_len;i++) |
1078 *wptr++ = s->windows[bsize][i]; | 1079 *wptr++ = s->windows[bsize][i]; |
1079 } else { | 1080 } else { |
1080 /* overlap */ | 1081 /* overlap */ |
1086 for(i=0;i<n;i++) | 1087 for(i=0;i<n;i++) |
1087 *wptr++ = 0.0; | 1088 *wptr++ = 0.0; |
1088 } | 1089 } |
1089 | 1090 |
1090 /* left part */ | 1091 /* left part */ |
1091 wptr = window + block_len; | 1092 wptr = s->window + block_len; |
1092 if (block_len <= prev_block_len) { | 1093 if (block_len <= prev_block_len) { |
1093 for(i=0;i<block_len;i++) | 1094 for(i=0;i<block_len;i++) |
1094 *--wptr = s->windows[bsize][i]; | 1095 *--wptr = s->windows[bsize][i]; |
1095 } else { | 1096 } else { |
1096 /* overlap */ | 1097 /* overlap */ |
1105 } | 1106 } |
1106 | 1107 |
1107 | 1108 |
1108 for(ch = 0; ch < s->nb_channels; ch++) { | 1109 for(ch = 0; ch < s->nb_channels; ch++) { |
1109 if (s->channel_coded[ch]) { | 1110 if (s->channel_coded[ch]) { |
1110 DECLARE_ALIGNED_16(FFTSample, output[BLOCK_MAX_SIZE * 2]); | |
1111 float *ptr; | 1111 float *ptr; |
1112 int n4, index, n; | 1112 int n4, index, n; |
1113 | 1113 |
1114 n = s->block_len; | 1114 n = s->block_len; |
1115 n4 = s->block_len / 2; | 1115 n4 = s->block_len / 2; |
1116 s->mdct_ctx[bsize].fft.imdct_calc(&s->mdct_ctx[bsize], | 1116 s->mdct_ctx[bsize].fft.imdct_calc(&s->mdct_ctx[bsize], |
1117 output, s->coefs[ch], s->mdct_tmp); | 1117 s->output, s->coefs[ch], s->mdct_tmp); |
1118 | 1118 |
1119 /* XXX: optimize all that by build the window and | 1119 /* XXX: optimize all that by build the window and |
1120 multipying/adding at the same time */ | 1120 multipying/adding at the same time */ |
1121 | 1121 |
1122 /* multiply by the window and add in the frame */ | 1122 /* multiply by the window and add in the frame */ |
1123 index = (s->frame_len / 2) + s->block_pos - n4; | 1123 index = (s->frame_len / 2) + s->block_pos - n4; |
1124 ptr = &s->frame_out[ch][index]; | 1124 ptr = &s->frame_out[ch][index]; |
1125 s->dsp.vector_fmul_add_add(ptr,window,output,ptr,0,2*n,1); | 1125 s->dsp.vector_fmul_add_add(ptr,s->window,s->output,ptr,0,2*n,1); |
1126 | 1126 |
1127 /* specific fast case for ms-stereo : add to second | 1127 /* specific fast case for ms-stereo : add to second |
1128 channel if it is not coded */ | 1128 channel if it is not coded */ |
1129 if (s->ms_stereo && !s->channel_coded[1]) { | 1129 if (s->ms_stereo && !s->channel_coded[1]) { |
1130 ptr = &s->frame_out[1][index]; | 1130 ptr = &s->frame_out[1][index]; |
1131 s->dsp.vector_fmul_add_add(ptr,window,output,ptr,0,2*n,1); | 1131 s->dsp.vector_fmul_add_add(ptr,s->window,s->output,ptr,0,2*n,1); |
1132 } | 1132 } |
1133 } | 1133 } |
1134 } | 1134 } |
1135 next: | 1135 next: |
1136 /* update block number */ | 1136 /* update block number */ |