comparison wmadec.c @ 4301:b43bd0c56eaa libavcodec

Bug fix for crashes when SSE is used on unaligned arrays. No measurable change in speed. This caused random crashes on Win32 and BeOS. The cause of this bug is that gcc doesn't align the stack frame. Linux and glibc always ensure the stack frame is aligned, so this never affected Linux.
author banan
date Thu, 14 Dec 2006 17:50:23 +0000
parents c8c591fe26f8
children 0efc832d9102
comparison
equal deleted inserted replaced
4300:95044f594170 4301:b43bd0c56eaa
113 uint8_t channel_coded[MAX_CHANNELS]; /* true if channel is coded */ 113 uint8_t channel_coded[MAX_CHANNELS]; /* true if channel is coded */
114 DECLARE_ALIGNED_16(float, exponents[MAX_CHANNELS][BLOCK_MAX_SIZE]); 114 DECLARE_ALIGNED_16(float, exponents[MAX_CHANNELS][BLOCK_MAX_SIZE]);
115 float max_exponent[MAX_CHANNELS]; 115 float max_exponent[MAX_CHANNELS];
116 int16_t coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE]; 116 int16_t coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE];
117 DECLARE_ALIGNED_16(float, coefs[MAX_CHANNELS][BLOCK_MAX_SIZE]); 117 DECLARE_ALIGNED_16(float, coefs[MAX_CHANNELS][BLOCK_MAX_SIZE]);
118 DECLARE_ALIGNED_16(FFTSample, output[BLOCK_MAX_SIZE * 2]);
119 DECLARE_ALIGNED_16(float, window[BLOCK_MAX_SIZE * 2]);
118 MDCTContext mdct_ctx[BLOCK_NB_SIZES]; 120 MDCTContext mdct_ctx[BLOCK_NB_SIZES];
119 float *windows[BLOCK_NB_SIZES]; 121 float *windows[BLOCK_NB_SIZES];
120 DECLARE_ALIGNED_16(FFTSample, mdct_tmp[BLOCK_MAX_SIZE]); /* temporary storage for imdct */ 122 DECLARE_ALIGNED_16(FFTSample, mdct_tmp[BLOCK_MAX_SIZE]); /* temporary storage for imdct */
121 /* output buffer for one frame and the last for IMDCT windowing */ 123 /* output buffer for one frame and the last for IMDCT windowing */
122 DECLARE_ALIGNED_16(float, frame_out[MAX_CHANNELS][BLOCK_MAX_SIZE * 2]); 124 DECLARE_ALIGNED_16(float, frame_out[MAX_CHANNELS][BLOCK_MAX_SIZE * 2]);
715 unrecorrable error. */ 717 unrecorrable error. */
716 static int wma_decode_block(WMADecodeContext *s) 718 static int wma_decode_block(WMADecodeContext *s)
717 { 719 {
718 int n, v, a, ch, code, bsize; 720 int n, v, a, ch, code, bsize;
719 int coef_nb_bits, total_gain, parse_exponents; 721 int coef_nb_bits, total_gain, parse_exponents;
720 DECLARE_ALIGNED_16(float, window[BLOCK_MAX_SIZE * 2]);
721 int nb_coefs[MAX_CHANNELS]; 722 int nb_coefs[MAX_CHANNELS];
722 float mdct_norm; 723 float mdct_norm;
723 724
724 #ifdef TRACE 725 #ifdef TRACE
725 tprintf("***decode_block: %d:%d\n", s->frame_count - 1, s->block_num); 726 tprintf("***decode_block: %d:%d\n", s->frame_count - 1, s->block_num);
1070 block_len = s->block_len; 1071 block_len = s->block_len;
1071 prev_block_len = 1 << s->prev_block_len_bits; 1072 prev_block_len = 1 << s->prev_block_len_bits;
1072 next_block_len = 1 << s->next_block_len_bits; 1073 next_block_len = 1 << s->next_block_len_bits;
1073 1074
1074 /* right part */ 1075 /* right part */
1075 wptr = window + block_len; 1076 wptr = s->window + block_len;
1076 if (block_len <= next_block_len) { 1077 if (block_len <= next_block_len) {
1077 for(i=0;i<block_len;i++) 1078 for(i=0;i<block_len;i++)
1078 *wptr++ = s->windows[bsize][i]; 1079 *wptr++ = s->windows[bsize][i];
1079 } else { 1080 } else {
1080 /* overlap */ 1081 /* overlap */
1086 for(i=0;i<n;i++) 1087 for(i=0;i<n;i++)
1087 *wptr++ = 0.0; 1088 *wptr++ = 0.0;
1088 } 1089 }
1089 1090
1090 /* left part */ 1091 /* left part */
1091 wptr = window + block_len; 1092 wptr = s->window + block_len;
1092 if (block_len <= prev_block_len) { 1093 if (block_len <= prev_block_len) {
1093 for(i=0;i<block_len;i++) 1094 for(i=0;i<block_len;i++)
1094 *--wptr = s->windows[bsize][i]; 1095 *--wptr = s->windows[bsize][i];
1095 } else { 1096 } else {
1096 /* overlap */ 1097 /* overlap */
1105 } 1106 }
1106 1107
1107 1108
1108 for(ch = 0; ch < s->nb_channels; ch++) { 1109 for(ch = 0; ch < s->nb_channels; ch++) {
1109 if (s->channel_coded[ch]) { 1110 if (s->channel_coded[ch]) {
1110 DECLARE_ALIGNED_16(FFTSample, output[BLOCK_MAX_SIZE * 2]);
1111 float *ptr; 1111 float *ptr;
1112 int n4, index, n; 1112 int n4, index, n;
1113 1113
1114 n = s->block_len; 1114 n = s->block_len;
1115 n4 = s->block_len / 2; 1115 n4 = s->block_len / 2;
1116 s->mdct_ctx[bsize].fft.imdct_calc(&s->mdct_ctx[bsize], 1116 s->mdct_ctx[bsize].fft.imdct_calc(&s->mdct_ctx[bsize],
1117 output, s->coefs[ch], s->mdct_tmp); 1117 s->output, s->coefs[ch], s->mdct_tmp);
1118 1118
1119 /* XXX: optimize all that by build the window and 1119 /* XXX: optimize all that by build the window and
1120 multipying/adding at the same time */ 1120 multipying/adding at the same time */
1121 1121
1122 /* multiply by the window and add in the frame */ 1122 /* multiply by the window and add in the frame */
1123 index = (s->frame_len / 2) + s->block_pos - n4; 1123 index = (s->frame_len / 2) + s->block_pos - n4;
1124 ptr = &s->frame_out[ch][index]; 1124 ptr = &s->frame_out[ch][index];
1125 s->dsp.vector_fmul_add_add(ptr,window,output,ptr,0,2*n,1); 1125 s->dsp.vector_fmul_add_add(ptr,s->window,s->output,ptr,0,2*n,1);
1126 1126
1127 /* specific fast case for ms-stereo : add to second 1127 /* specific fast case for ms-stereo : add to second
1128 channel if it is not coded */ 1128 channel if it is not coded */
1129 if (s->ms_stereo && !s->channel_coded[1]) { 1129 if (s->ms_stereo && !s->channel_coded[1]) {
1130 ptr = &s->frame_out[1][index]; 1130 ptr = &s->frame_out[1][index];
1131 s->dsp.vector_fmul_add_add(ptr,window,output,ptr,0,2*n,1); 1131 s->dsp.vector_fmul_add_add(ptr,s->window,s->output,ptr,0,2*n,1);
1132 } 1132 }
1133 } 1133 }
1134 } 1134 }
1135 next: 1135 next:
1136 /* update block number */ 1136 /* update block number */