# HG changeset patch # User mru # Date 1170710191 0 # Node ID ee7422a921cb1902efd306997ca77b634690d723 # Parent f3987e08b9da7c4ea36fa5d74e1bedd5eb70a21f fix multichannel decoding diff -r f3987e08b9da -r ee7422a921cb armv4l/simple_idct_armv6.S --- a/armv4l/simple_idct_armv6.S Mon Feb 05 20:17:02 2007 +0000 +++ b/armv4l/simple_idct_armv6.S Mon Feb 05 21:16:31 2007 +0000 @@ -47,6 +47,19 @@ w46: .long W46 w57: .long W57 + .macro idct_row_start shift + ldr ip, [pc, #(w42-.-8)] /* ip = W4 | (W2 << 16) */ + ldr lr, [pc, #(w46-.-8)] /* lr = W4 | (W6 << 16) */ + ldr v7, [pc, #(w57-.-8)] /* v7 = W5 | (W7 << 16) */ + mov a2, #(1<<(\shift-1)) + smlad v1, a3, ip, a2 + smlsd v4, a3, ip, a2 + ldr ip, [pc, #(w13-.-8)] /* ip = W1 | (W3 << 16) */ + smlad v2, a3, lr, a2 + smlsd v3, a3, lr, a2 + smusdx fp, a4, v7 /* fp = B3 = W7*row[1] - W5*row[3] */ + smuad v5, a4, ip /* v5 = B0 = W1*row[1] + W3*row[3] */ + .endm /* Compute partial IDCT of single row. shift = left-shift amount @@ -58,17 +71,6 @@ Output in registers v1--v8 */ .macro idct_row shift - ldr lr, [pc, #(w46-.-8)] /* lr = W4 | (W6 << 16) */ - mov a2, #(1<<(\shift-1)) - smlad v1, a3, ip, a2 - smlsd v4, a3, ip, a2 - ldr ip, [pc, #(w13-.-8)] /* ip = W1 | (W3 << 16) */ - ldr v7, [pc, #(w57-.-8)] /* v7 = W5 | (W7 << 16) */ - smlad v2, a3, lr, a2 - smlsd v3, a3, lr, a2 - - smuad v5, a4, ip /* v5 = B0 = W1*row[1] + W3*row[3] */ - smusdx fp, a4, v7 /* fp = B3 = W7*row[1] - W5*row[3] */ ldr lr, [a1, #12] /* lr = row[7,5] */ pkhtb a3, ip, v7, asr #16 /* a4 = W7 | (W3 << 16) */ pkhbt a2, ip, v7, lsl #16 /* a2 = W1 | (W5 << 16) */ @@ -99,16 +101,6 @@ Output in registers v1--v8 */ .macro idct_row4 shift - ldr lr, [pc, #(w46-.-8)] /* lr = W4 | (W6 << 16) */ - ldr v7, [pc, #(w57-.-8)] /* v7 = W5 | (W7 << 16) */ - mov a2, #(1<<(\shift-1)) - smlad v1, a3, ip, a2 - smlsd v4, a3, ip, a2 - ldr ip, [pc, #(w13-.-8)] /* ip = W1 | (W3 << 16) */ - smlad v2, a3, lr, a2 - smlsd v3, a3, lr, a2 - smusdx fp, a4, v7 /* fp = B3 = W7*row[1] - W5*row[3] */ - 
smuad v5, a4, ip /* v5 = B0 = W1*row[1] + W3*row[3] */ pkhtb a3, ip, v7, asr #16 /* a4 = W7 | (W3 << 16) */ pkhbt a2, ip, v7, lsl #16 /* a2 = W1 | (W5 << 16) */ smusdx v6, a3, a4 /* v6 = -B1 = W7*row[3] - W3*row[1] */ @@ -193,28 +185,100 @@ .align .func idct_row_armv6 idct_row_armv6: - str lr, [sp, #-4]! - - ldr lr, [a1, #12] /* lr = row[7,5] */ - ldr ip, [a1, #4] /* ip = row[6,4] */ + ldr fp, [a1, #12] /* fp = row[7,5] */ + ldr v7, [a1, #4] /* v7 = row[6,4] */ ldr a4, [a1, #8] /* a4 = row[3,1] */ ldr a3, [a1] /* a3 = row[2,0] */ - orrs lr, lr, ip - cmpeq lr, a4 - cmpeq lr, a3, lsr #16 + mov ip, #(1<<(ROW_SHIFT-1)) + orrs v5, fp, v7 + cmpeq v5, a4 + cmpeq v5, a3, lsr #16 beq 1f - str a2, [sp, #-4]! - ldr ip, [pc, #(w42-.-8)] /* ip = W4 | (W2 << 16) */ - cmp lr, #0 - beq 2f + cmp v5, #0 + stmfd sp!, {a2, lr} + ldr v5, [pc, #(w42-.-8)] /* v5 = W4 | (W2 << 16) */ + ldr v6, [pc, #(w46-.-8)] /* v6 = W4 | (W6 << 16) */ + ldr v7, [pc, #(w57-.-8)] /* v7 = W5 | (W7 << 16) */ + + smlad v1, a3, v5, ip + smlsd v4, a3, v5, ip + ldr a2, [pc, #(w13-.-8)] /* a2 = W1 | (W3 << 16) */ + smlad v2, a3, v6, ip + smlsd v3, a3, v6, ip + smusdx lr, a4, v7 /* lr = B3 = W7*row[1] - W5*row[3] */ + smuad v5, a4, a2 /* v5 = B0 = W1*row[1] + W3*row[3] */ + + pkhtb a3, a2, v7, asr #16 /* a3 = W7 | (W3 << 16) */ + pkhbt ip, a2, v7, lsl #16 /* ip = W1 | (W5 << 16) */ + smusdx v6, a3, a4 /* v6 = -B1 = W7*row[3] - W3*row[1] */ + smusdx a4, a4, ip /* a4 = B2 = W5*row[1] - W1*row[3] */ + beq 3f + + smlad v5, fp, v7, v5 /* B0 += W5*row[5] + W7*row[7] */ + smlad v7, fp, a3, a4 /* B2 += W7*row[5] + W3*row[7] */ + ldr a4, [pc, #(w42n-.-8)] /* a4 = -W4 | (-W2 << 16) */ + ldr a3, [a1, #4] /* a3 = row[6,4] */ + smlsdx lr, fp, a2, lr /* B3 += W3*row[5] - W1*row[7] */ + ldr a2, [pc, #(w46-.-8)] /* a2 = W4 | (W6 << 16) */ + smlad v6, fp, ip, v6 /* B1 -= W1*row[5] + W5*row[7] */ - idct_row ROW_SHIFT - b 3f + smlad v2, a3, a4, v2 /* A1 += -W4*row[4] - W2*row[6] */ + smlsd v3, a3, a4, v3 /* A2 += -W4*row[4] + 
W2*row[6] */ + smlad v1, a3, a2, v1 /* A0 += W4*row[4] + W6*row[6] */ + smlsd v4, a3, a2, v4 /* A3 += W4*row[4] - W6*row[6] */ + + ldr a2, [sp], #4 + add a4, v1, v5 /* a4 = A0 + B0 */ + sub a3, v1, v5 /* a3 = A0 - B0 */ + mov v1, a4, asr #ROW_SHIFT + mov v5, a3, asr #ROW_SHIFT + + sub a4, v2, v6 /* a4 = A1 + B1 */ + add a3, v2, v6 /* a3 = A1 - B1 */ + mov v2, a4, asr #ROW_SHIFT + mov v6, a3, asr #ROW_SHIFT -2: idct_row4 ROW_SHIFT + add a4, v3, v7 /* a4 = A2 + B2 */ + sub a3, v3, v7 /* a3 = A2 - B2 */ + mov v3, a4, asr #ROW_SHIFT + mov v7, a3, asr #ROW_SHIFT + + add a4, v4, lr /* a4 = A3 + B3 */ + sub a3, v4, lr /* a3 = A3 - B3 */ + mov v4, a4, asr #ROW_SHIFT + mov fp, a3, asr #ROW_SHIFT + + strh v1, [a2] + strh v2, [a2, #(16*2)] + strh v3, [a2, #(16*4)] + strh v4, [a2, #(16*6)] + strh fp, [a2, #(16*1)] + strh v7, [a2, #(16*3)] + strh v6, [a2, #(16*5)] + strh v5, [a2, #(16*7)] + + ldr pc, [sp], #4 3: ldr a2, [sp], #4 - idct_finish_shift ROW_SHIFT + add v7, v1, v5 /* v7 = A0 + B0 */ + sub a3, v1, v5 /* a3 = A0 - B0 */ + mov v1, v7, asr #ROW_SHIFT + mov v5, a3, asr #ROW_SHIFT + + sub v7, v2, v6 /* v7 = A1 + B1 */ + add a3, v2, v6 /* a3 = A1 - B1 */ + mov v2, v7, asr #ROW_SHIFT + mov v6, a3, asr #ROW_SHIFT + + add v7, v3, a4 /* v7 = A2 + B2 */ + sub a3, v3, a4 /* a3 = A2 - B2 */ + mov v3, v7, asr #ROW_SHIFT + mov v7, a3, asr #ROW_SHIFT + + add a4, v4, lr /* a4 = A3 + B3 */ + sub a3, v4, lr /* a3 = A3 - B3 */ + mov v4, a4, asr #ROW_SHIFT + mov fp, a3, asr #ROW_SHIFT strh v1, [a2] strh v2, [a2, #(16*2)] @@ -236,7 +300,7 @@ strh a3, [a2, #(16*3)] strh a3, [a2, #(16*5)] strh a3, [a2, #(16*7)] - ldr pc, [sp], #4 + mov pc, lr .endfunc /* @@ -250,8 +314,8 @@ stmfd sp!, {a2, lr} ldr a3, [a1] /* a3 = row[2,0] */ - ldr ip, [pc, #(w42-.-8)] /* ip = W4 | (W2 << 16) */ ldr a4, [a1, #8] /* a4 = row[3,1] */ + idct_row_start COL_SHIFT idct_row COL_SHIFT ldr a2, [sp], #4 idct_finish_shift COL_SHIFT @@ -280,8 +344,8 @@ stmfd sp!, {a2, a3, lr} ldr a3, [a1] /* a3 = row[2,0] */ - ldr ip, 
[pc, #(w42-.-8)] /* ip = W4 | (W2 << 16) */ ldr a4, [a1, #8] /* a4 = row[3,1] */ + idct_row_start COL_SHIFT idct_row COL_SHIFT ldmfd sp!, {a2, a3} idct_finish_shift_sat COL_SHIFT @@ -312,8 +376,8 @@ stmfd sp!, {a2, a3, lr} ldr a3, [a1] /* a3 = row[2,0] */ - ldr ip, [pc, #(w42-.-8)] /* ip = W4 | (W2 << 16) */ ldr a4, [a1, #8] /* a4 = row[3,1] */ + idct_row_start COL_SHIFT idct_row COL_SHIFT ldmfd sp!, {a2, a3} idct_finish diff -r f3987e08b9da -r ee7422a921cb dtsdec.c --- a/dtsdec.c Mon Feb 05 20:17:02 2007 +0000 +++ b/dtsdec.c Mon Feb 05 21:16:31 2007 +0000 @@ -28,13 +28,8 @@ #define BUFFER_SIZE 18726 #define HEADER_SIZE 14 -#ifdef LIBDTS_FIXED -#define CONVERT_LEVEL (1 << 26) +#define CONVERT_LEVEL 1 #define CONVERT_BIAS 0 -#else -#define CONVERT_LEVEL 1 -#define CONVERT_BIAS 384 -#endif typedef struct DTSContext { dts_state_t *state; @@ -44,151 +39,131 @@ } DTSContext; static inline int16_t -convert(int32_t i) -{ -#ifdef LIBDTS_FIXED - i >>= 15; -#else - i -= 0x43c00000; -#endif - return (i > 32767) ? 32767 : ((i < -32768) ? 
-32768 : i); -} - -static void -convert2s16_2(sample_t * _f, int16_t * s16) -{ - int i; - int32_t *f = (int32_t *) _f; - - for(i = 0; i < 256; i++) { - s16[2 * i] = convert(f[i]); - s16[2 * i + 1] = convert(f[i + 256]); - } -} - -static void -convert2s16_4(sample_t * _f, int16_t * s16) +convert(sample_t s) { - int i; - int32_t *f = (int32_t *) _f; - - for(i = 0; i < 256; i++) { - s16[4 * i] = convert(f[i]); - s16[4 * i + 1] = convert(f[i + 256]); - s16[4 * i + 2] = convert(f[i + 512]); - s16[4 * i + 3] = convert(f[i + 768]); - } -} - -static void -convert2s16_5(sample_t * _f, int16_t * s16) -{ - int i; - int32_t *f = (int32_t *) _f; - - for(i = 0; i < 256; i++) { - s16[5 * i] = convert(f[i]); - s16[5 * i + 1] = convert(f[i + 256]); - s16[5 * i + 2] = convert(f[i + 512]); - s16[5 * i + 3] = convert(f[i + 768]); - s16[5 * i + 4] = convert(f[i + 1024]); - } + return s * 0x7fff; } +/* Interleave one block of 256 samples per channel from the planar buffer f into s16; the output layout is selected by flags & (DTS_CHANNEL_MASK | DTS_LFE). Each case must end in break so only one layout is written. */ static void -convert2s16_multi(sample_t * _f, int16_t * s16, int flags) +convert2s16_multi(sample_t *f, int16_t *s16, int flags) { int i; - int32_t *f = (int32_t *) _f; - switch (flags) { + switch(flags & (DTS_CHANNEL_MASK | DTS_LFE)){ case DTS_MONO: - for(i = 0; i < 256; i++) { - s16[5 * i] = s16[5 * i + 1] = s16[5 * i + 2] = s16[5 * i + 3] = - 0; - s16[5 * i + 4] = convert(f[i]); + for(i = 0; i < 256; i++){ + s16[5*i] = s16[5*i+1] = s16[5*i+2] = s16[5*i+3] = 0; + s16[5*i+4] = convert(f[i]); } break; case DTS_CHANNEL: case DTS_STEREO: case DTS_DOLBY: - convert2s16_2(_f, s16); + for(i = 0; i < 256; i++){ + s16[2*i] = convert(f[i]); + s16[2*i+1] = convert(f[i+256]); + } break; case DTS_3F: - for(i = 0; i < 256; i++) { - s16[5 * i] = convert(f[i]); - s16[5 * i + 1] = convert(f[i + 512]); - s16[5 * i + 2] = s16[5 * i + 3] = 0; - s16[5 * i + 4] = convert(f[i + 256]); + for(i = 0; i < 256; i++){ + s16[5*i] = convert(f[i+256]); + s16[5*i+1] = convert(f[i+512]); + s16[5*i+2] = s16[5*i+3] = 0; + s16[5*i+4] = convert(f[i]); } break; case DTS_2F2R: - convert2s16_4(_f, s16); + for(i = 0; 
i < 256; i++){ + s16[4*i] = convert(f[i]); + s16[4*i+1] = convert(f[i+256]); + s16[4*i+2] = convert(f[i+512]); + s16[4*i+3] = convert(f[i+768]); + } break; case DTS_3F2R: - convert2s16_5(_f, s16); + for(i = 0; i < 256; i++){ + s16[5*i] = convert(f[i+256]); + s16[5*i+1] = convert(f[i+512]); + s16[5*i+2] = convert(f[i+768]); + s16[5*i+3] = convert(f[i+1024]); + s16[5*i+4] = convert(f[i]); + } break; case DTS_MONO | DTS_LFE: - for(i = 0; i < 256; i++) { - s16[6 * i] = s16[6 * i + 1] = s16[6 * i + 2] = s16[6 * i + 3] = - 0; - s16[6 * i + 4] = convert(f[i + 256]); - s16[6 * i + 5] = convert(f[i]); + for(i = 0; i < 256; i++){ + s16[6*i] = s16[6*i+1] = s16[6*i+2] = s16[6*i+3] = 0; + s16[6*i+4] = convert(f[i]); + s16[6*i+5] = convert(f[i+256]); } break; case DTS_CHANNEL | DTS_LFE: case DTS_STEREO | DTS_LFE: case DTS_DOLBY | DTS_LFE: - for(i = 0; i < 256; i++) { - s16[6 * i] = convert(f[i + 256]); - s16[6 * i + 1] = convert(f[i + 512]); - s16[6 * i + 2] = s16[6 * i + 3] = s16[6 * i + 4] = 0; - s16[6 * i + 5] = convert(f[i]); + for(i = 0; i < 256; i++){ + s16[6*i] = convert(f[i]); + s16[6*i+1] = convert(f[i+256]); + s16[6*i+2] = s16[6*i+3] = s16[6*i+4] = 0; + s16[6*i+5] = convert(f[i+512]); } break; case DTS_3F | DTS_LFE: - for(i = 0; i < 256; i++) { - s16[6 * i] = convert(f[i + 256]); - s16[6 * i + 1] = convert(f[i + 768]); - s16[6 * i + 2] = s16[6 * i + 3] = 0; - s16[6 * i + 4] = convert(f[i + 512]); - s16[6 * i + 5] = convert(f[i]); + for(i = 0; i < 256; i++){ + s16[6*i] = convert(f[i+256]); + s16[6*i+1] = convert(f[i+512]); + s16[6*i+2] = s16[6*i+3] = 0; + s16[6*i+4] = convert(f[i]); + s16[6*i+5] = convert(f[i+768]); } break; case DTS_2F2R | DTS_LFE: - for(i = 0; i < 256; i++) { - s16[6 * i] = convert(f[i + 256]); - s16[6 * i + 1] = convert(f[i + 512]); - s16[6 * i + 2] = convert(f[i + 768]); - s16[6 * i + 3] = convert(f[i + 1024]); - s16[6 * i + 4] = 0; - s16[6 * i + 5] = convert(f[i]); + for(i = 0; i < 256; i++){ + s16[6*i] = convert(f[i]); + s16[6*i+1] = 
convert(f[i+256]); + s16[6*i+2] = convert(f[i+512]); + s16[6*i+3] = convert(f[i+768]); + s16[6*i+4] = 0; + s16[6*i+5] = convert(f[i+1024]); } break; case DTS_3F2R | DTS_LFE: - for(i = 0; i < 256; i++) { - s16[6 * i] = convert(f[i + 256]); - s16[6 * i + 1] = convert(f[i + 768]); - s16[6 * i + 2] = convert(f[i + 1024]); - s16[6 * i + 3] = convert(f[i + 1280]); - s16[6 * i + 4] = convert(f[i + 512]); - s16[6 * i + 5] = convert(f[i]); + for(i = 0; i < 256; i++){ + s16[6*i] = convert(f[i+256]); + s16[6*i+1] = convert(f[i+512]); + s16[6*i+2] = convert(f[i+768]); + s16[6*i+3] = convert(f[i+1024]); + s16[6*i+4] = convert(f[i]); + s16[6*i+5] = convert(f[i+1280]); } break; } } static int channels_multi(int flags) { - if(flags & DTS_LFE) - return 6; - else if(flags & 1) /* center channel */ + switch(flags & (DTS_CHANNEL_MASK | DTS_LFE)){ + case DTS_CHANNEL: + case DTS_STEREO: + case DTS_DOLBY: + return 2; + case DTS_2F2R: + return 4; + case DTS_MONO: + case DTS_3F: + case DTS_3F2R: return 5; - else if((flags & DTS_CHANNEL_MASK) == DTS_2F2R) - return 4; - else - return 2; + case DTS_MONO | DTS_LFE: + case DTS_CHANNEL | DTS_LFE: + case DTS_STEREO | DTS_LFE: + case DTS_DOLBY | DTS_LFE: + case DTS_3F | DTS_LFE: + case DTS_2F2R | DTS_LFE: + case DTS_3F2R | DTS_LFE: + return 6; + } + + return -1; } static int @@ -206,6 +181,7 @@ int len; level_t level; sample_t bias; + int nblocks; int i; *data_size = 0; @@ -237,7 +213,6 @@ s->bufpos = s->buf + length; } - flags = 2; /* ???????????? 
*/ level = CONVERT_LEVEL; bias = CONVERT_BIAS; @@ -251,20 +226,18 @@ avctx->channels = channels_multi(flags); avctx->bit_rate = bit_rate; - for(i = 0; i < dts_blocks_num(s->state); i++) { - int chans; + nblocks = dts_blocks_num(s->state); + for(i = 0; i < nblocks; i++) { if(dts_block(s->state)) { av_log(avctx, AV_LOG_ERROR, "dts_block() failed\n"); goto end; } - chans = channels_multi(flags); - convert2s16_multi(dts_samples(s->state), out_samples, - flags & (DTS_CHANNEL_MASK | DTS_LFE)); + convert2s16_multi(dts_samples(s->state), out_samples, flags); - out_samples += 256 * chans; - *data_size += 256 * sizeof(int16_t) * chans; + out_samples += 256 * avctx->channels; + *data_size += 256 * sizeof(int16_t) * avctx->channels; } end: