# HG changeset patch # User michael # Date 1067896267 0 # Node ID 0b2eed7b1210effda6d51aa7b67382aeb205d3e9 # Parent 85d558a18134cb69b6eb1f802ed6ca6aecd91d66 movenc patch by (Gildas Bazin ) diff -r 85d558a18134 -r 0b2eed7b1210 movenc.c --- a/movenc.c Sun Nov 02 21:55:38 2003 +0000 +++ b/movenc.c Mon Nov 03 21:51:07 2003 +0000 @@ -1,6 +1,7 @@ /* * MOV, 3GP, MP4 encoder. * Copyright (c) 2003 Thomas Raivio. + * Enhancements by Gildas Bazin * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -28,22 +29,21 @@ #define globalTimescale 1000 typedef struct MOVIentry { - unsigned int flags, pos, len; - unsigned int chunkSize; + unsigned int flags, pos, size; + unsigned int samplesInChunk; char key_frame; unsigned int entries; } MOVIentry; typedef struct MOVIndex { int entry; - int samples; int mdat_size; int ents_allocated; long timescale; long time; - long frameCount; long trackDuration; - long sampleDelta; + long sampleCount; + long sampleDuration; int hasKeyframes; int trackID; AVCodecContext *enc; @@ -63,6 +63,8 @@ MOVTrack tracks[MAX_STREAMS]; } MOVContext; +static int mov_write_esds_tag(ByteIOContext *pb, MOVTrack* track); + //FIXME supprt 64bit varaint with wide placeholders static int updateSize (ByteIOContext *pb, int pos) { @@ -74,6 +76,7 @@ return curpos - pos; } +/* Chunk offset atom */ static int mov_write_stco_tag(ByteIOContext *pb, MOVTrack* track) { int i; @@ -90,10 +93,11 @@ return updateSize (pb, pos); } +/* Sample size atom */ static int mov_write_stsz_tag(ByteIOContext *pb, MOVTrack* track) { int equalChunks = 1; - int i, tst = -1, oldtst = -1; + int i, j, entries = 0, tst = -1, oldtst = -1; int pos = url_ftell(pb); put_be32(pb, 0); /* size */ @@ -103,32 +107,34 @@ for (i=0; ientry; i++) { int cl = i / MOV_INDEX_CLUSTER_SIZE; int id = i % MOV_INDEX_CLUSTER_SIZE; - tst = track->cluster[cl][id].len; - if(oldtst != -1 && tst != oldtst) { - equalChunks = 0; - break; + tst = track->cluster[cl][id].size/track->cluster[cl][id].entries; + if(oldtst != -1 && tst != oldtst) { + equalChunks = 0; } oldtst = tst; + entries += track->cluster[cl][id].entries; } - if(equalChunks || - track->enc->codec_type == CODEC_TYPE_AUDIO) { - //int sSize = track->cluster[0][0].len/track->cluster[0][0].entries; - int sSize = track->cluster[0][0].len; + if (equalChunks) { + int sSize = track->cluster[0][0].size/track->cluster[0][0].entries; put_be32(pb, sSize); // sample size - put_be32(pb, track->samples/track->enc->channels); // sample count + put_be32(pb, entries); // sample count } else { put_be32(pb, 0); // sample size - put_be32(pb, track->entry); // sample count + put_be32(pb, entries); // sample count for (i=0; ientry; i++) { int cl = i / MOV_INDEX_CLUSTER_SIZE; int id = i % MOV_INDEX_CLUSTER_SIZE; - put_be32(pb, track->cluster[cl][id].len); + for ( j=0; jcluster[cl][id].entries; j++) { + put_be32(pb, track->cluster[cl][id].size / + track->cluster[cl][id].entries); + } } } return updateSize (pb, pos); } +/* Sample to chunk atom */ static int mov_write_stsc_tag(ByteIOContext *pb, MOVTrack* track) { int index = 0, oldval = -1, i, entryPos, curpos; @@ -142,12 +148,12 @@ for (i=0; ientry; i++) { int cl = i / MOV_INDEX_CLUSTER_SIZE; int id = i % MOV_INDEX_CLUSTER_SIZE; - if(oldval != track->cluster[cl][id].chunkSize) + if(oldval != track->cluster[cl][id].samplesInChunk) { put_be32(pb, i+1); // first chunk - put_be32(pb, track->cluster[cl][id].chunkSize); + put_be32(pb, track->cluster[cl][id].samplesInChunk); // samples per chunk put_be32(pb, 0x1); // sample description index - oldval = track->cluster[cl][id].chunkSize; + oldval = track->cluster[cl][id].samplesInChunk; index++; } } @@ -159,6 +165,7 @@ return updateSize (pb, pos); } +/* Sync sample atom */ static int mov_write_stss_tag(ByteIOContext *pb, MOVTrack* track) { long curpos; @@ -223,18 +230,22 @@ put_be32(pb, 0); /* Reserved */ put_be16(pb, 0); /* Reserved */ put_be16(pb, 1); /* Data-reference index, XXX == 1 */ - put_be32(pb, 0); /* Reserved */ + /* SoundDescription */ + put_be16(pb, 0); /* Version */ + put_be16(pb, 0); /* Revision level */ put_be32(pb, 0); /* Reserved */ put_be16(pb, track->enc->channels); /* Number of channels */ /* TODO: Currently hard-coded to 16-bit, there doesn't seem - to be a good way to get number of bits of audio */ + to be a good way to get number of bits of audio */ put_be16(pb, 0x10); /* Reserved */ put_be16(pb, 0); /* compression ID (= 0) */ put_be16(pb, 0); /* packet size (= 0) */ put_be16(pb, track->timescale); /* Time scale */ put_be16(pb, 0); /* Reserved */ + if(track->enc->codec_id == CODEC_ID_AAC) + mov_write_esds_tag(pb, track); if(track->enc->codec_id == CODEC_ID_AMR_NB) mov_write_damr_tag(pb); return updateSize (pb, pos); @@ -263,36 +274,86 @@ return 0x15; } -static unsigned int esdsLength(unsigned int len) +static unsigned int descrLength(unsigned int len) +{ + if (len < 0x00000080) + return 2 + len; + else if (len < 0x00004000) + return 3 + len; + else if(len < 0x00200000) + return 4 + len; + else + return 5 + len; +} + +static void putDescr(ByteIOContext *pb, int tag, int size) { - unsigned int result = 0; - unsigned char b = len & 0x7f; - result += b; - b = (len >> 8) & 0x7f; - result += (b + 0x80) << 8; - b = (len >> 16) & 0x7f; - result += (b + 0x80) << 16; - b = (len >> 24) & 0x7f; - result += (b + 0x80) << 24; - return result; + uint32_t len; + uint8_t vals[4]; + + len = size; + vals[3] = (uint8_t)(len & 0x7f); + len >>= 7; + vals[2] = (uint8_t)((len & 0x7f) | 0x80); + len >>= 7; + vals[1] = (uint8_t)((len & 0x7f) | 0x80); + len >>= 7; + vals[0] = (uint8_t)((len & 0x7f) | 0x80); + + put_byte(pb, tag); // DescriptorTag + + if (size < 0x00000080) + { + put_byte(pb, vals[3]); + } + else if (size < 0x00004000) + { + put_byte(pb, vals[2]); + put_byte(pb, vals[3]); + } + else if (size < 0x00200000) + { + put_byte(pb, vals[1]); + put_byte(pb, vals[2]); + put_byte(pb, vals[3]); + } + else if (size < 0x10000000) + { + put_byte(pb, vals[0]); + put_byte(pb, vals[1]); + put_byte(pb, vals[2]); + put_byte(pb, vals[3]); + } } static int mov_write_esds_tag(ByteIOContext *pb, MOVTrack* track) // Basic { - put_be32(pb, track->vosLen+18+14+17); - put_tag(pb, "esds"); - put_be32(pb, 0); // Version + int decoderSpecificInfoLen = track->vosLen ? descrLength(track->vosLen):0; + int pos = url_ftell(pb); - put_byte(pb, 0x03); // tag = ES_DescriptorTag - put_be32(pb, esdsLength(track->vosLen+18+14)); // Length - put_be16(pb, 0x0001); // ID (= 1) + put_be32(pb, 0); // size + put_tag(pb, "esds"); + put_be32(pb, 0); // Version + + // ES descriptor + putDescr(pb, 0x03, 3 + descrLength(13 + decoderSpecificInfoLen) + + descrLength(1)); + put_be16(pb, 0x0001); // ID (= 1) put_byte(pb, 0x00); // flags (= no flags) -// Decoderconfigdescriptor = 4 - put_byte(pb, 0x04); // tag = DecoderConfigDescriptor - put_be32(pb, esdsLength(track->vosLen+18)); // Length - put_byte(pb, 0x20); // Object type indication (Visual 14496-2) - put_byte(pb, 0x11); // flags (= Visualstream) + // DecoderConfig descriptor + putDescr(pb, 0x04, 13 + decoderSpecificInfoLen); + + if(track->enc->codec_id == CODEC_ID_AAC) + put_byte(pb, 0x40); // Object type indication + else if(track->enc->codec_id == CODEC_ID_MPEG4) + put_byte(pb, 0x20); // Object type indication (Visual 14496-2) + + if(track->enc->codec_type == CODEC_TYPE_AUDIO) + put_byte(pb, 0x15); // flags (= Audiostream) + else + put_byte(pb, 0x11); // flags (= Visualstream) + put_byte(pb, 0x0); // Buffersize DB (24 bits) put_be16(pb, 0x0dd2); // Buffersize DB @@ -300,15 +361,17 @@ put_be32(pb, 0x0002e918); // maxbitrate put_be32(pb, 0x00017e6b); // avg bitrate -// Decoderspecific info Tag = 5 - put_byte(pb, 0x05); // tag = Decoderspecific info - put_be32(pb, esdsLength(track->vosLen)); // length - put_buffer(pb, track->vosData, track->vosLen); - - put_byte(pb, 0x06); - put_be32(pb, esdsLength(1)); // length + if (track->vosLen) + { + // DecoderSpecific info descriptor + putDescr(pb, 0x05, track->vosLen); + put_buffer(pb, track->vosData, track->vosLen); + } + + // SL descriptor + putDescr(pb, 0x06, descrLength(1)); put_byte(pb, 0x02); - return track->vosLen+18+14+17; + return updateSize (pb, pos); } static int mov_write_video_tag(ByteIOContext *pb, MOVTrack* track) @@ -376,6 +439,7 @@ } /* TODO?: Currently all samples/frames seem to have same duration */ +/* Time to sample atom */ static int mov_write_stts_tag(ByteIOContext *pb, MOVTrack* track) { put_be32(pb, 0x18); /* size */ @@ -383,8 +447,8 @@ put_be32(pb, 0); /* version & flags */ put_be32(pb, 1); /* entry count */ - put_be32(pb, track->frameCount); /* sample count */ - put_be32(pb, track->sampleDelta); /* sample delta */ + put_be32(pb, track->sampleCount); /* sample count */ + put_be32(pb, track->sampleDuration); /* sample duration */ return 0x18; } @@ -491,15 +555,8 @@ put_be32(pb, 0); /* Version & flags */ put_be32(pb, track->time); /* creation time */ put_be32(pb, track->time); /* modification time */ - if(track->enc->codec_type == CODEC_TYPE_VIDEO) { - int64_t rate = track->enc->frame_rate; - put_be32(pb, rate); - put_be32(pb, rate*(int64_t)track->trackDuration/(int64_t)globalTimescale); // duration - } - else { - put_be32(pb, track->timescale); /* time scale (sample rate for audio) */ - put_be32(pb, track->trackDuration); /* duration */ - } + put_be32(pb, track->timescale); /* time scale (sample rate for audio) */ + put_be32(pb, track->trackDuration); /* duration */ put_be16(pb, 0); /* language, 0 = english */ put_be16(pb, 0); /* reserved (quality) */ return 32; @@ -644,46 +701,28 @@ mov->timescale = globalTimescale; for (i=0; itracks[i].entry > 0) { - if(mov->tracks[i].enc->codec_type == CODEC_TYPE_VIDEO) { - mov->tracks[i].timescale = globalTimescale; - mov->tracks[i].sampleDelta = mov->tracks[i].enc->frame_rate_base; - mov->tracks[i].frameCount = mov->tracks[i].samples; - mov->tracks[i].trackDuration = (int64_t)((int64_t)mov->tracks[i].entry* - (int64_t)globalTimescale*(int64_t)mov->tracks[i].enc->frame_rate_base)/(int64_t)mov->tracks[i].enc->frame_rate; + if(mov->tracks[i].entry <= 0) continue; + + if(mov->tracks[i].enc->codec_type == CODEC_TYPE_VIDEO) { + mov->tracks[i].timescale = mov->tracks[i].enc->frame_rate; + mov->tracks[i].sampleDuration = mov->tracks[i].enc->frame_rate_base; + } + else if(mov->tracks[i].enc->codec_type == CODEC_TYPE_AUDIO) { + /* If AMR, track timescale = 8000, AMR_WB = 16000 */ + if(mov->tracks[i].enc->codec_id == CODEC_ID_AMR_NB) { + mov->tracks[i].sampleDuration = 160; // Bytes per chunk + mov->tracks[i].timescale = 8000; } - else if(mov->tracks[i].enc->codec_type == CODEC_TYPE_AUDIO) { - long trackDuration = 0; - /* If AMR, track timescale = 8000, AMR_WB = 16000 */ - if(mov->tracks[i].enc->codec_id == CODEC_ID_AMR_NB) { - int j; - for (j=0; jtracks[i].samples; j++) { - int cl = j / MOV_INDEX_CLUSTER_SIZE; - int id = j % MOV_INDEX_CLUSTER_SIZE; - trackDuration += mov->tracks[i].cluster[cl][id].entries; - } - mov->tracks[i].sampleDelta = 160; // Bytes per chunk - mov->tracks[i].frameCount = mov->tracks[i].samples; - mov->tracks[i].trackDuration = - mov->tracks[i].samples * mov->tracks[i].sampleDelta; //trackDuration - mov->tracks[i].timescale = 8000; - } - else { - int j; - for (j=0; j<=mov->tracks[i].entry; j++) { - int cl = j / MOV_INDEX_CLUSTER_SIZE; - int id = j % MOV_INDEX_CLUSTER_SIZE; - trackDuration += mov->tracks[i].cluster[cl][id].len; - } - mov->tracks[i].frameCount = trackDuration; - mov->tracks[i].timescale = mov->tracks[i].enc->sample_rate; - mov->tracks[i].sampleDelta = 1; - mov->tracks[i].trackDuration = trackDuration; - } + else { + mov->tracks[i].timescale = mov->tracks[i].enc->sample_rate; + mov->tracks[i].sampleDuration = mov->tracks[i].enc->frame_size; } - mov->tracks[i].time = mov->time; - mov->tracks[i].trackID = i+1; } + + mov->tracks[i].trackDuration = + mov->tracks[i].sampleCount * mov->tracks[i].sampleDuration; + mov->tracks[i].time = mov->time; + mov->tracks[i].trackID = i+1; } mov_write_mvhd_tag(pb, mov); @@ -706,13 +745,23 @@ } /* TODO: This needs to be more general */ -int mov_write_ftyp_tag(ByteIOContext *pb) +int mov_write_ftyp_tag(ByteIOContext *pb, AVFormatContext *s) { put_be32(pb, 0x14 ); /* size */ put_tag(pb, "ftyp"); - put_tag(pb, "3gp4"); + + if (!strcmp("3gp", s->oformat->name)) + put_tag(pb, "3gp4"); + else + put_tag(pb, "isom"); + put_be32(pb, 0x200 ); - put_tag(pb, "3gp4"); + + if (!strcmp("3gp", s->oformat->name)) + put_tag(pb, "3gp4"); + else + put_tag(pb, "mp41"); + return 0x14; } @@ -721,8 +770,8 @@ ByteIOContext *pb = &s->pb; if(s->oformat != NULL) { - if(!strcmp("3gp", s->oformat->name)) - mov_write_ftyp_tag(pb); + if(!strcmp("3gp", s->oformat->name) || !strcmp("mp4", s->oformat->name)) + mov_write_ftyp_tag(pb,s); } put_flush_packet(pb); @@ -741,83 +790,78 @@ { MOVContext *mov = s->priv_data; ByteIOContext *pb = &s->pb; - AVCodecContext *enc; + AVCodecContext *enc = &s->streams[stream_index]->codec; + MOVTrack* trk = &mov->tracks[stream_index]; int cl, id; + unsigned int samplesInChunk = 0; - enc = &s->streams[stream_index]->codec; - if (!url_is_streamed(&s->pb)) { - MOVTrack* trk = &mov->tracks[stream_index]; - int sampleCount = 0; - unsigned int chunkSize = 0; + if (url_is_streamed(&s->pb)) return 0; /* Can't handle that */ + if (!size) return 0; /* Discard 0 sized packets */ - if(enc->codec_type == CODEC_TYPE_AUDIO) { + if (enc->codec_type == CODEC_TYPE_VIDEO ) { + samplesInChunk = 1; + } + else if (enc->codec_type == CODEC_TYPE_AUDIO ) { + if( enc->codec_id == CODEC_ID_AMR_NB) { /* We must find out how many AMR blocks there are in one packet */ - if(enc->codec_id == CODEC_ID_AMR_NB) { - static uint16_t packed_size[16] = {13, 14, 16, 18, 20, 21, 27, 32, 6, 0, 0, 0, 0, 0, 0, 0}; - int len = 0; + static uint16_t packed_size[16] = + {13, 14, 16, 18, 20, 21, 27, 32, 6, 0, 0, 0, 0, 0, 0, 0}; + int len = 0; - while(len < size && sampleCount < 100) { - len += packed_size[(buf[len] >> 3) & 0x0F]; - sampleCount++; - } - chunkSize = 1; - } - else { - sampleCount = size; - chunkSize = size/enc->channels; + while (len < size && samplesInChunk < 100) { + len += packed_size[(buf[len] >> 3) & 0x0F]; + samplesInChunk++; } } - else if(enc->codec_type == CODEC_TYPE_VIDEO) { - if(enc->codec_id == CODEC_ID_MPEG4 && - trk->vosLen == 0) - { - assert(enc->extradata_size); - - trk->vosLen = enc->extradata_size; - trk->vosData = av_malloc(trk->vosLen); - memcpy(trk->vosData, enc->extradata, trk->vosLen); - } - chunkSize = 1; + else if(enc->codec_id == CODEC_ID_PCM_ALAW) { + samplesInChunk = size/enc->channels; } - - cl = trk->entry / MOV_INDEX_CLUSTER_SIZE; - id = trk->entry % MOV_INDEX_CLUSTER_SIZE; - - if (trk->ents_allocated <= trk->entry) { - trk->cluster = av_realloc(trk->cluster, (cl+1)*sizeof(void*)); - if (!trk->cluster) - return -1; - trk->cluster[cl] = av_malloc(MOV_INDEX_CLUSTER_SIZE*sizeof(MOVIentry)); - if (!trk->cluster[cl]) - return -1; - trk->ents_allocated += MOV_INDEX_CLUSTER_SIZE; + else { + samplesInChunk = 1; } - if(mov->mdat_written == 0) { - mov_write_mdat_tag(pb, mov); - mov->mdat_written = 1; - mov->time = Timestamp(); - } - - trk->cluster[cl][id].pos = url_ftell(pb) - mov->movi_list; - trk->cluster[cl][id].chunkSize = chunkSize; - if(enc->channels > 1) - trk->cluster[cl][id].len = size/enc->channels; - else - trk->cluster[cl][id].len = size; - trk->cluster[cl][id].entries = sampleCount; - if(enc->codec_type == CODEC_TYPE_VIDEO) { - trk->cluster[cl][id].key_frame = enc->coded_frame->key_frame; - if(enc->coded_frame->pict_type == FF_I_TYPE) + } + + if ((enc->codec_id == CODEC_ID_MPEG4 || enc->codec_id == CODEC_ID_AAC) + && trk->vosLen == 0) { + assert(enc->extradata_size); + + trk->vosLen = enc->extradata_size; + trk->vosData = av_malloc(trk->vosLen); + memcpy(trk->vosData, enc->extradata, trk->vosLen); + } + + cl = trk->entry / MOV_INDEX_CLUSTER_SIZE; + id = trk->entry % MOV_INDEX_CLUSTER_SIZE; + + if (trk->ents_allocated <= trk->entry) { + trk->cluster = av_realloc(trk->cluster, (cl+1)*sizeof(void*)); + if (!trk->cluster) + return -1; + trk->cluster[cl] = av_malloc(MOV_INDEX_CLUSTER_SIZE*sizeof(MOVIentry)); + if (!trk->cluster[cl]) + return -1; + trk->ents_allocated += MOV_INDEX_CLUSTER_SIZE; + } + if (mov->mdat_written == 0) { + mov_write_mdat_tag(pb, mov); + mov->mdat_written = 1; + mov->time = Timestamp(); + } + + trk->cluster[cl][id].pos = url_ftell(pb) - mov->movi_list; + trk->cluster[cl][id].samplesInChunk = samplesInChunk; + trk->cluster[cl][id].size = size; + trk->cluster[cl][id].entries = samplesInChunk; + if(enc->codec_type == CODEC_TYPE_VIDEO) { + trk->cluster[cl][id].key_frame = enc->coded_frame->key_frame; + if(enc->coded_frame->pict_type == FF_I_TYPE) trk->hasKeyframes = 1; - } - trk->enc = enc; - trk->entry++; - if(sampleCount == 0) - trk->samples++; - else - trk->samples += sampleCount; - trk->mdat_size += size; } + trk->enc = enc; + trk->entry++; + trk->sampleCount += samplesInChunk; + trk->mdat_size += size; + put_buffer(pb, buf, size); put_flush_packet(pb);