changeset 298:0b2eed7b1210 libavformat

movenc patch by (Gildas Bazin <gbazin at altern dot org>)
author michael
date Mon, 03 Nov 2003 21:51:07 +0000
parents 85d558a18134
children 5ee1aa8cc0ea
files movenc.c
diffstat 1 files changed, 215 insertions(+), 171 deletions(-) [+]
line wrap: on
line diff
--- a/movenc.c	Sun Nov 02 21:55:38 2003 +0000
+++ b/movenc.c	Mon Nov 03 21:51:07 2003 +0000
@@ -1,6 +1,7 @@
 /*
  * MOV, 3GP, MP4 encoder.
  * Copyright (c) 2003 Thomas Raivio.
+ * Enhancements by Gildas Bazin <gbazin@netcourrier.com>
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -28,22 +29,21 @@
 #define globalTimescale 1000
 
 typedef struct MOVIentry {
-    unsigned int flags, pos, len;
-    unsigned int chunkSize;
+    unsigned int flags, pos, size;
+    unsigned int samplesInChunk;
     char         key_frame;
     unsigned int entries;
 } MOVIentry;
 
 typedef struct MOVIndex {
     int         entry;
-    int         samples;
     int         mdat_size;
     int         ents_allocated;
     long        timescale;
     long        time;
-    long        frameCount;
     long        trackDuration;
-    long        sampleDelta;
+    long        sampleCount;
+    long        sampleDuration;
     int         hasKeyframes;
     int         trackID;
     AVCodecContext *enc;
@@ -63,6 +63,8 @@
     MOVTrack tracks[MAX_STREAMS];
 } MOVContext;
 
+static int mov_write_esds_tag(ByteIOContext *pb, MOVTrack* track);
+
 //FIXME supprt 64bit varaint with wide placeholders
 static int updateSize (ByteIOContext *pb, int pos)
 {
@@ -74,6 +76,7 @@
     return curpos - pos;
 }
 
+/* Chunk offset atom */
 static int mov_write_stco_tag(ByteIOContext *pb, MOVTrack* track)
 {
     int i;
@@ -90,10 +93,11 @@
     return updateSize (pb, pos);
 }
 
+/* Sample size atom */
 static int mov_write_stsz_tag(ByteIOContext *pb, MOVTrack* track)
 {
     int equalChunks = 1;
-    int i, tst = -1, oldtst = -1;
+    int i, j, entries = 0, tst = -1, oldtst = -1;
 
     int pos = url_ftell(pb);
     put_be32(pb, 0); /* size */
@@ -103,32 +107,34 @@
     for (i=0; i<track->entry; i++) {
         int cl = i / MOV_INDEX_CLUSTER_SIZE;
         int id = i % MOV_INDEX_CLUSTER_SIZE;
-        tst = track->cluster[cl][id].len;
-          if(oldtst != -1 && tst != oldtst) {
-          equalChunks = 0;
-          break;
+        tst = track->cluster[cl][id].size/track->cluster[cl][id].entries;
+        if(oldtst != -1 && tst != oldtst) {
+            equalChunks = 0;
         }
         oldtst = tst;
+        entries += track->cluster[cl][id].entries;
     }
-    if(equalChunks ||
-       track->enc->codec_type == CODEC_TYPE_AUDIO) {
-        //int sSize = track->cluster[0][0].len/track->cluster[0][0].entries;
-        int sSize = track->cluster[0][0].len;
+    if (equalChunks) {
+        int sSize = track->cluster[0][0].size/track->cluster[0][0].entries;
         put_be32(pb, sSize); // sample size 
-        put_be32(pb, track->samples/track->enc->channels); // sample count 
+        put_be32(pb, entries); // sample count
     }
     else {
         put_be32(pb, 0); // sample size 
-        put_be32(pb, track->entry); // sample count 
+        put_be32(pb, entries); // sample count 
         for (i=0; i<track->entry; i++) {
             int cl = i / MOV_INDEX_CLUSTER_SIZE;
             int id = i % MOV_INDEX_CLUSTER_SIZE;
-            put_be32(pb, track->cluster[cl][id].len);
+            for ( j=0; j<track->cluster[cl][id].entries; j++) {
+                put_be32(pb, track->cluster[cl][id].size /
+                         track->cluster[cl][id].entries);
+            }
         }
     }
     return updateSize (pb, pos);
 }
 
+/* Sample to chunk atom */
 static int mov_write_stsc_tag(ByteIOContext *pb, MOVTrack* track)
 {
     int index = 0, oldval = -1, i, entryPos, curpos;
@@ -142,12 +148,12 @@
     for (i=0; i<track->entry; i++) {
         int cl = i / MOV_INDEX_CLUSTER_SIZE;
         int id = i % MOV_INDEX_CLUSTER_SIZE;
-        if(oldval != track->cluster[cl][id].chunkSize) 
+        if(oldval != track->cluster[cl][id].samplesInChunk)
         {
             put_be32(pb, i+1); // first chunk 
-            put_be32(pb, track->cluster[cl][id].chunkSize);
+            put_be32(pb, track->cluster[cl][id].samplesInChunk); // samples per chunk
             put_be32(pb, 0x1); // sample description index 
-            oldval = track->cluster[cl][id].chunkSize;
+            oldval = track->cluster[cl][id].samplesInChunk;
             index++;
         }
     }
@@ -159,6 +165,7 @@
     return updateSize (pb, pos);
 }
 
+/* Sync sample atom */
 static int mov_write_stss_tag(ByteIOContext *pb, MOVTrack* track)
 {
     long curpos;
@@ -223,18 +230,22 @@
     put_be32(pb, 0); /* Reserved */
     put_be16(pb, 0); /* Reserved */
     put_be16(pb, 1); /* Data-reference index, XXX  == 1 */
-    put_be32(pb, 0); /* Reserved */
+    /* SoundDescription */
+    put_be16(pb, 0); /* Version */
+    put_be16(pb, 0); /* Revision level */
     put_be32(pb, 0); /* Reserved */
 
     put_be16(pb, track->enc->channels); /* Number of channels */
     /* TODO: Currently hard-coded to 16-bit, there doesn't seem
-	         to be a good way to get number of bits of audio */
+                 to be a good way to get number of bits of audio */
     put_be16(pb, 0x10); /* Reserved */
     put_be16(pb, 0); /* compression ID (= 0) */
     put_be16(pb, 0); /* packet size (= 0) */
     put_be16(pb, track->timescale); /* Time scale */
     put_be16(pb, 0); /* Reserved */
 
+    if(track->enc->codec_id == CODEC_ID_AAC)
+        mov_write_esds_tag(pb, track);
     if(track->enc->codec_id == CODEC_ID_AMR_NB)
         mov_write_damr_tag(pb);
     return updateSize (pb, pos);
@@ -263,36 +274,86 @@
     return 0x15;
 }
 
-static unsigned int esdsLength(unsigned int len)
+static unsigned int descrLength(unsigned int len)
+{
+    if (len < 0x00000080)
+        return 2 + len;
+    else if (len < 0x00004000)
+        return 3 + len;
+    else if(len < 0x00200000)
+        return 4 + len;
+    else
+        return 5 + len;
+}
+
+static void putDescr(ByteIOContext *pb, int tag, int size)
 {
-    unsigned int result = 0;
-    unsigned char b = len & 0x7f;
-    result += b;
-    b = (len >> 8) & 0x7f;
-    result += (b + 0x80) << 8;
-    b = (len >> 16) & 0x7f;
-    result += (b + 0x80) << 16;
-    b = (len >> 24) & 0x7f;
-    result += (b + 0x80) << 24;
-    return result;
+    uint32_t len;
+    uint8_t  vals[4];
+
+    len = size;
+    vals[3] = (uint8_t)(len & 0x7f);
+    len >>= 7;
+    vals[2] = (uint8_t)((len & 0x7f) | 0x80); 
+    len >>= 7;
+    vals[1] = (uint8_t)((len & 0x7f) | 0x80); 
+    len >>= 7;
+    vals[0] = (uint8_t)((len & 0x7f) | 0x80);
+
+    put_byte(pb, tag); // DescriptorTag
+
+    if (size < 0x00000080)
+    {
+        put_byte(pb, vals[3]);
+    }
+    else if (size < 0x00004000)
+    {
+        put_byte(pb, vals[2]);
+        put_byte(pb, vals[3]);
+    }
+    else if (size < 0x00200000)
+    {
+        put_byte(pb, vals[1]);
+        put_byte(pb, vals[2]);
+        put_byte(pb, vals[3]);
+    }
+    else if (size < 0x10000000)
+    {
+        put_byte(pb, vals[0]);
+        put_byte(pb, vals[1]);
+        put_byte(pb, vals[2]);
+        put_byte(pb, vals[3]);
+    }
 }
 
 static int mov_write_esds_tag(ByteIOContext *pb, MOVTrack* track) // Basic
 {
-    put_be32(pb, track->vosLen+18+14+17);
-    put_tag(pb, "esds");
-    put_be32(pb, 0);              // Version
+    int decoderSpecificInfoLen = track->vosLen ? descrLength(track->vosLen):0;
+    int pos = url_ftell(pb);
 
-    put_byte(pb, 0x03);            // tag = ES_DescriptorTag
-    put_be32(pb, esdsLength(track->vosLen+18+14));  // Length
-    put_be16(pb, 0x0001);         // ID (= 1)
+    put_be32(pb, 0);               // size
+    put_tag(pb, "esds");
+    put_be32(pb, 0);               // Version
+
+    // ES descriptor
+    putDescr(pb, 0x03, 3 + descrLength(13 + decoderSpecificInfoLen) +
+             descrLength(1));
+    put_be16(pb, 0x0001);          // ID (= 1)
     put_byte(pb, 0x00);            // flags (= no flags)
 
-// Decoderconfigdescriptor = 4
-    put_byte(pb, 0x04);            // tag = DecoderConfigDescriptor
-    put_be32(pb, esdsLength(track->vosLen+18));  // Length
-    put_byte(pb, 0x20);            // Object type indication (Visual 14496-2)
-    put_byte(pb, 0x11);            // flags (= Visualstream)
+    // DecoderConfig descriptor
+    putDescr(pb, 0x04, 13 + decoderSpecificInfoLen);
+
+    if(track->enc->codec_id == CODEC_ID_AAC)
+        put_byte(pb, 0x40);        // Object type indication
+    else if(track->enc->codec_id == CODEC_ID_MPEG4)
+        put_byte(pb, 0x20);        // Object type indication (Visual 14496-2)
+
+    if(track->enc->codec_type == CODEC_TYPE_AUDIO)
+        put_byte(pb, 0x15);            // flags (= Audiostream)
+    else
+        put_byte(pb, 0x11);            // flags (= Visualstream)
+
     put_byte(pb, 0x0);             // Buffersize DB (24 bits)
     put_be16(pb, 0x0dd2);          // Buffersize DB
 
@@ -300,15 +361,17 @@
     put_be32(pb, 0x0002e918);     // maxbitrate
     put_be32(pb, 0x00017e6b);     // avg bitrate
 
-// Decoderspecific info Tag = 5
-    put_byte(pb, 0x05);           // tag = Decoderspecific info
-    put_be32(pb, esdsLength(track->vosLen));   // length
-    put_buffer(pb, track->vosData, track->vosLen);
-    
-    put_byte(pb, 0x06);
-    put_be32(pb, esdsLength(1));  // length
+    if (track->vosLen)
+    {
+        // DecoderSpecific info descriptor
+        putDescr(pb, 0x05, track->vosLen);
+        put_buffer(pb, track->vosData, track->vosLen);
+    }
+
+    // SL descriptor
+    putDescr(pb, 0x06, descrLength(1));
     put_byte(pb, 0x02);
-    return track->vosLen+18+14+17;
+    return updateSize (pb, pos);
 }
 
 static int mov_write_video_tag(ByteIOContext *pb, MOVTrack* track)
@@ -376,6 +439,7 @@
 }
 
 /* TODO?: Currently all samples/frames seem to have same duration */
+/* Time to sample atom */
 static int mov_write_stts_tag(ByteIOContext *pb, MOVTrack* track)
 {
     put_be32(pb, 0x18); /* size */
@@ -383,8 +447,8 @@
     put_be32(pb, 0); /* version & flags */
     put_be32(pb, 1); /* entry count */
 
-    put_be32(pb, track->frameCount); /* sample count */
-    put_be32(pb, track->sampleDelta); /* sample delta */
+    put_be32(pb, track->sampleCount); /* sample count */
+    put_be32(pb, track->sampleDuration); /* sample duration */
     return 0x18;
 }
 
@@ -491,15 +555,8 @@
     put_be32(pb, 0); /* Version & flags */
     put_be32(pb, track->time); /* creation time */
     put_be32(pb, track->time); /* modification time */
-    if(track->enc->codec_type == CODEC_TYPE_VIDEO) {
-        int64_t rate = track->enc->frame_rate;
-        put_be32(pb, rate); 
-        put_be32(pb, rate*(int64_t)track->trackDuration/(int64_t)globalTimescale); // duration 
-    }
-    else {
-      put_be32(pb, track->timescale); /* time scale (sample rate for audio) */ 
-      put_be32(pb, track->trackDuration); /* duration */
-    }
+    put_be32(pb, track->timescale); /* time scale (sample rate for audio) */ 
+    put_be32(pb, track->trackDuration); /* duration */
     put_be16(pb, 0); /* language, 0 = english */
     put_be16(pb, 0); /* reserved (quality) */
     return 32;
@@ -644,46 +701,28 @@
     mov->timescale = globalTimescale;
 
     for (i=0; i<MAX_STREAMS; i++) {
-        if(mov->tracks[i].entry > 0) {
-            if(mov->tracks[i].enc->codec_type == CODEC_TYPE_VIDEO) {
-                mov->tracks[i].timescale = globalTimescale;
-                mov->tracks[i].sampleDelta = mov->tracks[i].enc->frame_rate_base;
-                mov->tracks[i].frameCount = mov->tracks[i].samples;
-                mov->tracks[i].trackDuration = (int64_t)((int64_t)mov->tracks[i].entry*
-                    (int64_t)globalTimescale*(int64_t)mov->tracks[i].enc->frame_rate_base)/(int64_t)mov->tracks[i].enc->frame_rate;
+        if(mov->tracks[i].entry <= 0) continue;
+
+        if(mov->tracks[i].enc->codec_type == CODEC_TYPE_VIDEO) {
+            mov->tracks[i].timescale = mov->tracks[i].enc->frame_rate;
+            mov->tracks[i].sampleDuration = mov->tracks[i].enc->frame_rate_base;
+        }
+        else if(mov->tracks[i].enc->codec_type == CODEC_TYPE_AUDIO) {
+            /* If AMR, track timescale = 8000, AMR_WB = 16000 */
+            if(mov->tracks[i].enc->codec_id == CODEC_ID_AMR_NB) {
+                mov->tracks[i].sampleDuration = 160;  // Bytes per chunk
+                mov->tracks[i].timescale = 8000;
             }
-            else if(mov->tracks[i].enc->codec_type == CODEC_TYPE_AUDIO) {
-                long trackDuration = 0;
-                /* If AMR, track timescale = 8000, AMR_WB = 16000 */
-                if(mov->tracks[i].enc->codec_id == CODEC_ID_AMR_NB) {
-                    int j;
-                    for (j=0; j<mov->tracks[i].samples; j++) {
-                        int cl = j / MOV_INDEX_CLUSTER_SIZE;
-                        int id = j % MOV_INDEX_CLUSTER_SIZE;
-                        trackDuration += mov->tracks[i].cluster[cl][id].entries;
-                    }
-                    mov->tracks[i].sampleDelta = 160;  // Bytes per chunk
-                    mov->tracks[i].frameCount = mov->tracks[i].samples;
-                    mov->tracks[i].trackDuration = 
-                        mov->tracks[i].samples * mov->tracks[i].sampleDelta; //trackDuration
-                    mov->tracks[i].timescale = 8000;
-                }
-                else {
-                    int j;
-                    for (j=0; j<=mov->tracks[i].entry; j++) {
-                        int cl = j / MOV_INDEX_CLUSTER_SIZE;
-                        int id = j % MOV_INDEX_CLUSTER_SIZE;
-                        trackDuration += mov->tracks[i].cluster[cl][id].len;
-                    }
-                    mov->tracks[i].frameCount = trackDuration;
-                    mov->tracks[i].timescale = mov->tracks[i].enc->sample_rate;
-                    mov->tracks[i].sampleDelta = 1;
-                    mov->tracks[i].trackDuration = trackDuration;
-                }
+            else {
+                mov->tracks[i].timescale = mov->tracks[i].enc->sample_rate;
+                mov->tracks[i].sampleDuration = mov->tracks[i].enc->frame_size;
             }
-            mov->tracks[i].time = mov->time;
-            mov->tracks[i].trackID = i+1;
         }
+
+        mov->tracks[i].trackDuration = 
+            mov->tracks[i].sampleCount * mov->tracks[i].sampleDuration;
+        mov->tracks[i].time = mov->time;
+        mov->tracks[i].trackID = i+1;
     }
 
     mov_write_mvhd_tag(pb, mov);
@@ -706,13 +745,23 @@
 }
 
 /* TODO: This needs to be more general */
-int mov_write_ftyp_tag(ByteIOContext *pb)
+int mov_write_ftyp_tag(ByteIOContext *pb, AVFormatContext *s)
 {
     put_be32(pb, 0x14 ); /* size */
     put_tag(pb, "ftyp");
-    put_tag(pb, "3gp4");
+
+    if (!strcmp("3gp", s->oformat->name))
+        put_tag(pb, "3gp4");
+    else
+        put_tag(pb, "isom");
+
     put_be32(pb, 0x200 );
-    put_tag(pb, "3gp4");
+
+    if (!strcmp("3gp", s->oformat->name))
+        put_tag(pb, "3gp4");
+    else
+        put_tag(pb, "mp41");
+
     return 0x14;
 }
 
@@ -721,8 +770,8 @@
     ByteIOContext *pb = &s->pb;
 
     if(s->oformat != NULL) {
-    if(!strcmp("3gp", s->oformat->name))
-        mov_write_ftyp_tag(pb);
+    if(!strcmp("3gp", s->oformat->name) || !strcmp("mp4", s->oformat->name))
+        mov_write_ftyp_tag(pb,s);
     }
 
     put_flush_packet(pb);
@@ -741,83 +790,78 @@
 {
     MOVContext *mov = s->priv_data;
     ByteIOContext *pb = &s->pb;
-    AVCodecContext *enc;
+    AVCodecContext *enc = &s->streams[stream_index]->codec;
+    MOVTrack* trk = &mov->tracks[stream_index];
     int cl, id;
+    unsigned int samplesInChunk = 0;
 
-    enc = &s->streams[stream_index]->codec;
-    if (!url_is_streamed(&s->pb)) {
-        MOVTrack* trk = &mov->tracks[stream_index];
-        int sampleCount = 0;
-        unsigned int chunkSize = 0;
+    if (url_is_streamed(&s->pb)) return 0; /* Can't handle that */
+    if (!size) return 0; /* Discard 0 sized packets */
 
-        if(enc->codec_type == CODEC_TYPE_AUDIO) {
+    if (enc->codec_type == CODEC_TYPE_VIDEO ) {
+        samplesInChunk = 1;
+    }
+    else if (enc->codec_type == CODEC_TYPE_AUDIO ) {
+        if( enc->codec_id == CODEC_ID_AMR_NB) {
             /* We must find out how many AMR blocks there are in one packet */
-            if(enc->codec_id == CODEC_ID_AMR_NB) {
-                static uint16_t packed_size[16] = {13, 14, 16, 18, 20, 21, 27, 32, 6, 0, 0, 0, 0, 0, 0, 0};             
-                int len = 0;
+            static uint16_t packed_size[16] =
+                {13, 14, 16, 18, 20, 21, 27, 32, 6, 0, 0, 0, 0, 0, 0, 0};
+            int len = 0;
 
-                while(len < size && sampleCount < 100) {
-                    len += packed_size[(buf[len] >> 3) & 0x0F];
-                    sampleCount++;
-                }
-                chunkSize = 1;
-            }
-            else {
-                sampleCount = size;
-                chunkSize = size/enc->channels;
+            while (len < size && samplesInChunk < 100) {
+                len += packed_size[(buf[len] >> 3) & 0x0F];
+                samplesInChunk++;
             }
         }
-        else if(enc->codec_type == CODEC_TYPE_VIDEO) {
-            if(enc->codec_id == CODEC_ID_MPEG4 &&
-               trk->vosLen == 0)
-            {
-                assert(enc->extradata_size);
-
-                trk->vosLen = enc->extradata_size;
-                trk->vosData = av_malloc(trk->vosLen);
-                memcpy(trk->vosData, enc->extradata, trk->vosLen);
-            }
-            chunkSize = 1;
+        else if(enc->codec_id == CODEC_ID_PCM_ALAW) {
+            samplesInChunk = size/enc->channels;
         }
-
-        cl = trk->entry / MOV_INDEX_CLUSTER_SIZE;
-        id = trk->entry % MOV_INDEX_CLUSTER_SIZE;
-
-        if (trk->ents_allocated <= trk->entry) {
-            trk->cluster = av_realloc(trk->cluster, (cl+1)*sizeof(void*)); 
-            if (!trk->cluster)
-                return -1;
-            trk->cluster[cl] = av_malloc(MOV_INDEX_CLUSTER_SIZE*sizeof(MOVIentry));
-            if (!trk->cluster[cl])
-                return -1;
-            trk->ents_allocated += MOV_INDEX_CLUSTER_SIZE;
+        else {
+            samplesInChunk = 1;
         }
-        if(mov->mdat_written == 0) {
-            mov_write_mdat_tag(pb, mov);
-            mov->mdat_written = 1;
-            mov->time = Timestamp();
-        }
-        
-        trk->cluster[cl][id].pos = url_ftell(pb) - mov->movi_list;
-        trk->cluster[cl][id].chunkSize = chunkSize;
-        if(enc->channels > 1)
-          trk->cluster[cl][id].len = size/enc->channels;
-        else
-          trk->cluster[cl][id].len = size;
-        trk->cluster[cl][id].entries = sampleCount;
-        if(enc->codec_type == CODEC_TYPE_VIDEO) {
-            trk->cluster[cl][id].key_frame = enc->coded_frame->key_frame;
-            if(enc->coded_frame->pict_type == FF_I_TYPE)
+    }
+
+    if ((enc->codec_id == CODEC_ID_MPEG4 || enc->codec_id == CODEC_ID_AAC)
+        && trk->vosLen == 0) {
+        assert(enc->extradata_size);
+
+        trk->vosLen = enc->extradata_size;
+        trk->vosData = av_malloc(trk->vosLen);
+        memcpy(trk->vosData, enc->extradata, trk->vosLen);
+    }
+
+    cl = trk->entry / MOV_INDEX_CLUSTER_SIZE;
+    id = trk->entry % MOV_INDEX_CLUSTER_SIZE;
+
+    if (trk->ents_allocated <= trk->entry) {
+        trk->cluster = av_realloc(trk->cluster, (cl+1)*sizeof(void*)); 
+        if (!trk->cluster)
+            return -1;
+        trk->cluster[cl] = av_malloc(MOV_INDEX_CLUSTER_SIZE*sizeof(MOVIentry));
+        if (!trk->cluster[cl])
+            return -1;
+        trk->ents_allocated += MOV_INDEX_CLUSTER_SIZE;
+    }
+    if (mov->mdat_written == 0) {
+        mov_write_mdat_tag(pb, mov);
+        mov->mdat_written = 1;
+        mov->time = Timestamp();
+    }
+
+    trk->cluster[cl][id].pos = url_ftell(pb) - mov->movi_list;
+    trk->cluster[cl][id].samplesInChunk = samplesInChunk;
+    trk->cluster[cl][id].size = size;
+    trk->cluster[cl][id].entries = samplesInChunk;
+    if(enc->codec_type == CODEC_TYPE_VIDEO) {
+        trk->cluster[cl][id].key_frame = enc->coded_frame->key_frame;
+        if(enc->coded_frame->pict_type == FF_I_TYPE)
             trk->hasKeyframes = 1;
-        }
-        trk->enc = enc;
-        trk->entry++;
-        if(sampleCount == 0)
-            trk->samples++;
-        else
-            trk->samples += sampleCount;
-        trk->mdat_size += size;
     }
+    trk->enc = enc;
+    trk->entry++;
+    trk->sampleCount += samplesInChunk;
+    trk->mdat_size += size;
+
     put_buffer(pb, buf, size);
 
     put_flush_packet(pb);