Mercurial > mplayer.hg
changeset 16403:a90d07677f2d
Obsoleted...
author | ods15 |
---|---|
date | Tue, 06 Sep 2005 05:33:14 +0000 |
parents | 75532be41c6d |
children | 4a1faa9c8f07 |
files | DOCS/tech/mncf.txt |
diffstat | 1 files changed, 0 insertions(+), 658 deletions(-) [+] |
line wrap: on
line diff
--- a/DOCS/tech/mncf.txt Mon Sep 05 12:05:29 2005 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,658 +0,0 @@ - NUT Open Container Format DRAFT 20041005 (Michael's experimental fork) - ---------------------------------------- - - - - Intro: - -Features / goals: - (supported by the format, not necessary by a specific implementation) - -Simple - use the same encoding for nearly all fields - simple decoding, so slow cpus (and embedded systems) can handle it -Extendible - no limit for the possible values for all fields (using universal vlc) - allow adding of new headers in the future - allow adding more fields at the end of headers -Compact - ~0.2% overhead, for normal bitrates - index is <10kb per hour (1 keyframe every 3sec) - a usual header for a file is about 100bytes (audio + video headers together) - a packet header is about ~1-8 bytes -Error resistant - seeking / playback without an index - headers & index can be repeated - damaged files can be played back with minimal data lost and fast - resyncing times - - - - Definitions: - -MUST the specific part must be done to conform to this standard -SHOULD its recommanded to be done that way but its not strictly required - - - - Syntax: - - Type definitions: - -f(x) n fixed bits in big-endian order -u(x) unsigned number encoded in x bits in MSB first order - -v - value=0 - do{ - more_data u(1) - data u(7) - value= 128*value + data - }while(more_data) - -s - temp v - temp++ - if(temp&1) value= -(temp>>1) - else value= (temp>>1) - -b (binary data or string) - for(i=0; i<length; i++){ - data[i] u(8) - } - Note: strings MUST be encoded in utf8 - -vb - length v - value b - - - Bitstream syntax: -packet header - forward ptr v - -align_byte - while(not byte aligned) - one f(1) - -reserved_bytes - for(i=0; i<forward_ptr - length_of_non_reserved; i++) - reserved u(8) - a demuxer MUST ignore any reserved bytes - a muxer MUST NOT write any reserved bytes, as this would make it - inpossible to add new fields at the end of packets in the future in - a compatible way - -main header: - main_startcode f(64) - packet header - branch v - version v - stream_count v - max_distance v - max_index_distance v - global_time_base_nom v - global_time_base_denom v - for(i=0; i<256; ){ - tmp_flag v - tmp_fields v - if(tmp_fields>0) tmp_timestamp s - if(tmp_fields>1) tmp_mul v - if(tmp_fields>2) tmp_stream v - if(tmp_fields>3) tmp_size v - else tmp_size=0 - if(tmp_fields>4) tmp_res v - else tmp_res=0 - if(tmp_fields>5) count v - else count= tmp_mul - tmp_size - for(j=6; j<tmp_fields; j++){ - tmp_reserved[i] v - } - for(j=0; j<count && i<256; j++, i++){ - flags[i]= tmp_flag; - stream_id_plus1[i]= tmp_stream; - data_size_mul[i]= tmp_mul; - data_size_lsb[i]= tmp_size + j; - timestamp_delta[i]= tmp_timestamp; - reserved_count[i]= tmp_res; - } - } - reserved_bytes - checksum u(32) - -stream_header: - stream_startcode f(64) - packet_header - stream_id v - stream_class v - fourcc vb - average_bitrate v - time_base_nom v - time_base_denom v - msb_timestamp_shift v - decode_delay v - fixed_fps u(1) - reserved u(6) - codec_specific_data vb - -video_stream_header: - stream_header - width v - height v - sample_width v - sample_height v - colorspace_type v - reserved_bytes - checksum u(32) - -audio_stream_header: - stream_header - samplerate_nom v - samplerate_denom v - channel_count v - reserved_bytes - checksum u(32) - - -frame - frame_code f(8) - if(stream_id_plus1[frame_code]==0){ - stream_id v - } - if(timestamp_delta[frame_code]==0){ - coded_timestamp v - } - if(flags[frame_code]&1){ - data_size_msb v - } - for(i=0; i<reserved_count[frame_code]; i++) - reserved v - data - -Index: - index_startcode f(64) - packet header - stream_id v - index_length v - for(i=0; i<index_length; i++){ - index_timestamp v - index_position v - } - reserved_bytes - checksum u(32) - -info_packet: (optional) - info_startcode f(64) - packet header - for(;;){ - id v - if(id==0) break - name= info_table[id][0] - type= info_table[id][1] - if(type==NULL) - type vb - if(name==NULL) - name vb - if(type=="v") - value v - else - value vb - } - reserved_bytes - checksum u(32) - -sync_point - frame_startcode f(64) - global_timestamp v - -file - file_id_string - while(!eof && next_code != index_startcode){ - main_header - for(i=0; i<stream_count; i++){ - if(next_packet==video_stream_header) - video_stream_header - else - audio_stream_header - } - while(next_code != main_startcode){ - if(next_code == info_startcode) - info_packet - else{ - if(next_code == frame_startcode) - sync_point - frame - } - } - } - index - -forward_ptr - size of the packet (exactly the distance from the first byte of the - startcode of the current packet to the first byte of the following packet - -file_id_string - "nut/multimedia container\0" - -*_startcode - all startcodes start with 'N' - -main_startcode - 0x7A561F5F04ADULL + (((uint64_t)('N'<<8) + 'M')<<48) -stream_starcode - 0x11405BF2F9DBULL + (((uint64_t)('N'<<8) + 'S')<<48) -frame_startcode - 0xE4ADEECA4569ULL + (((uint64_t)('N'<<8) + 'K')<<48) - frame_startcodes SHOULD be placed immedeatly before a keyframe if the - previous frame of the same stream was a non-keyframe, unless such - non-keyframe - keyframe tansitions are very frequent - -index_startcode - 0xDD672F23E64EULL + (((uint64_t)('N'<<8) + 'X')<<48) -info_startcode - 0xAB68B596BA78ULL + (((uint64_t)('N'<<8) + 'I')<<48) - -branch - 1 for my fork, 0 for the official nut container - -version - 2 for now - -max_distance - max distance of frame_startcodes, the distance may only be larger if - there is only a single frame between the 2 frame_startcodes - this can be used by the demuxer to detect damaged frame headers if the - damage results in a too long chain - SHOULD be set to <=32768 or at least <=65536 unless there is a very good - reason to set it higher otherwise reasonable error recovery will be - impossible - -max_index_distance - max distance of keyframes which are represented in the index, the - distance between consecutive entries A and B may only be larger if - there are no keyframes within this stream between A and B - SHOULD be set to <=32768 or at least <=65536 unless there is a very good - reason to set it higher - -stream_id - Note: streams with a lower relative class MUST have a lower relative id - so a stream with class 0 MUST allways have a id which is lower then any - stream with class > 0 - stream_id MUST be < stream_count - -stream_class - 0 video - 32 audio - 64 subtiles - Note the remaining values are reserved and MUST NOT be used - a demuxer MUST ignore streams with reserved classes - -fourcc - identification for the codec - example: "H264" - MUST contain 2 or 4 bytes, note, this might be increased in the future - if needed - -time_base_nom / time_base_denom = time_base - the number of timer ticks per second, this MUST be equal to the fps - if the fixed_fps is 1 - time_base_denom MUST not be 0 - time_base_nom and time_base_denom MUST be relative prime - time_base_nom MUST be < 2^31 - examples: - fps time_base_nom time_base_denom - 30 30 1 - 29.97 30000 1001 - 23.976 24000 1001 - sample_rate sample_rate_mul time_base_nom time_base_denom - 44100 1 44100 1 - 44100 64 11025 16 - 48000 1024 375 8 - Note: the advantage to using a large sample_rate_mul is that the - timestamps need fewer bits - -global_time_base_nom / global_time_base_denom = global_time_base - the number of timer ticks per second - global_time_base_denom MUST not be 0 - global_time_base_nom and global_time_base_denom MUST be relative prime - global_time_base_nom MUST be < 2^31 - -global_timestamp - timestamp in global_time_base units - when a global_timestamp is encountered the last_timestamp of all streams - is set to the following: - ln= global_time_base_denom*time_base_nom - sn= global_timestamp - d1= global_time_base_nom - d2= time_base_denom - last_timestamp= (ln/d1*sn + ln%d1*sn/d1)/d2 - Note, this calculation MUST be done with unsigned 64 bit integers, and - is equivalent to (ln*sn)/(d1*d2) but this would require a 96bit integer - -msb_timestamp_shift - amount of bits in lsb_timestamp - MUST be <16 - -decode_delay - maximum time between input and output for a codec, used to generate dts - from pts - is 0 for streams without b frames, and 1 for streams with b frames, may - be larger for future codecs - -fixed_fps - 1 indicates that the fps is fixed - -codec_specific_data - private global data for a codec (could be huffman tables or ...) - -frame_code - the meaning of this byte is stored in the main header - the value 78 ('N') is forbidden to ensure that the byte is always - different from the first byte of any startcode - -flags[frame_code] - the bits of the flags from MSB to LSB are KD - if D is 1 then data_size_msb is coded, otherwise data_size_msb is 0 - K is the keyframe_type - 0-> no keyframe, - 1-> keyframe, - flags=4 can be used to mark illegal frame_code bytes - frame_code=78 must have flags=4 - * frames MUST not depend(1) upon frames prior to the last - frame_startcode - depend(1) means dependancy on the container level (NUT) not dependancy - on the codec level - -stream_id_plus1[frame_code] - must be <250 - if its 0 then the stream_id is coded in the frame - -data_size_mul[frame_code] - must be <16384 - -data_size_lsb[frame_code] - must be <16384 - -timestamp_delta[frame_code] - must be <16384 and >-16384 - -data_size - data_size= data_size_lsb + data_size_msb*data_size_mul; - -coded_timestamp - if coded_timestamp < (1<<msb_timestamp_shift) then its a - lsb timestamp, otherwise its a full timestamp + (1<<msb_timestamp_shift) - lsb timestamps are converted to full timesamps by: - mask = (1<<msb_timestamp_shift)-1; - delta= last_timestamp - mask/2 - timestamp= ((timestamp_lsb-delta)&mask) + delta - a full timestamp must be used if there is no reference timestamp - available after the last frame_startcode with the current stream_id - -lsb_timestamp - least significant bits of the timestamp in time_base precission - Example: IBBP display order - keyframe timestamp=0 -> timestamp=0 - frame lsb_timestamp=3 -> timestamp=3 - frame lsb_timestamp=1 -> timestamp=1 - frame lsb_timestamp=2 -> timestamp=2 - ... - keyframe msb_timestamp=257 -> timestamp=257 - frame lsb_timestamp=255->timestamp=255 - frame lsb_timestamp=0 -> timestamp=256 - frame lsb_timestamp=4 -> timestamp=260 - frame lsb_timestamp=2 -> timestamp=258 - frame lsb_timestamp=3 -> timestamp=259 - all timestamps of keyframes of a single stream MUST be monotone - -dts - dts are calculated by using a decode_delay+1 sized buffer for each - stream, into which the current pts is inserted and the element with - the smallest value is removed, this is then the current dts - this buffer is initalized with decode_delay -1 elements - all frames with dts == timestamp must be monotone, that means a frame - which occures later in the stream must have a larger or equal dts - then an earlier frame - FIXME rename timestamp* to pts* ? - -width/height - MUST be set to the coded width/height - -sample_width/sample_height (aspect ratio) - sample_width is the horizontal distance between samples - sample_width and sample_height MUST be relative prime if not zero - MUST be 0 if unknown - -colorspace_type - 0 unknown - 1 ITU Rec 624 / ITU Rec 601 Y range: 16..235 Cb/Cr range: 16..240 - 2 ITU Rec 709 Y range: 16..235 Cb/Cr range: 16..240 - 17 ITU Rec 624 / ITU Rec 601 Y range: 0..255 Cb/Cr range: 0..255 - 18 ITU Rec 709 Y range: 0..255 Cb/Cr range: 0..255 - -samplerate_nom / samplerate_denom = samplerate - the number of samples per second - -checksum - adler32 checksum - -index_timestamp - value of the timetamp of a keyframe relative to the last keyframe - stored in this index - -index_position - position in bytes of the first byte of a keyframe, relative to the - last keyframe stored in this index - there MUST be no keyframe with the same stream_id as this index between - 2 consecutive index entries if they are more then max_index_distance - appart - -id - the id of the type/name pair, so its more compact - 0 means end - -type - for example: "UTF8" -> String or "JPEG" -> jpeg image - Note: nonstandard fields should be prefixed by "X-" - Note: MUST be less than 6 byte long (might be increased to 64 later) - -name - the name of the info entry, valid names are - "TotalTime" total length of the stream in msecs - "StreamId" the stream(s) to which the info packet applies - "StartTimestamp" - "EndTimestamp" the time range in msecs to which the info applies - "SegmentId" a unique id for the streams + time specified - "Author" - "Description" - "Copyright" - "Encoder" the name & version of the software used for encoding - "Title" - "Cover" an image of the (cd,dvd,vhs,..) cover (preferable PNG or JPEG) - "Source" "DVD", "VCD", "CD", "MD", "FM radio", "VHS", "TV", - "LD" - Optional: appended PAL,NTSC,SECAM, ... in parentheses - "CaptureDevice" "BT878", "BT848", "webcam", ... (more exact names are fine too) - "CreationTime" "2003-01-20 20:13:15Z", ... - (ISO 8601 format, see http://www.cl.cam.ac.uk/~mgk25/iso-time.html) - Note: dont forget the timezone - "ReplayGain" - "Keywords" - "Language" ISO 639 and ISO 3166 for language/country code - something like "usen" (US english), can be 0 if unknown - and "multi" if several languages - see http://www.loc.gov/standards/iso639-2/englangn.html - and http://www.din.de/gremien/nas/nabd/iso3166ma/codlstp1/en_listp1.htmlthe language code - "Disposition" "original", "dub" (translated), "comment", "lyrics", "karaoke" - Note: if someone needs some others, please tell us about them, so we can - add them to the official standard (if they are sane) - Note: nonstandard fields should be prefixed by "X-" - Note: MUST be less than 64 bytes long - -value - value of this name/type pair - -stuffing - 0x80 can be placed infront of any type v entry for stuffing - purposes - -info_table[][2]={ - {NULL , NULL }, // end - {NULL , NULL }, - {NULL , "UTF8"}, - {NULL , "v"}, - {NULL , "s"}, - {"StreamId" , "v"}, - {"SegmentId" , "v"}, - {"StartTimestamp" , "v"}, - {"EndTimestamp" , "v"}, - {"Author" , "UTF8"}, - {"Titel" , "UTF8"}, - {"Language" , "UTF8"}, - {"Description" , "UTF8"}, - {"Copyright" , "UTF8"}, - {"Encoder" , "UTF8"}, - {"Keyword" , "UTF8"}, - {"Cover" , "JPEG"}, - {"Cover" , "PNG"}, - {"Disposition" , "UTF8"}, -}; - - Structure: - -the headers MUST be in exactly the following order (to simplify demuxer design) -main header -stream_header (id=0) -stream_header (id=1) -... -stream_header (id=n) - -headers may be repated, but if they are then they MUST all be repeated together -and repeated headers MUST be identical -headers MAY only repeated at the closest possible positions after 2^x where x is -an integer and the file end, so the headers may be repeated at 4102 if thats the -closest possition after 2^12=4096 at which the headers can be placed - -headers MUST be placed at least at the begin of the file and immedeatly before -the index or at the file end if there is no index -headers MUST be repeated at least twice (so they exist 3 times in a file) - -a demuxer MUST not demux a stream which contains more than one stream, or which -is wrapped in a structure to facilitate more than one stream or otherwise -duplicate the role of a container. any such file is to be considered invalid - -info packets which describe the whole file or individual streams/tracks must be -placed before any video/audio/... frames - - Index -Note: in case of realtime streaming there is no end, so no index there either - - Info packets -the info_packet can be repeated, it can also contain different names & values -each time but only if allso the time is different -Info packets can be used to describe the file or some part of it (chapters) - -info packets, SHOULD be placed at the begin of the file at least -for realtime streaming info packets will normally be transmitted when they apply -for example, the current song title & artist of the currently shown music video - - Unknown packets -MUST be ignored by the demuxer - - demuxer (non-normative) - -in the absence of valid header at beginning, players SHOULD search for backup -headers starting at offset 2^x for each x players SHOULD end their search from a -particular offset when any startcode is found (including syncpoint) - - - Sample code (GPL, & untested) - -typedef BufferContext{ - uint8_t *buf; - uint8_t *buf_ptr; -}BufferContext; - -static inline uint64_t get_bytes(BufferContext *bc, int count){ - uint64_t val=0; - - assert(count>0 && count<9) - - for(i=0; i<count; i++){ - val <<=8; - val += *(bc->buf_ptr++); - } - - return val; -} - -static inline void put_bytes(BufferContext *bc, int count, uint64_t val){ - uint64_t val=0; - - assert(count>0 && count<9) - - for(i=count-1; i>=0; i--){ - *(bc->buf_ptr++)= val >> (8*i); - } - - return val; -} - -static inline uint64_t get_v(BufferContext *bc){ - uint64_t val= 0; - - for(; space_left(bc) > 0; ){ - int tmp= *(bc->buf_ptr++); - if(tmp&0x80) - val= (val<<7) + tmp - 0x80; - else - return (val<<7) + tmp; - } - - return -1; -} - -static inline int put_v(BufferContext *bc, uint64_t val){ - int i; - - if(space_left(bc) < 9) return -1; - - val &= 0x7FFFFFFFFFFFFFFFULL; // FIXME can only encode upto 63 bits currently - for(i=7; ; i+=7){ - if(val>>i == 0) break; - } - - for(i-=7; i>0; i-=7){ - *(bc->buf_ptr++)= 0x80 | (val>>i); - } - *(bc->buf_ptr++)= val&0x7F; - - return 0; -} - -static int64_t get_dts(int64_t pts, int64_t *pts_cache, int delay, int reset){ - if(reset) memset(pts_cache, -1, delay*sizeof(int64_t)); - - while(delay--){ - int64_t t= pts_cache[delay]; - if(t < pts){ - pts_cache[delay]= pts; - pts= t; - } - } - - return pts; -} - - Authors - -Folks from MPlayer Developers Mailinglist (http://www.mplayehrq.hu/). -Authors in ABC-order: (FIXME! Tell us if we left you out) - Beregszaszi, Alex (alex@fsn.hu) - Bunkus, Moritz (moritz@bunkus.org) - Diedrich, Tobias (ranma+mplayer@tdiedrich.de) - Felker, Rich (dalias@aerifal.cx) - Franz, Fabian (FabianFranz@gmx.de) - Gereoffy, Arpad (arpi@thot.banki.hu) - Hess, Andreas (jaska@gmx.net) - Niedermayer, Michael (michaelni@gmx.at)