changeset 13559:dca22cffea19

remove non native codec specific data move lang to the info packet
author michael
date Tue, 05 Oct 2004 12:04:56 +0000
parents 2a9e0fa68a69
children ddb3c1da735b
files DOCS/tech/mncf.txt
diffstat 1 files changed, 661 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/DOCS/tech/mncf.txt	Tue Oct 05 12:04:56 2004 +0000
@@ -0,0 +1,661 @@
+	NUT Open Container Format DRAFT 20041005 (Michael's experimental fork)
+		----------------------------------------
+
+
+                
+			Intro:
+
+Features / goals: 
+	(supported by the format, not necessary by a specific implementation)
+
+Simple
+	use the same encoding for nearly all fields
+	simple decoding, so slow cpus (and embedded systems) can handle it
+Extendible
+	no limit for the possible values for all fields (using universal vlc)
+	allow adding of new headers in the future
+	allow adding more fields at the end of headers
+Compact
+	~0.2% overhead, for normal bitrates
+	index is <10kb per hour (1 keyframe every 3sec)
+	a usual header for a file is about 100bytes (audio + video headers together)
+	a packet header is about ~1-8 bytes
+Error resistant
+	seeking / playback without an index
+	headers & index can be repeated
+	damaged files can be played back with minimal data lost and fast
+	resyncing times
+
+
+        
+			Definitions:
+
+MUST	the specific part must be done to conform to this standard
+SHOULD	its recommanded to be done that way but its not strictly required
+
+
+
+			Syntax:
+
+		Type definitions:
+
+f(x)	n fixed bits in big-endian order
+u(x)	unsigned number encoded in x bits in MSB first order
+
+v
+	value=0
+	do{
+		more_data			u(1)
+		data				u(7)
+		value= 128*value + data
+	}while(more_data)
+        
+s
+	temp					v
+	temp++
+	if(temp&1) value= -(temp>>1)
+	else       value=  (temp>>1)
+
+b (binary data or string)
+	for(i=0; i<length; i++){
+		data[i]				u(8)
+	}
+	Note: strings MUST be encoded in utf8
+
+vb
+	length					v
+	value					b
+
+
+		Bitstream syntax:
+packet header
+	forward ptr				v
+
+align_byte
+	while(not byte aligned)
+		one				f(1)
+
+reserved_bytes
+	for(i=0; i<forward_ptr - length_of_non_reserved; i++)
+		reserved			u(8)
+	a demuxer MUST ignore any reserved bytes
+	a muxer MUST NOT write any reserved bytes, as this would make it
+	inpossible to add new fields at the end of packets in the future in
+	a compatible way
+
+main header:
+	main_startcode				f(64)
+	packet header
+	version					v
+	stream_count				v
+	max_distance				v
+	max_short_distance			v
+	global_time_base_nom			v
+	global_time_base_denom			v
+	short_startcode				v
+	for(i=0; i<256; ){
+		tmp_flag			v
+		tmp_fields			v
+		if(tmp_fields>0) tmp_timestamp	s
+		if(tmp_fields>1) tmp_mul	v
+		if(tmp_fields>2) tmp_stream	v
+		if(tmp_fields>3) tmp_size	v
+		else tmp_size=0
+		if(tmp_fields>4) tmp_res	v
+		else tmp_res=0
+		if(tmp_fields>5) count		v
+		else count= tmp_mul - tmp_size
+		for(j=6; j<tmp_fields; j++){
+			tmp_reserved[i]		v
+		}
+		for(j=0; j<count && i<256; j++, i++){
+			flags[i]= tmp_flag;
+			stream_id_plus1[i]= tmp_stream;
+			data_size_mul[i]= tmp_mul;
+			data_size_lsb[i]= tmp_size + j;
+			timestamp_delta[i]= tmp_timestamp;
+			reserved_count[i]= tmp_res;
+		}
+	}
+	reserved_bytes
+	checksum				u(32)
+
+stream_header:
+	stream_startcode			f(64)
+	packet_header
+	stream_id				v
+	stream_class				v
+	fourcc					vb
+	average_bitrate				v
+	time_base_nom				v
+	time_base_denom				v
+	msb_timestamp_shift			v
+	decode_delay				v
+	fixed_fps				u(1)
+	reserved				u(6)
+	codec_specific_data			vb
+
+video_stream_header:
+	stream_header
+	width					v
+	height					v
+	sample_width				v
+	sample_height				v
+	colorspace_type				v
+	reserved_bytes
+	checksum				u(32)
+
+audio_stream_header:
+	stream_header
+	samplerate_nom				v
+	samplerate_denom			v
+	channel_count				v
+	reserved_bytes
+	checksum				u(32)
+
+ 
+frame
+	frame_code				f(8)
+	if(stream_id_plus1[frame_code]==0){
+		stream_id			v
+	}
+	if(timestamp_delta[frame_code]==0){
+		coded_timestamp			v
+	}
+	if(flags[frame_code]&1){
+		data_size_msb			v
+	}
+	for(i=0; i<reserved_count[frame_code]; i++)
+		reserved			v
+	data
+                
+Index:
+	index_startcode				f(64)
+	packet header
+	index_length				v
+	for(i=0; i<index_length; i++){
+		index_timestamp			v
+		index_position			v
+	}
+	reserved_bytes
+	checksum				u(32)
+
+info_packet: (optional)
+	info_startcode				f(64)
+	packet header
+	for(;;){
+		id				v
+		if(id==0) break
+		name= info_table[id][0]
+		type= info_table[id][1]
+		if(type==NULL)
+			type			vb
+		if(name==NULL)
+			name			vb
+		if(type=="v")
+			value			v
+		else
+			value			vb
+	}
+	reserved_bytes
+        checksum				u(32)
+        
+sync_point
+	frame_startcode				f(64)
+		global_timestamp		v
+
+file
+	file_id_string
+	while(!eof && next_code != index_startcode){
+		main_header
+		for(i=0; i<stream_count; i++){
+			if(next_packet==video_stream_header)
+				video_stream_header
+			else
+				audio_stream_header
+		}
+		while(next_code != main_startcode){
+			if(next_code == info_startcode)
+				info_packet
+			else{
+				if(next_code == short_startcode)
+					short_startcode    u(24)
+				else if(next_code == frame_startcode)                
+					sync_point
+				frame
+			}
+		}
+	}
+	index
+                
+forward_ptr
+	size of the packet (exactly the distance from the first byte of the
+	startcode of the current packet to the first byte of the following packet
+
+file_id_string
+	"nut/multimedia container\0"
+
+*_startcode
+        all startcodes start with 'N'
+
+main_startcode
+	0x7A561F5F04ADULL + (((uint64_t)('N'<<8) + 'M')<<48)
+stream_starcode
+	0x11405BF2F9DBULL + (((uint64_t)('N'<<8) + 'S')<<48)
+frame_startcode
+	0xE4ADEECA4569ULL + (((uint64_t)('N'<<8) + 'K')<<48)
+	frame_startcodes SHOULD be placed immedeatly before a keyframe if the
+	previous frame of the same stream was a non-keyframe, unless such
+	non-keyframe - keyframe tansitions are very frequent
+
+index_startcode
+	0xDD672F23E64EULL + (((uint64_t)('N'<<8) + 'X')<<48)
+info_startcode
+	0xAB68B596BA78ULL + (((uint64_t)('N'<<8) + 'I')<<48)
+
+version
+	2 for now
+
+max_distance
+	max distance of frame_startcodes, the distance may only be larger if
+	there is only a single frame between the 2 frame_startcodes
+	this can be used by the demuxer to detect damaged frame headers if the
+	damage results in a too long chain
+	SHOULD be set to <=32768 or at least <=65536 unless there is a very good
+	reason to set it higher otherwise reasonable error recovery will be
+	impossible
+
+max_short_distance
+	max distance of short startcodes or frame_startcodes, the distance may
+	only be larger if there is only a single frame between the 2 
+	frame_startcodes/short startcodes this can be used by the demuxer to
+	detect damaged frame headers if the damage results in a too long chain
+	SHOULD be set to <=4096 or at least <=8192 unless there is a very good
+	reason to set it higher otherwise reasonable error recovery will be
+	impossible
+
+
+short_startcode
+	MUST be 3 bytes long and MUST have 'N' as first byte, the second byte
+	MUST not be a printable uppercase letter / must not be within 65..90, 
+	default is 0x4EFE79
+
+stream_id
+	Note: streams with a lower relative class MUST have a lower relative id
+	so a stream with class 0 MUST allways have a id which is lower then any
+	stream with class > 0
+	stream_id MUST be < stream_count
+
+stream_class
+	0	video
+	32	audio
+	64	subtiles
+	Note the remaining values are reserved and MUST NOT be used
+	     a demuxer MUST ignore streams with reserved classes
+
+fourcc
+	identification for the codec
+	example: "H264"
+	MUST contain 2 or 4 bytes, note, this might be increased in the future
+	if needed
+
+time_base_nom / time_base_denom = time_base
+	the number of timer ticks per second, this MUST be equal to the fps
+	if the fixed_fps is 1
+	time_base_denom MUST not be 0
+	time_base_nom and time_base_denom MUST be relative prime
+	time_base_nom MUST be < 2^31
+	examples:
+        	fps	time_base_nom	time_base_denom
+		30	30		1
+		29.97	30000		1001
+		23.976	24000		1001
+		sample_rate	sample_rate_mul	time_base_nom	time_base_denom
+		44100		1		44100		1
+		44100		64		11025		16 
+		48000		1024		375		8  
+		Note: the advantage to using a large sample_rate_mul is that the
+		      timestamps need fewer bits
+
+global_time_base_nom / global_time_base_denom = global_time_base
+	the number of timer ticks per second
+	global_time_base_denom MUST not be 0
+	global_time_base_nom and global_time_base_denom MUST be relative prime
+	global_time_base_nom MUST be < 2^31
+
+global_timestamp
+	timestamp in global_time_base units
+	when a global_timestamp is encountered the last_timestamp of all streams
+	is set to the following:
+	ln= global_time_base_denom*time_base_nom
+	sn= global_timestamp
+	d1= global_time_base_nom
+	d2= time_base_denom
+	last_timestamp= (ln/d1*sn + ln%d1*sn/d1)/d2
+	Note, this calculation MUST be done with unsigned 64 bit integers, and 
+	is equivalent to (ln*sn)/(d1*d2) but this would require a 96bit integer
+
+msb_timestamp_shift
+	amount of bits in lsb_timestamp
+	MUST be <16
+
+decode_delay
+	maximum time between input and output for a codec, used to generate dts
+	from pts
+	is 0 for streams without b frames, and 1 for streams with b frames, may
+	be larger for future codecs
+        
+fixed_fps
+	1 indicates that the fps is fixed
+
+codec_specific_data
+	private global data for a codec (could be huffman tables or ...)
+
+frame_code
+	the meaning of this byte is stored in the main header
+	the value 78 ('N') is forbidden to ensure that the byte is always
+	different from the first byte of any startcode
+
+flags[frame_code]
+	the bits of the flags from MSB to LSB are KD
+	if D is 1 then data_size_msb is coded, otherwise data_size_msb is 0
+	K is the keyframe_type 
+		0-> no keyframe, 
+		1-> keyframe, 
+	flags=4 can be used to mark illegal frame_code bytes
+	frame_code=78 must have flags=4
+	* frames MUST not depend(1) upon frames prior to the last
+	  frame_startcode
+	depend(1) means dependancy on the container level (NUT) not dependancy
+	on the codec level
+
+stream_id_plus1[frame_code]
+	must be <250
+	if its 0 then the stream_id is coded in the frame
+
+data_size_mul[frame_code]
+	must be <16384
+
+data_size_lsb[frame_code]
+	must be <16384
+
+timestamp_delta[frame_code]
+	must be <16384 and >-16384
+
+data_size       
+	data_size= data_size_lsb + data_size_msb*data_size_mul;
+
+coded_timestamp
+	if coded_timestamp < (1<<msb_timestamp_shift) then its a
+	lsb timestamp, otherwise its a full timestamp + (1<<msb_timestamp_shift)
+	lsb timestamps are converted to full timesamps by:
+	mask = (1<<msb_timestamp_shift)-1;
+	delta= last_timestamp - mask/2
+	timestamp= ((timestamp_lsb-delta)&mask) + delta
+	a full timestamp must be used if there is no reference timestamp
+	available after the last frame_startcode with the current stream_id
+        
+lsb_timestamp
+	least significant bits of the timestamp in time_base precission
+        Example: IBBP display order
+		keyframe timestamp=0                     -> timestamp=0
+		frame                    lsb_timestamp=3 -> timestamp=3
+		frame                    lsb_timestamp=1 -> timestamp=1
+		frame                    lsb_timestamp=2 -> timestamp=2
+		...
+		keyframe msb_timestamp=257               -> timestamp=257
+		frame                    lsb_timestamp=255->timestamp=255
+		frame                    lsb_timestamp=0 -> timestamp=256
+		frame                    lsb_timestamp=4 -> timestamp=260
+		frame                    lsb_timestamp=2 -> timestamp=258
+		frame                    lsb_timestamp=3 -> timestamp=259
+	all timestamps of keyframes of a single stream MUST be monotone
+
+dts
+	dts are calculated by using a decode_delay+1 sized buffer for each 
+	stream, into which the current pts is inserted and the element with
+	the smallest value is removed, this is then the current dts
+	this buffer is initalized with decode_delay -1 elements
+	all frames with dts == timestamp must be monotone, that means a frame
+	which occures later in the stream must have a larger or equal dts
+	then an earlier frame
+	FIXME rename timestamp* to pts* ?
+
+width/height
+	MUST be set to the coded width/height
+
+sample_width/sample_height (aspect ratio)
+	sample_width is the horizontal distance between samples
+	sample_width and sample_height MUST be relative prime if not zero
+	MUST be 0 if unknown
+        
+colorspace_type
+	0	unknown
+	1	ITU Rec 624 / ITU Rec 601 Y range: 16..235 Cb/Cr range: 16..240
+	2	ITU Rec 709               Y range: 16..235 Cb/Cr range: 16..240
+	17	ITU Rec 624 / ITU Rec 601 Y range:  0..255 Cb/Cr range:  0..255
+	18	ITU Rec 709               Y range:  0..255 Cb/Cr range:  0..255
+
+samplerate_nom / samplerate_denom = samplerate
+	the number of samples per second
+
+checksum
+	adler32 checksum
+
+index_timestamp
+	value of the timetamp in a sync point relative to the last sync-point
+
+index_position
+	position in bytes of the first byte of a sync-point, relative to the
+	last sync_point
+
+id
+	the id of the type/name pair, so its more compact
+	0 means end
+                
+type
+	for example: "UTF8" -> String or "JPEG" -> jpeg image
+	Note: nonstandard fields should be prefixed by "X-"
+	Note: MUST be less than 6 byte long (might be increased to 64 later)
+
+name
+	the name of the info entry, valid names are
+	"TotalTime"	total length of the stream in msecs
+	"StreamId"	the stream(s) to which the info packet applies
+	"StartTimestamp" 
+	"EndTimestamp" 	the time range in msecs to which the info applies
+	"SegmentId"	a unique id for the streams + time specified
+	"Author"
+	"Description"
+	"Copyright"
+	"Encoder"	the name & version of the software used for encoding
+	"Title"
+	"Cover"		an image of the (cd,dvd,vhs,..) cover (preferable PNG or JPEG)
+	"Source"	"DVD", "VCD", "CD", "MD", "FM radio", "VHS", "TV", 
+			"LD"
+			Optional: appended PAL,NTSC,SECAM, ... in parentheses
+	"CaptureDevice"	"BT878", "BT848", "webcam", ... (more exact names are fine too)
+	"CreationTime"	"2003-01-20 20:13:15Z", ... 
+			(ISO 8601 format, see http://www.cl.cam.ac.uk/~mgk25/iso-time.html)
+			Note: dont forget the timezone
+	"ReplayGain"
+	"Keywords"
+	"Language"	ISO 639 and ISO 3166 for language/country code
+			something like "usen" (US english), can be 0 if unknown
+			and "multi" if several languages
+			see http://www.loc.gov/standards/iso639-2/englangn.html
+			and http://www.din.de/gremien/nas/nabd/iso3166ma/codlstp1/en_listp1.htmlthe language code
+	"Disposition"	"original", "dub" (translated), "comment", "lyrics", "karaoke"
+	Note: if someone needs some others, please tell us about them, so we can
+	      add them to the official standard (if they are sane)
+	Note: nonstandard fields should be prefixed by "X-"
+	Note: MUST be less than 64 bytes long
+
+value
+	value of this name/type pair
+	
+stuffing
+        0x80 can be placed infront of any type v entry for stuffing
+	      purposes
+
+info_table[][2]={
+	{NULL			,  NULL }, // end
+	{NULL			,  NULL },
+	{NULL			, "UTF8"},
+	{NULL			, "v"},
+	{NULL			, "s"},
+	{"StreamId"		, "v"},
+	{"SegmentId"		, "v"},
+	{"StartTimestamp"	, "v"},
+	{"EndTimestamp"		, "v"},
+	{"Author"		, "UTF8"},
+	{"Titel"		, "UTF8"},
+	{"Language"		, "UTF8"},
+	{"Description"		, "UTF8"},
+	{"Copyright"		, "UTF8"},
+	{"Encoder"		, "UTF8"},
+	{"Keyword"		, "UTF8"},
+	{"Cover"		, "JPEG"},
+	{"Cover"		, "PNG"},
+	{"Disposition"		, "UTF8"},
+};
+
+			Structure:
+
+the headers MUST be in exactly the following order (to simplify demuxer design)
+main header
+stream_header (id=0)
+stream_header (id=1)
+...
+stream_header (id=n)
+
+headers may be repated, but if they are then they MUST all be repeated together
+and repeated headers MUST be identical
+headers MAY only repeated at the closest possible positions after 2^x where x is 
+an integer and the file end, so the headers may be repeated at 4102 if thats the
+closest possition after 2^12=4096 at which the headers can be placed
+
+headers MUST be placed at least at the begin of the file and immedeatly before
+the index or at the file end if there is no index
+headers MUST be repeated at least twice (so they exist 3 times in a file)
+
+a demuxer MUST not demux a stream which contains more than one stream, or which
+is wrapped in a structure to facilitate more than one stream or otherwise 
+duplicate the role of a container. any such file is to be considered invalid
+
+info packets which describe the whole file or individual streams/tracks must be
+placed before any video/audio/... frames
+
+		Index
+every sync-point must be exacty once in the index
+Note: in case of realtime streaming there is no end, so no index there either
+
+		Info packets
+the info_packet can be repeated, it can also contain different names & values
+each time but only if allso the time is different
+Info packets can be used to describe the file or some part of it (chapters)
+
+info packets, SHOULD be placed at the begin of the file at least
+for realtime streaming info packets will normally be transmitted when they apply
+for example, the current song title & artist of the currently shown music video
+
+		Unknown packets
+MUST be ignored by the demuxer
+
+			demuxer (non-normative)
+
+in the absence of valid header at beginning, players SHOULD search for backup
+headers starting at offset 2^x for each x players SHOULD end their search from a
+particular offset when any startcode is found (including syncpoint)
+
+
+			Sample code (GPL, & untested)
+
+typedef BufferContext{
+	uint8_t *buf;
+	uint8_t *buf_ptr;
+}BufferContext;
+
+static inline uint64_t get_bytes(BufferContext *bc, int count){
+	uint64_t val=0;
+
+	assert(count>0 && count<9)
+        
+	for(i=0; i<count; i++){
+		val <<=8;
+		val += *(bc->buf_ptr++);
+	}
+        
+	return val;
+}
+
+static inline void put_bytes(BufferContext *bc, int count, uint64_t val){
+	uint64_t val=0;
+
+	assert(count>0 && count<9)
+
+	for(i=count-1; i>=0; i--){
+		*(bc->buf_ptr++)= val >> (8*i);
+	}
+        
+	return val;
+}
+
+static inline uint64_t get_v(BufferContext *bc){
+	uint64_t val= 0;
+
+	for(; space_left(bc) > 0; ){
+		int tmp= *(bc->buf_ptr++);
+		if(tmp&0x80)
+			val= (val<<7) + tmp - 0x80;
+		else
+			return (val<<7) + tmp;
+	}
+        
+	return -1;
+}
+
+static inline int put_v(BufferContext *bc, uint64_t val){
+	int i;
+        
+	if(space_left(bc) < 9) return -1;
+
+	val &= 0x7FFFFFFFFFFFFFFFULL; // FIXME can only encode upto 63 bits currently
+	for(i=7; ; i+=7){
+		if(val>>i == 0) break;
+	}
+
+	for(i-=7; i>0; i-=7){
+		*(bc->buf_ptr++)= 0x80 | (val>>i);
+	}
+	*(bc->buf_ptr++)= val&0x7F;
+        
+	return 0;
+}
+
+static int64_t get_dts(int64_t pts, int64_t *pts_cache, int delay, int reset){
+	if(reset) memset(pts_cache, -1, delay*sizeof(int64_t));
+
+	while(delay--){
+		int64_t t= pts_cache[delay];
+		if(t < pts){
+			pts_cache[delay]= pts;
+			pts= t;
+		}
+	}
+
+	return pts;
+}
+
+		        Authors
+
+Folks from MPlayer Developers Mailinglist (http://www.mplayehrq.hu/).
+Authors in ABC-order: (FIXME! Tell us if we left you out)
+    Beregszaszi, Alex (alex@fsn.hu)
+    Bunkus, Moritz (moritz@bunkus.org)
+    Diedrich, Tobias (td@sim.uni-hannover.de)
+    Felker, Rich (dalias@aerifal.cx)
+    Franz, Fabian (FabianFranz@gmx.de)
+    Gereoffy, Arpad (arpi@thot.banki.hu)
+    Hess, Andreas (jaska@gmx.net)
+    Niedermayer, Michael (michaelni@gmx.at)