Mercurial > mplayer.hg
comparison DOCS/tech/nut.txt @ 19057:7c30d49e7850
rename mpcf.txt to nut.txt
author | michael |
---|---|
date | Thu, 13 Jul 2006 10:29:21 +0000 |
parents | DOCS/tech/mpcf.txt@36f993b7f91d |
children | 4c979c32f5cc |
comparison
equal
deleted
inserted
replaced
19056:36f993b7f91d | 19057:7c30d49e7850 |
---|---|
1 ================================== | |
2 NUT Open Container Format 20060713 | |
3 ================================== | |
4 | |
5 | |
6 | |
7 Intro: | |
8 ====== | |
9 | |
10 Features / goals: | |
11 (supported by the format, not necessarily by a specific implementation) | |
12 | |
13 Simple | |
14 use the same encoding for nearly all fields | |
15 simple decoding, so slow CPUs (and embedded systems) can handle it | |
16 | |
17 Extendible | |
18 no limit for the possible values of all fields (using universal vlc) | |
19 allow adding of new headers in the future | |
20 allow adding more fields at the end of headers | |
21 | |
22 Compact | |
23 ~0.2% overhead, for normal bitrates | |
24 index is <100kb per hour | |
25 a usual header for a file is about 100 bytes (audio + video headers together) | |
26 a packet header is about ~1-5 bytes | |
27 | |
28 Error resistant | |
29 seeking / playback without an index | |
30 headers & index can be repeated | |
31 damaged files can be played back with minimal data loss and fast | |
32 resync times | |
33 | |
34 The spec is frozen. All files following spec will be compatible unless the | |
35 spec is unfrozen. | |
36 | |
37 | |
38 Definitions: | |
39 ============ | |
40 | |
41 MUST the specific part must be done to conform to this standard | |
42 SHOULD it is recommended to be done that way, but not strictly required | |
43 | |
44 | |
45 | |
46 Syntax: | |
47 ======= | |
48 | |
49 Since NUT heavily uses variable length fields, the simplest way to describe it | |
50 is using a pseudocode approach. | |
51 | |
52 | |
53 | |
54 Conventions: | |
55 ============ | |
56 | |
57 The data types have a name, used in the bitstream syntax description, a short | |
58 text description and a pseudocode (functional) definition, optional notes may | |
59 follow: | |
60 | |
61 name (text description) | |
62 functional definition | |
63 [Optional notes] | |
64 | |
65 The bitstream syntax elements have a tagname and a functional definition, they | |
66 are presented in a bottom up approach, again optional notes may follow and | |
67 are reproduced in the tag description: | |
68 | |
69 name: (optional note) | |
70 functional definition | |
71 [Optional notes] | |
72 | |
73 The in-depth tag description follows the bitstream syntax. | |
74 The functional definition has a C-like syntax. | |
75 | |
76 | |
77 | |
78 Type definitions: | |
79 ================= | |
80 | |
81 f(n) (n fixed bits in big-endian order) | |
82 u(n) (unsigned number encoded in n bits in MSB-first order) | |
83 | |
84 v (variable length value, unsigned) | |
85 value=0 | |
86 do{ | |
87 more_data u(1) | |
88 data u(7) | |
89 value= 128*value + data | |
90 }while(more_data) | |
91 | |
92 s (variable length value, signed) | |
93 temp v | |
94 temp++ | |
95 if(temp&1) value= -(temp>>1) | |
96 else value= (temp>>1) | |
97 | |
98 b (binary data or string, to be used in vb, see below) | |
99 for(i=0; i<length; i++){ | |
100 data[i] u(8) | |
101 } | |
102 [Note: strings MUST be encoded in UTF-8] | |
103 [Note: the character NUL (U+0000) is not legal within | |
104 or at the end of a string.] | |
105 | |
106 vb (variable length binary data or string) | |
107 length v | |
108 value b | |
109 | |
110 t (v coded universal timestamp) | |
111 tmp v | |
112 id= tmp % time_base_count | |
113 value= (tmp / time_base_count) * time_base[id] | |
114 | |
115 | |
116 Bitstream syntax: | |
117 ================= | |
118 | |
119 Common elements: | |
120 ---------------- | |
121 | |
122 reserved_bytes: | |
123 for(i=0; i<forward_ptr - length_of_non_reserved; i++) | |
124 reserved u(8) | |
125 [a demuxer MUST ignore any reserved bytes | |
126 a muxer MUST NOT write any reserved bytes, as this would make it | |
127 impossible to add new fields at the end of packets in the future | |
128 in a compatible way] | |
129 | |
130 packet_header | |
131 startcode f(64) | |
132 forward_ptr v | |
133 if(forward_ptr > 4096) | |
134 header_checksum u(32) | |
135 | |
136 packet_footer | |
137 reserved_bytes | |
138 checksum u(32) | |
139 [Note: in index packet, reserved_bytes comes before index_ptr] | |
140 | |
141 reserved_headers | |
142 while(next_byte == 'N' && next_code != main_startcode | |
143 && next_code != stream_startcode | |
144 && next_code != info_startcode | |
145 && next_code != index_startcode | |
146 && next_code != syncpoint_startcode){ | |
147 packet_header | |
148 packet_footer | |
149 } | |
150 | |
151 Headers: | |
152 | |
153 main header: | |
154 version v | |
155 stream_count v | |
156 max_distance v | |
157 time_base_count v | |
158 for(i=0; i<time_base_count; i++) | |
159 time_base_nom v | |
160 time_base_denom v | |
161 time_base[i]= time_base_nom/time_base_denom | |
162 tmp_pts=0 | |
163 tmp_mul=1 | |
164 tmp_stream=0 | |
165 for(i=0; i<256; ){ | |
166 tmp_flag v | |
167 tmp_fields v | |
168 if(tmp_fields>0) tmp_pts s | |
169 if(tmp_fields>1) tmp_mul v | |
170 if(tmp_fields>2) tmp_stream v | |
171 if(tmp_fields>3) tmp_size v | |
172 else tmp_size=0 | |
173 if(tmp_fields>4) tmp_res v | |
174 else tmp_res=0 | |
175 if(tmp_fields>5) count v | |
176 else count= tmp_mul - tmp_size | |
177 for(j=6; j<tmp_fields; j++){ | |
178 tmp_reserved[i] v | |
179 } | |
180 for(j=0; j<count && i<256; j++, i++){ | |
181 if (i == 'N') { | |
182 flags[i]= FLAG_INVALID; | |
183 j--; | |
184 continue; | |
185 } | |
186 flags[i]= tmp_flag; | |
187 stream_id[i]= tmp_stream; | |
188 data_size_mul[i]= tmp_mul; | |
189 data_size_lsb[i]= tmp_size + j; | |
190 pts_delta[i]= tmp_pts; | |
191 reserved_count[i]= tmp_res; | |
192 } | |
193 } | |
194 | |
195 stream_header: | |
196 stream_id v | |
197 stream_class v | |
198 fourcc vb | |
199 time_base_id v | |
200 msb_pts_shift v | |
201 max_pts_distance v | |
202 decode_delay v | |
203 stream_flags v | |
204 codec_specific_data vb | |
205 if(stream_class == video){ | |
206 width v | |
207 height v | |
208 sample_width v | |
209 sample_height v | |
210 colorspace_type v | |
211 }else if(stream_class == audio){ | |
212 samplerate_nom v | |
213 samplerate_denom v | |
214 channel_count v | |
215 } | |
216 | |
217 Basic Packets: | |
218 | |
219 frame: | |
220 frame_code f(8) | |
221 frame_flags= flags[frame_code] | |
222 if(frame_flags&FLAG_CODED){ | |
223 coded_flags v | |
224 frame_flags ^= coded_flags | |
225 } | |
226 if(frame_flags&FLAG_STREAM_ID){ | |
227 stream_id v | |
228 } | |
229 if(frame_flags&FLAG_CODED_PTS){ | |
230 coded_pts v | |
231 } | |
232 if(frame_flags&FLAG_SIZE_MSB){ | |
233 data_size_msb v | |
234 } | |
235 if(frame_flags&FLAG_RESERVED) | |
236 reserved_count[frame_code] v | |
237 for(i=0; i<reserved_count[frame_code]; i++) | |
238 reserved v | |
239 if(frame_flags&FLAG_CHECKSUM){ | |
240 checksum u(32) | |
241 } | |
242 data | |
243 | |
244 index: | |
245 max_pts t | |
246 syncpoints v | |
247 for(i=0; i<syncpoints; i++){ | |
248 syncpoint_pos_div16 v | |
249 } | |
250 for(i=0; i<stream_count; i++){ | |
251 last_pts= -1 | |
252 for(j=0; j<syncpoints; ){ | |
253 x v | |
254 type= x & 1 | |
255 x>>=1 | |
256 n=j | |
257 if(type){ | |
258 flag= x & 1 | |
259 x>>=1 | |
260 while(x--) | |
261 has_keyframe[n++][i]=flag | |
262 has_keyframe[n++][i]=!flag; | |
263 }else{ | |
264 while(x != 1){ | |
265 has_keyframe[n++][i]=x&1; | |
266 x>>=1; | |
267 } | |
268 } | |
269 for(; j<n && j<syncpoints; j++){ | |
270 if (!has_keyframe[j][i]) continue | |
271 A v | |
272 if(!A){ | |
273 A v | |
274 B v | |
275 eor_pts[j][i] = last_pts + A + B | |
276 }else | |
277 B=0 | |
278 keyframe_pts[j][i] = last_pts + A | |
279 last_pts += A + B | |
280 } | |
281 } | |
282 } | |
283 reserved_bytes | |
284 index_ptr u(64) | |
285 | |
286 info_packet: | |
287 stream_id_plus1 v | |
288 chapter_id v | |
289 chapter_start t | |
290 chapter_len v | |
291 count v | |
292 for(i=0; i<count; i++){ | |
293 name vb | |
294 value s | |
295 if (value==-1){ | |
296 type= "UTF-8" | |
297 value vb | |
298 }else if (value==-2){ | |
299 type vb | |
300 value vb | |
301 }else if (value==-3){ | |
302 type= "s" | |
303 value s | |
304 }else if (value==-4){ | |
305 type= "t" | |
306 value t | |
307 }else if (value<-4){ | |
308 type= "r" | |
309 value.den= -value-4 | |
310 value.num s | |
311 }else{ | |
312 type= "v" | |
313 } | |
314 } | |
315 | |
316 syncpoint: | |
317 global_key_pts t | |
318 back_ptr_div16 v | |
319 | |
320 Complete definition: | |
321 | |
322 file: | |
323 file_id_string | |
324 while(!eof){ | |
325 if(next_byte == 'N'){ | |
326 packet_header | |
327 switch(startcode){ | |
328 case main_startcode: main_header; break; | |
329 case stream_startcode:stream_header; break; | |
330 case info_startcode: info_packet; break; | |
331 case index_startcode: index; break; | |
332 case syncpoint_startcode: syncpoint; break; | |
333 } | |
334 packet_footer | |
335 }else | |
336 frame | |
337 } | |
338 | |
339 the structure of an undamaged file should look like the following, but | |
340 demuxers should be flexible and be able to deal with damaged headers so the | |
341 above is a better loop in practice (not to mention it's simpler) | |
342 note, demuxers MUST be able to deal with new and unknown headers | |
343 | |
344 file: | |
345 file_id_string | |
346 while(!eof){ | |
347 packet_header, main_header, packet_footer | |
348 reserved_headers | |
349 for(i=0; i<stream_count; i++){ | |
350 packet_header, stream_header, packet_footer | |
351 reserved_headers | |
352 } | |
353 while(next_code == info_startcode){ | |
354 packet_header, info_packet, packet_footer | |
355 reserved_headers | |
356 } | |
357 if(next_code == index_startcode){ | |
358 packet_header, index_packet, packet_footer | |
359 } | |
360 if (!eof) while(next_code != main_startcode){ | |
361 if(next_code == syncpoint_startcode){ | |
362 packet_header, syncpoint, packet_footer | |
363 } | |
364 frame | |
365 reserved_headers | |
366 } | |
367 } | |
368 | |
369 | |
370 Tag description: | |
371 ---------------- | |
372 | |
373 file_id_string | |
374 "nut/multimedia container\0" | |
375 | |
376 *_startcode | |
377 all startcodes start with 'N' | |
378 | |
379 main_startcode | |
380 0x7A561F5F04ADULL + (((uint64_t)('N'<<8) + 'M')<<48) | |
381 | |
382 stream_startcode | |
383 0x11405BF2F9DBULL + (((uint64_t)('N'<<8) + 'S')<<48) | |
384 | |
385 syncpoint_startcode | |
386 0xE4ADEECA4569ULL + (((uint64_t)('N'<<8) + 'K')<<48) | |
387 | |
388 index_startcode | |
389 0xDD672F23E64EULL + (((uint64_t)('N'<<8) + 'X')<<48) | |
390 | |
391 info_startcode | |
392 0xAB68B596BA78ULL + (((uint64_t)('N'<<8) + 'I')<<48) | |
393 | |
394 version | |
395 NUT version. The current value is 3. All lower values are pre-freeze | |
396 | |
397 forward_ptr | |
398 size of the packet data (exactly the distance from the first byte | |
399 after the packet_header to the first byte of the next packet) | |
400 | |
401 max_distance | |
402 max distance between startcodes. If p1 and p2 are the byte | |
403 positions of the first byte of two consecutive startcodes, then | |
404 p2-p1 MUST be less than or equal to max_distance unless the entire | |
405 span from p1 to p2 comprises a single packet or a syncpoint | |
406 followed by a single frame. This imposition places efficient upper | |
407 bounds on seek operations and allows for the detection of damaged | |
408 frame headers, should a chain of frame headers pass max_distance | |
409 without encountering any startcode. | |
410 | |
411 syncpoints SHOULD be placed immediately before a keyframe if the | |
412 previous frame of the same stream was a non-keyframe, unless such | |
413 non-keyframe - keyframe transitions are very frequent | |
414 | |
415 SHOULD be set to <=32768 | |
416 if the stored value is >65536 then max_distance MUST be set to 65536 | |
417 | |
418 This is also half the max frame size without a checksum after the | |
419 frameheader. | |
420 | |
421 | |
422 max_pts_distance | |
423 max absolute difference of pts of new frame from last_pts in the | |
424 timebase of the stream, without a checksum after the frameheader. | |
425 A frame header MUST include a checksum if abs(pts-last_pts) is | |
426 strictly greater than max_pts_distance. | |
427 Note that last_pts is not necessarily the pts of the last frame | |
428 on the same stream, as it is altered by syncpoint timestamps. | |
429 SHOULD NOT be higher than 1/timebase | |
430 | |
431 stream_id | |
432 Stream identifier | |
433 stream_id MUST be < stream_count | |
434 | |
435 stream_class | |
436 0 video | |
437 1 audio | |
438 2 subtitles | |
439 3 userdata | |
440 Note: the remaining values are reserved and MUST NOT be used | |
441 a demuxer MUST ignore streams with reserved classes | |
442 | |
443 fourcc | |
444 identification for the codec | |
445 example: "H264" | |
446 MUST contain 2 or 4 bytes, note, this might be increased in the future | |
447 if needed | |
448 the id values used are the same as in avi, so if a codec uses a specific | |
449 fourcc in avi then the same fourcc MUST be used here | |
450 | |
451 time_base_nom / time_base_denom = time_base | |
452 the length of a timer tick in seconds, this MUST be equal to the 1/fps | |
453 if FLAG_FIXED_FPS is set | |
454 time_base_nom and time_base_denom MUST NOT be 0 | |
455 time_base_nom and time_base_denom MUST be relatively prime | |
456 time_base_denom MUST be < 2^31 | |
457 examples: | |
458 fps time_base_nom time_base_denom | |
459 30 1 30 | |
460 29.97 1001 30000 | |
461 23.976 1001 24000 | |
462 There MUST NOT be 2 identical timebases in a file. | |
463 There SHOULD NOT be more timebases than streams. | |
464 | |
465 time_base_id | |
466 id to time_base table | |
467 | |
468 convert_ts | |
469 To convert between 2 different timebases, the following calculation is | |
470 defined: | |
471 | |
472 ln = from_time_base_nom*to_time_base_denom | |
473 sn = from_timestamp | |
474 d1 = from_time_base_denom | |
475 d2 = to_time_base_nom | |
476 timestamp = (ln/d1*sn + ln%d1*sn/d1)/d2 | |
477 Note: this calculation MUST be done with unsigned 64 bit integers, and | |
478 is equivalent to (ln*sn)/(d1*d2) but this would require a 96bit integer | |
479 | |
480 compare_ts | |
481 Compares timestamps from 2 different timebases, | |
482 if a is before b then compare_ts(a, b) = -1 | |
483 if a is after b then compare_ts(a, b) = 1 | |
484 else compare_ts(a, b) = 0 | |
485 | |
486 Care must be taken that this is done exactly with no rounding errors, | |
487 simply casting to float or double and doing the obvious | |
488 a*timebase > b*timebase is not compliant or correct, neither is the | |
489 same with integers, and | |
490 a*a_timebase.num*b_timebase.den > b*b_timebase.num*a_timebase.den | |
491 will overflow. One possible implementation which shouldn't overflow | |
492 within the range of legal timestamps and timebases is: | |
493 | |
494 if (convert_ts(a, a_timebase, b_timebase) < b) return -1; | |
495 if (convert_ts(b, b_timebase, a_timebase) < a) return 1; | |
496 return 0; | |
497 | |
498 msb_pts_shift | |
499 amount of bits in lsb_pts | |
500 MUST be <16 | |
501 | |
502 decode_delay | |
503 maximum time between input and output for a codec, used to generate | |
504 dts from pts | |
505 is set to 0 for streams without B-frames, and set to 1 for streams with | |
506 B-frames, may be larger for future codecs | |
507 decode_delay MUST NOT be set higher than necessary for a codec. | |
508 | |
509 stream_flags | |
510 Bit Name Description | |
511 1 FLAG_FIXED_FPS indicates that the fps is fixed | |
512 | |
513 codec_specific_data | |
514 private global data for a codec (could be huffman tables or ...) | |
515 | |
516 frame_code | |
517 the meaning of this byte is stored in the main header | |
518 the value 78 ('N') is forbidden to ensure that the byte is always | |
519 different from the first byte of any startcode | |
520 a muxer SHOULD mark 0x00 and 0xFF as invalid to improve error | |
521 detection | |
522 | |
523 flags[frame_code], frame_flags | |
524 Bit Name Description | |
525 1 FLAG_KEY if set, frame is keyframe | |
526 2 FLAG_EOR if set, stream has no relevance on | |
527 presentation. (EOR) | |
528 8 FLAG_CODED_PTS if set, coded_pts is in the frame header | |
529 16 FLAG_STREAM_ID if set, stream_id is coded in the frame header | |
530 32 FLAG_SIZE_MSB if set, data_size_msb is at frame header, | |
531 otherwise data_size_msb is 0 | |
532 64 FLAG_CHECKSUM if set then the frame header contains a checksum | |
533 128 FLAG_RESERVED if set, reserved_count is coded in the frame header | |
534 4096 FLAG_CODED if set, coded_flags are stored in the frame header. | |
535 8192 FLAG_INVALID if set, frame_code is invalid. | |
536 | |
537 EOR frames MUST be zero-length and MUST be marked as keyframes. | |
538 All streams SHOULD end with EOR, where the pts of the EOR indicates the | |
539 end presentation time of the final frame. | |
540 An EOR set stream is unset by the first content frames. | |
541 EOR can only be unset in streams with zero decode_delay. | |
542 FLAG_CHECKSUM MUST be set if the frame's data_size is strictly greater than | |
543 2*max_distance or the difference abs(pts-last_pts) is strictly greater than | |
544 max_pts_distance (where pts represents this frame's pts and last_pts is | |
545 defined as below). | |
546 | |
547 stream_id[frame_code] | |
548 MUST be <250 | |
549 | |
550 data_size_mul[frame_code] | |
551 MUST be <16384 | |
552 | |
553 data_size_lsb[frame_code] | |
554 MUST be <16384 | |
555 | |
556 pts_delta[frame_code] | |
557 MUST be <16384 and >-16384 | |
558 | |
559 reserved_count[frame_code] | |
560 MUST be <256 | |
561 | |
562 data_size | |
563 data_size= data_size_lsb + data_size_msb*data_size_mul; | |
564 | |
565 coded_pts | |
566 if coded_pts < (1<<msb_pts_shift) then it is an lsb | |
567 pts, otherwise it is a full pts + (1<<msb_pts_shift) | |
568 lsb pts is converted to a full pts by: | |
569 mask = (1<<msb_pts_shift)-1; | |
570 delta = last_pts - mask/2 | |
571 pts = ((pts_lsb-delta)&mask) + delta | |
572 | |
573 lsb_pts | |
574 least significant bits of the pts in time_base precision | |
575 Example: IBBP display order | |
576 keyframe pts=0 -> pts=0 | |
577 frame lsb_pts=3 -> pts=3 | |
578 frame lsb_pts=1 -> pts=1 | |
579 frame lsb_pts=2 -> pts=2 | |
580 ... | |
581 keyframe msb_pts=257 -> pts=257 | |
582 frame lsb_pts=255 -> pts=255 | |
583 frame lsb_pts=0 -> pts=256 | |
584 frame lsb_pts=4 -> pts=260 | |
585 frame lsb_pts=2 -> pts=258 | |
586 frame lsb_pts=3 -> pts=259 | |
587 all pts's of keyframes of a single stream MUST be monotone | |
588 | |
589 dts | |
590 dts is calculated by using a decode_delay+1 sized buffer for each | |
591 stream, into which the current pts is inserted and the element with | |
592 the smallest value is removed, this is then the current dts | |
593 this buffer is initialized with decode_delay elements, each set to -1 | |
594 | |
595 Pts of all frames in all streams MUST be bigger or equal to dts of all | |
596 previous frames in all streams, compared in common timebase. (EOR | |
597 frames are NOT exempt from this rule) | |
598 | |
599 width/height | |
600 MUST be set to the coded width/height, MUST NOT be 0 | |
601 | |
602 sample_width/sample_height (aspect ratio) | |
603 sample_width is the horizontal distance between samples | |
604 sample_width and sample_height MUST be relatively prime if not zero | |
605 both MUST be 0 if unknown otherwise both MUST be non zero | |
606 | |
607 colorspace_type | |
608 0 unknown | |
609 1 ITU Rec 624 / ITU Rec 601 Y range: 16..235 Cb/Cr range: 16..240 | |
610 2 ITU Rec 709 Y range: 16..235 Cb/Cr range: 16..240 | |
611 17 ITU Rec 624 / ITU Rec 601 Y range: 0..255 Cb/Cr range: 0..255 | |
612 18 ITU Rec 709 Y range: 0..255 Cb/Cr range: 0..255 | |
613 | |
614 samplerate_nom / samplerate_denom = samplerate | |
615 the number of samples per second, MUST NOT be 0 | |
616 | |
617 crc32 checksum | |
618 Generator polynomial is 0x104C11DB7. Starting value is zero. | |
619 | |
620 checksum | |
621 crc32 checksum | |
622 checksum is calculated for the area pointed to by forward_ptr not | |
623 including the checksum itself (from first byte after the | |
624 packet_header until last byte before the checksum). | |
625 for frame headers the checksum contains the framecode byte and all | |
626 following bytes up to the checksum itself | |
627 | |
628 header_checksum | |
629 checksum over the startcode and forward pointer | |
630 | |
631 Syncpoint tags: | |
632 --------------- | |
633 | |
634 back_ptr_div16 | |
635 back_ptr = back_ptr_div16 * 16 + 15 | |
636 back_ptr must point to a position within 16 bytes of a syncpoint | |
637 startcode. This syncpoint MUST be the closest syncpoint such that at | |
638 least one keyframe with a pts lower or equal to the original syncpoint's | |
639 global_key_pts for all streams lies between it and the current syncpoint. | |
640 | |
641 A stream where EOR is set is to be ignored for back_ptr. | |
642 | |
643 global_key_pts | |
644 After a syncpoint, last_pts of each stream is to be set to: | |
645 last_pts[i] = convert_ts(global_key_pts, time_base[id], time_base[i]) | |
646 | |
647 global_key_pts MUST be bigger or equal to dts of all past frames across | |
648 all streams, and smaller or equal to pts of all future frames. | |
649 | |
650 Index tags: | |
651 ----------- | |
652 | |
653 max_pts | |
654 The highest pts in the entire file | |
655 | |
656 syncpoint_pos_div16 | |
657 offset from beginning of file to up to 15 bytes before the syncpoint | |
658 referred to in this index entry. Relative to position of last | |
659 syncpoint. | |
660 | |
661 has_keyframe | |
662 indicates whether this stream has a keyframe between this syncpoint and | |
663 the last syncpoint. | |
664 | |
665 keyframe_pts | |
666 The pts of the first keyframe for this stream in the region between the | |
667 2 syncpoints, in the stream's timebase. (EOR frames are also keyframes) | |
668 | |
669 eor_pts | |
670 Coded only if EOR is set at the position of the syncpoint. The pts of | |
671 that EOR. EOR is unset by the first keyframe after it. | |
672 | |
673 index_ptr | |
674 Length in bytes of the entire index, from the first byte of the | |
675 startcode until the last byte of the checksum. | |
676 Note: A demuxer can use this to find the index when it is written at | |
677 EOF, as index_ptr will always be 12 bytes before the end of file if | |
678 there is an index at all. | |
679 | |
680 | |
681 Info tags: | |
682 ---------- | |
683 | |
684 stream_id_plus1 | |
685 Stream this info packet applies to. If zero, packet applies to whole | |
686 file. | |
687 | |
688 chapter_id | |
689 Id of chapter this packet applies to. If zero, packet applies to whole | |
690 file. Positive chapter_id's are real chapters and MUST NOT overlap. | |
691 Negative chapter_id indicate a sub region of file and not a real | |
692 chapter. chapter_id MUST be unique to the region it represents. | |
693 chapter_id n MUST NOT be used unless there are at least n chapters in the | |
694 file | |
695 | |
696 chapter_start | |
697 timestamp of start of chapter | |
698 | |
699 chapter_len | |
700 Length of chapter in same timebase of chapter_start. | |
701 | |
702 type | |
703 for example: "UTF-8" -> string or "JPEG" -> JPEG image | |
704 "v" -> unsigned integer | |
705 "s" -> signed integer | |
706 "r" -> rational | |
707 Note: nonstandard fields should be prefixed by "X-" | |
708 Note: MUST be less than 6 bytes long (might be increased to 64 later) | |
709 | |
710 info packet types | |
711 the name of the info entry, valid names are | |
712 "Author" | |
713 "Description" | |
714 "Copyright" | |
715 "Encoder" | |
716 the name & version of the software used for encoding | |
717 "Title" | |
718 "Cover" (allowed types are "PNG" and "JPEG") | |
719 image of the (CD, DVD, VHS, ..) cover (preferably PNG or JPEG) | |
720 "Source" | |
721 "DVD", "VCD", "CD", "MD", "FM radio", "VHS", "TV", "LD" | |
722 Optional: appended PAL, NTSC, SECAM, ... in parentheses | |
723 "SourceContainer" | |
724 "nut", "mkv", "mov", "avi", "ogg", "rm", "mpeg-ps", "mpeg-ts", "raw" | |
725 "SourceCodecTag" | |
726 the source codec id like a fourcc which was used to store a specific | |
727 stream in its SourceContainer | |
728 "CaptureDevice" | |
729 "BT878", "BT848", "webcam", ... (more exact names are fine too) | |
730 "CreationTime" | |
731 "2003-01-20 20:13:15Z", ... | |
732 (ISO 8601 format, see http://www.cl.cam.ac.uk/~mgk25/iso-time.html) | |
733 Note: do not forget the timezone | |
734 "Keywords" | |
735 "Language" | |
736 ISO 639 and ISO 3166 for language/country code | |
737 something like "eng" (US english), can be 0 if unknown | |
738 and "multi" if several languages | |
739 see http://www.loc.gov/standards/iso639-2/englangn.html | |
740 and http://www.din.de/gremien/nas/nabd/iso3166ma/codlstp1/en_listp1.html | |
741 the language code | |
742 "Disposition" | |
743 "original", "dub" (translated), "comment", "lyrics", "karaoke" | |
744 Note: if someone needs some others, please tell us about them, so we | |
745 can add them to the official standard (if they are sane) | |
746 Note: nonstandard fields should be prefixed by "X-" | |
747 Note: names of fields SHOULD be in English if a word with the same | |
748 meaning exists in English | |
749 Note: MUST be less than 64 bytes long | |
750 | |
751 value | |
752 value of this name/type pair | |
753 | |
754 stuffing | |
755 0x80 can be placed in front of any type v entry for stuffing purposes | |
756 except the forward_ptr and all fields in the frame header where a | |
757 maximum of 8 stuffing bytes per field are allowed | |
758 | |
759 | |
760 Structure: | |
761 ---------- | |
762 | |
763 the headers MUST be in exactly the following order (to simplify demuxer design) | |
764 main header | |
765 stream_header (id=0) | |
766 stream_header (id=1) | |
767 ... | |
768 stream_header (id=n) | |
769 | |
770 headers may be repeated, but if they are, then they MUST all be repeated | |
771 together and repeated headers MUST be identical | |
772 | |
773 Each set of repeated headers not at the beginning or end of the file SHOULD | |
774 be stored at the earliest possible position after 2^x where x is | |
775 an integer and the file end, so the headers may be repeated at 4102 if that is | |
776 the closest position after 2^12=4096 at which the headers can be placed | |
777 | |
778 Note: this allows an implementation reading the file to locate backup | |
779 headers in O(log filesize) time as opposed to O(filesize) | |
780 | |
781 headers MUST be placed at least at the start of the file and immediately before | |
782 the index or at the file end if there is no index | |
783 headers MUST be repeated at least twice (so they exist three times in a file) | |
784 | |
785 there MUST be a sync point immediately before the first frame after any headers | |
786 | |
787 | |
788 Index: | |
789 ------ | |
790 | |
791 Note: with realtime streaming, there is no end, so no index there either | |
792 Index MAY only be repeated after main headers. | |
793 If an index is written anywhere in the file, it MUST be written at end of | |
794 file as well. | |
795 | |
796 | |
797 Info: | |
798 ----- | |
799 | |
800 If an info packet is stored anywhere then a muxer MUST also store an identical | |
801 info packet after every main-stream-header set | |
802 | |
803 If a demuxer has seen several info packets with the same chapter_id and | |
804 stream_id then it MUST ignore all but the one with the highest position in | |
805 the file | |
806 | |
807 demuxers SHOULD NOT search the whole file for info packets | |
808 | |
809 demuxer (non-normative): | |
810 ------------------------ | |
811 | |
812 in the absence of a valid header at the beginning, players SHOULD search for | |
813 backup headers starting at offset 2^x; for each x players SHOULD end their | |
814 search at a particular offset when any startcode is found (including syncpoint) | |
815 | |
816 | |
817 | |
818 Semantic requirements: | |
819 ====================== | |
820 | |
821 If more than one stream of a given stream class is present, each one SHOULD | |
822 have info tags specifying disposition, and if applicable, language. | |
823 It often highly improves usability and is therefore strongly encouraged. | |
824 | |
825 A demuxer MUST NOT demux a stream which contains more than one stream, or which | |
826 is wrapped in a structure to facilitate more than one stream or otherwise | |
827 duplicate the role of a container. any such file is to be considered invalid. | |
828 for example vorbis in ogg in nut is invalid, as is | |
829 mpegvideo+mpegaudio in mpeg-ps/ts in nut or dvvideo + dvaudio in dv in nut | |
830 | |
831 | |
832 | |
833 Sample code (Public Domain, & untested): | |
834 ======================================== | |
835 | |
836 typedef BufferContext{ | |
837 uint8_t *buf; | |
838 uint8_t *buf_ptr; | |
839 }BufferContext; | |
840 | |
841 static inline uint64_t get_bytes(BufferContext *bc, int count){ | |
842 uint64_t val=0; | |
843 | |
844 assert(count>0 && count<9); | |
845 | |
846 for(i=0; i<count; i++){ | |
847 val <<=8; | |
848 val += *(bc->buf_ptr++); | |
849 } | |
850 | |
851 return val; | |
852 } | |
853 | |
854 static inline void put_bytes(BufferContext *bc, int count, uint64_t val){ | |
855 uint64_t val=0; | |
856 | |
857 assert(count>0 && count<9); | |
858 | |
859 for(i=count-1; i>=0; i--){ | |
860 *(bc->buf_ptr++)= val >> (8*i); | |
861 } | |
862 | |
863 return val; | |
864 } | |
865 | |
866 static inline uint64_t get_v(BufferContext *bc){ | |
867 uint64_t val= 0; | |
868 | |
869 for(; space_left(bc) > 0; ){ | |
870 int tmp= *(bc->buf_ptr++); | |
871 if(tmp&0x80) | |
872 val= (val<<7) + tmp - 0x80; | |
873 else | |
874 return (val<<7) + tmp; | |
875 } | |
876 | |
877 return -1; | |
878 } | |
879 | |
880 static inline int put_v(BufferContext *bc, uint64_t val){ | |
881 int i; | |
882 | |
883 if(space_left(bc) < 9) return -1; | |
884 | |
885 val &= 0x7FFFFFFFFFFFFFFFULL; // FIXME can only encode upto 63 bits currently | |
886 for(i=7; ; i+=7){ | |
887 if(val>>i == 0) break; | |
888 } | |
889 | |
890 for(i-=7; i>0; i-=7){ | |
891 *(bc->buf_ptr++)= 0x80 | (val>>i); | |
892 } | |
893 *(bc->buf_ptr++)= val&0x7F; | |
894 | |
895 return 0; | |
896 } | |
897 | |
898 static int64_t get_dts(int64_t pts, int64_t *pts_cache, int delay, int reset){ | |
899 if(reset) memset(pts_cache, -1, delay*sizeof(int64_t)); | |
900 | |
901 while(delay--){ | |
902 int64_t t= pts_cache[delay]; | |
903 if(t < pts){ | |
904 pts_cache[delay]= pts; | |
905 pts= t; | |
906 } | |
907 } | |
908 | |
909 return pts; | |
910 } | |
911 | |
912 | |
913 | |
914 Authors: | |
915 ======== | |
916 | |
917 Folks from the MPlayer developers mailing list (http://www.mplayerhq.hu/). | |
918 Authors in alphabetical order: (FIXME! Tell us if we left you out) | |
919 Beregszaszi, Alex (alex@fsn.hu) | |
920 Bunkus, Moritz (moritz@bunkus.org) | |
921 Diedrich, Tobias (ranma+mplayer@tdiedrich.de) | |
922 Felker, Rich (dalias@aerifal.cx) | |
923 Franz, Fabian (FabianFranz@gmx.de) | |
924 Gereoffy, Arpad (arpi@thot.banki.hu) | |
925 Hess, Andreas (jaska@gmx.net) | |
926 Niedermayer, Michael (michaelni@gmx.at) | |
927 Shimon, Oded (ods15@ods15.dyndns.org) |