Mercurial > mplayer.hg
annotate DOCS/tech/mpcf.txt @ 9325:18cdba535f36
hex editor friendly language_code
author | michael |
---|---|
date | Fri, 07 Feb 2003 22:13:54 +0000 |
parents | 774afe44f17b |
children | de287fe94511 |
rev | line source |
---|---|
9294 | 1 MPlayer container format draft 0.01 |
2 | |
3 | |
4 | |
5 Intro: | |
6 | |
7 Features / goals: | |
8 (supported by the format, not necessary by a specific implementation) | |
9 | |
10 Simple | |
11 use the same encoding for nearly all fields | |
9295 | 12 simple decoding, so slow cpus can handle it |
9294 | 13 Extendible |
14 no limit for the possible values for all fields (using universal vlc) | |
15 allow adding of new headers in the future | |
16 allow adding more fields at the end of headers | |
17 Compact | |
18 ~0.2% overhead, for normal bitrates | |
19 index is <10kb per hour (1 keyframe every 3sec) | |
20 Error resistant | |
21 seeking / playback without an index | |
22 headers & index can be repeated | |
23 audio packet reshuffle | |
24 checksums to allow quick redownloading of damaged parts | |
25 | |
26 | |
27 | |
28 Definitions: | |
29 | |
30 MUST the specific part must be done to conform to this standard | |
31 SHOULD it's recommended to be done that way but it's not strictly required | |
32 | |
33 | |
34 | |
35 Syntax: | |
36 | |
9295 | 37 Type definitions: |
38 v | |
39 value=0 | |
40 do{ | |
41 more_data u(1) | |
42 data u(7) | |
43 value= 128*value + data | |
44 }while(more_data) | |
9323 | 45 |
46 b (binary data or string) | |
47 length v | |
48 for(i=0; i<length; i++){ | |
49 data[i] u(8) | |
9295 | 50 } |
51 | |
9323 | 52 |
9295 | 53 f(x) x fixed bits |
54 u(x) unsigned number encoded in x bits in MSB first order | |
55 | |
56 | |
57 Bitstream syntax: | |
9294 | 58 packet header |
59 forward ptr v | |
60 backward ptr v | |
61 | |
62 align_byte | |
63 while(not byte aligned) | |
64 one f(1) | |
65 | |
66 reserved_bytes | |
67 for(i=0; i<forward_ptr - length_of_non_reserved; i++) | |
68 reserved u(8) | |
69 | |
70 main header: | |
71 packet header | |
72 main_startcode f(64) | |
73 version v | |
74 stream_count v | |
75 file_size v | |
9310 | 76 length_in_msec v |
9294 | 77 reserved_bytes |
78 checksum u(32) | |
79 | |
80 stream_header: | |
81 packet_header | |
82 stream_startcode f(64) | |
83 stream_id v | |
84 stream_class v | |
9323 | 85 fourcc b |
9294 | 86 average_bitrate v |
9325 | 87 language_code b |
9297 | 88 time_base_nom v |
89 time_base_denom v | |
9294 | 90 lsb_timestamp_length v |
91 fixed_fps u(1) | |
92 codec_specific_header_flag u(1) | |
93 reserved u(6) | |
94 | |
95 video_stream_header: | |
96 stream_header | |
97 width v | |
98 height v | |
99 sample_width v | |
100 sample_height v | |
101 colorspace_type v | |
102 depth v | |
103 reserved_bytes | |
104 checksum u(32) | |
105 | |
106 audio_stream_header: | |
107 stream_header | |
108 samplerate v | |
109 channel_count v | |
110 sub_packet_size v | |
111 shuffle_type v | |
112 reserved_bytes | |
113 checksum u(32) | |
114 | |
115 codec_specific_header: | |
116 packet_header | |
117 codec_specific_startcode f(64) | |
118 stream_id v | |
119 codec_specific data | |
120 checksum | |
121 | |
122 frame | |
123 packet header | |
124 if(keyframe){ | |
125 keyframe_startcode f(64) | |
126 } | |
9311
4b04416ada91
zero_bit for normal frames, so we can distinguish them from other packets
michael
parents:
9310
diff
changeset
|
127 zero_bit f(1) |
9294 | 128 priority u(2) |
129 checksum_flag u(1) | |
130 msb_timestamp_flag u(1) | |
9311
4b04416ada91
zero_bit for normal frames, so we can distinguish them from other packets
michael
parents:
9310
diff
changeset
|
131 reserved u(3) |
4b04416ada91
zero_bit for normal frames, so we can distinguish them from other packets
michael
parents:
9310
diff
changeset
|
132 lsb_timestamp v |
4b04416ada91
zero_bit for normal frames, so we can distinguish them from other packets
michael
parents:
9310
diff
changeset
|
133 stream_id v |
9294 | 134 if(msb_timestamp_flag) |
135 msb_timestamp v | |
136 bitstream | |
137 if(checksum_flag) | |
9312 | 138 frame_checksum u(32) |
9294 | 139 |
140 Index: | |
141 packet header | |
142 index_startcode f(64) | |
143 stream_id v | |
144 index_length v | |
145 for(i=0; i<index_length; i++){ | |
146 index_timestamp v | |
147 index_position v | |
148 } | |
9310 | 149 reserved_bytes |
9294 | 150 checksum u(32) |
151 | |
9310 | 152 info_packet: (optional) |
9294 | 153 packet header |
154 info_startcode f(64) | |
9310 | 155 start_time v |
156 end_time v | |
9323 | 157 for(;;){ |
158 type b | |
159 if(type=="") break; | |
160 name b | |
161 value b | |
162 } | |
9310 | 163 reserved_bytes |
9294 | 164 checksum u(32) |
9323 | 165 |
9310 | 166 stuffing_packet: (optional) |
167 packet_header | |
168 stuffing_startcode f(64) | |
169 for(i=0; i<forward_ptr - length_of_non_reserved; i++) | |
170 stuffing f(8) | |
9294 | 171 |
9323 | 172 |
9294 | 173 forward_ptr |
174 backward_ptr | |
175 pointer to the next / previous packet | |
9323 | 176 pointers are relative and backward pointer is implicitly negative |
9294 | 177 Note: a frame with 0 bytes means that it's skipped |
9323 | 178 Note: the forward pointer is equal to the size of this packet including |
179 the header | |
180 the backward pointer is equal to the size of the previous packet | |
181 Example: | |
182 0 | |
183 size1 (size of frame1 including header) | |
184 frame1 | |
185 | |
186 size1 | |
187 size2 | |
188 frame2 | |
189 | |
190 size2 | |
191 size3 | |
192 frame3 | |
193 | |
194 | |
195 *_startcode | |
196 the first bit is always set | |
9294 | 197 |
198 version | |
199 0 for now | |
200 | |
201 file_size | |
202 size in bytes, can be 0 if not meaningful (realtime streams, ...) | |
203 | |
204 length_in_msec | |
205 length of the file in milli seconds (can be 0 if realtime or such) | |
206 | |
207 stream_id | |
208 Note: streams with a lower relative class MUST have a lower relative id | |
209 so a stream with class 0 MUST always have an id which is lower than any | |
210 stream with class > 0 | |
9295 | 211 streams should use low ids |
9294 | 212 |
213 stream_class | |
214 0 video | |
215 32 audio | |
216 64 subtitles | |
217 Note the remaining values are reserved and MUST NOT be used | |
9312 | 218 a decoder MUST ignore streams with reserved classes |
9294 | 219 |
220 fourcc | |
221 identification for the codec | |
9323 | 222 example: "H264" |
9325 | 223 MUST contain 4 bytes, note, this might be increased in the future if |
224 needed | |
9294 | 225 |
226 language_code | |
9325 | 227 something like "usen" (US english), can be 0 |
9294 | 228 if unknown |
229 | |
9297 | 230 time_base_nom / time_base_denom = time_base |
9294 | 231 the number of timer ticks per second, this MUST be equal to the fps |
232 if the fixed_fps is 1 | |
9297 | 233 time_base_denom MUST not be 0 |
234 time_base_nom and time_base_denom MUST be relative prime | |
235 time_base_nom MUST be < 2^15 | |
236 examples: | |
237 fps time_base_nom time_base_denom | |
238 30 30 1 | |
239 29.97 30000 1001 | |
240 23.976 24000 1001 | |
9294 | 241 |
242 lsb_timestamp_length | |
243 length in bits of the lsb_timestamp | |
244 MUST be <16 | |
245 | |
246 fixed_fps | |
247 1 indicates that the fps is fixed | |
248 | |
249 codec_specific_header_flag | |
250 1 indicates that this stream has a codec specific header | |
251 | |
252 msb_timestamp_flag | |
253 indicates that the msb_timestamp is coded | |
254 MUST be 1 for keyframes | |
255 | |
256 msb_timestamp | |
257 most significant bits of the timestamp, SHOULD be 0 for the first frame | |
258 | |
259 lsb_timestamp | |
260 least significant bits of the timestamp in time_base precision, with | |
261 lsb_timestamp_length bits | |
262 Example: IBBP display order | |
263 keyframe msb_timestamp=0 lsb_timestamp=0 -> timestamp=0 | |
264 frame lsb_timestamp=3 -> timestamp=3 | |
265 frame lsb_timestamp=1 -> timestamp=1 | |
266 frame lsb_timestamp=2 -> timestamp=2 | |
267 ... | |
268 keyframe msb_timestamp=1 lsb_timestamp=1 -> timestamp=257 | |
269 frame msb_timestamp=0 lsb_timestamp=255->timestamp=255 | |
270 frame msb_timestamp=1 lsb_timestamp=0 -> timestamp=256 | |
271 frame lsb_timestamp=4 -> timestamp=260 | |
272 frame lsb_timestamp=2 -> timestamp=258 | |
273 frame lsb_timestamp=3 -> timestamp=259 | |
274 | |
275 width/height | |
276 MUST be set to the coded width/height | |
277 | |
278 sample_width/sample_height (aspect ratio) | |
279 sample_width is the horizontal distance between samples | |
280 sample_width and sample_height MUST be relative prime if not zero | |
281 MUST be 0 if unknown | |
282 | |
283 depth | |
284 for compatibility with some win32 codecs | |
285 | |
9311
4b04416ada91
zero_bit for normal frames, so we can distinguish them from other packets
michael
parents:
9310
diff
changeset
|
286 zero_bit |
4b04416ada91
zero_bit for normal frames, so we can distinguish them from other packets
michael
parents:
9310
diff
changeset
|
287 MUST be 0, its there to distinguish non keyframes from other packets, |
4b04416ada91
zero_bit for normal frames, so we can distinguish them from other packets
michael
parents:
9310
diff
changeset
|
288 Note: all packets have a 64-bit startcode except non-keyframes to reduce |
4b04416ada91
zero_bit for normal frames, so we can distinguish them from other packets
michael
parents:
9310
diff
changeset
|
289 their size, and all startcodes start with a 1 bit |
4b04416ada91
zero_bit for normal frames, so we can distinguish them from other packets
michael
parents:
9310
diff
changeset
|
290 |
9294 | 291 priority |
292 if 0 then the frame isn't used as reference (b frame) and can be dropped | |
293 MUST be > 0 for keyframes | |
294 | |
295 sub_packet_size | |
296 size of an audio packet | |
297 Note a subpacket MUST be in exactly one packet, it cannot be split | |
298 | |
299 shuffle_type | |
300 audio is often encoded in small fixed size packets, and to increase the | |
301 error robustness these can be shuffled | |
302 0 -> no shuffle | |
303 1-16 -> interleave packets by 2^n | |
304 | |
305 checksum | |
9307
ec18ad315bbe
10l (copy & pasting the generator poly for crc32 from ogg was a bad idea...)
michael
parents:
9299
diff
changeset
|
306 crc32 checksum using the generator polynomial 0x104c11db7 (same as ogg) |
9294 | 307 |
308 checksum_flag | |
309 indicates that the frame_checksum is coded | |
310 MUST be 1 for the last non keyframe before a keyframe | |
311 | |
312 frame_checksum | |
313 identical to checksum, but instead of covering just the current | |
314 packet, it covers all frames of the same stream id since the last | |
9312 | 315 frame_checksum |
9294 | 316 this field is only coded if checksum_flag=1 |
317 | |
318 index_timestamp | |
319 value in time_base precision, relative to the last index_timestamp | |
320 | |
321 index_position | |
322 position in bytes of the first byte of the keyframe header, relative | |
323 to the last index_position | |
324 | |
9310 | 325 start_time, end_time |
326 the time range in msecs to which the info applies | |
327 Note: can be used to mark chapters | |
9323 | 328 |
329 type | |
330 the fourcc of the type | |
331 for example: "STR " -> String or "JPEG" -> jpeg image | |
332 0 length means end | |
333 | |
9295 | 334 name |
335 the name of the info entry, valid names are | |
9323 | 336 "Author","Description","Copyright","Encoder","Title","CDCover" |
9295 | 337 Note: if someone needs some others, please tell us about them, so we can |
338 add them to the official standard (if they are sane) | |
339 | |
340 value | |
341 | |
9310 | 342 stuffing |
343 0xFF | |
9294 | 344 |
345 Structure: | |
346 | |
347 the headers MUST be in exactly the following order (to simplify demuxer design) | |
348 main header | |
349 stream_header (id=0) | |
350 codec_specific_header (id=0) | |
351 stream_header (id=1) | |
352 codec_specific_header (id=1) | |
353 ... | |
354 stream_header (id=n) | |
355 codec_specific_header (id=n) | |
356 | |
357 headers may be repeated, but if they are then they MUST all be repeated together | |
358 and repeated headers MUST be identical | |
359 | |
360 headers MUST be repeated every 10sec at least ? FIXME | |
9310 | 361 headers MUST be repeated at least twice (so they exist 3 times in a file) |
9295 | 362 |
9310 | 363 Index |
364 the index can be repeated but there MUST be at least one at the end | |
9311
4b04416ada91
zero_bit for normal frames, so we can distinguish them from other packets
michael
parents:
9310
diff
changeset
|
365 Note: in case of realtime streaming there is no end, so no index there either |
9310 | 366 |
367 Info packets | |
368 the info_packet can be repeated, it can also contain different names & values | |
369 each time but only if the time is also different | |
370 Info packets can be used to describe the file or some part of it (chapters) | |
371 | |
372 info packets SHOULD be placed at least at the beginning of the file | |
373 for realtime streaming info packets will normally be transmitted when they apply | |
374 for example, the current song title & artist of the currently shown music video | |
375 | |
376 Stuffing packets | |
377 can be used as a filler, for example to leave some empty space at the begin for | |
378 a copy of the index | |
379 | |
380 Unknown packets | |
381 MUST be ignored by the decoder | |
382 | |
9294 | 383 Sample code (GPL, & untested) |
384 | |
385 typedef BufferContext{ | |
386 uint8_t *buf; | |
387 uint8_t *buf_ptr; | |
388 }BufferContext; | |
389 | |
390 static inline uint64_t get_bytes(BufferContext *bc, int count){ | |
391 uint64_t val=0; | |
392 | |
393 assert(count>0 && count<9) | |
394 | |
395 for(i=0; i<count; i++){ | |
396 val <<=8; | |
397 val += *(bc->buf_ptr++); | |
398 } | |
399 | |
400 return val; | |
401 } | |
402 | |
403 static inline void put_bytes(BufferContext *bc, int count, uint64_t val){ | |
404 uint64_t val=0; | |
405 | |
406 assert(count>0 && count<9) | |
407 | |
408 for(i=count-1; i>=0; i--){ | |
409 *(bc->buf_ptr++)= val >> (8*i); | |
410 } | |
411 | |
412 return val; | |
413 } | |
414 | |
415 static inline uint64_t get_v(BufferContext *bc){ | |
416 uint64_t val= 0; | |
417 | |
418 for(;;){ | |
419 int tmp= *(bc->buf_ptr++); | |
420 if(tmp&0x80) | |
421 val= (val<<7) + tmp - 0x80; | |
422 else | |
9299 | 423 return (val<<7) + tmp; |
9294 | 424 } |
425 } | |
426 | |
427 static inline void put_v(BufferContext *bc, uint64_t val){ | |
428 int i; | |
429 | |
430 assert(val); | |
431 | |
432 for(i=56;; i-=8){ | |
433 if(val>>i) break; | |
434 } | |
435 | |
436 for(;i>0; i-=8){ | |
437 *(bc->buf_ptr++)= 0x80 | (val>>i); | |
438 } | |
439 *(bc->buf_ptr++)= val&0x7F; | |
440 } | |
441 | |
442 | |
443 Example stream | |
444 | |
445 main header | |
446 video_stream_header (stream 0, video jpjp, timebase 30, lsb_timestamp_length=8) | |
447 codec_specific_header (stream 0) | |
448 video_stream_header (stream 1 subtitle usen, timebase 30, lsb_timestamp_length=8) | |
449 video_stream_header (stream 2 subtitle atde, timebase 30, lsb_timestamp_length=8) | |
450 audio_stream_header (stream 3, audio jpjp, timebase 1 , lsb_timestamp_length=8) | |
451 audio_stream_header (stream 4, audio usen, timebase 1 , lsb_timestamp_length=8) | |
452 index (stream 0) | |
453 keyframe (stream 0, msb_timestamp=0, lsb_timestamp=0) | |
454 keyframe (stream 1, msb_timestamp=0, lsb_timestamp=0) | |
455 keyframe (stream 2, msb_timestamp=0, lsb_timestamp=0) | |
456 keyframe (stream 3, msb_timestamp=0, lsb_timestamp=0) | |
457 keyframe (stream 4, msb_timestamp=0, lsb_timestamp=0) | |
458 frame (stream 0, lsb_timestamp=1) | |
459 frame (stream 0, lsb_timestamp=2) | |
460 ... | |
461 frame (stream 0, lsb_timestamp=30) | |
462 keyframe (stream 3, msb_timestamp=0, lsb_timestamp=1) | |
463 keyframe (stream 4, msb_timestamp=0, lsb_timestamp=1) | |
464 frame (stream 0, lsb_timestamp=31) | |
465 frame (stream 0, lsb_timestamp=32) | |
466 ... | |
467 frame (stream 0, lsb_timestamp=60) | |
468 frame (stream 1, lsb_timestamp=60) | |
469 frame (stream 2, lsb_timestamp=60) | |
470 keyframe (stream 3, msb_timestamp=0, lsb_timestamp=2) | |
471 keyframe (stream 4, msb_timestamp=0, lsb_timestamp=2) | |
472 frame (stream 0, lsb_timestamp=61) | |
473 frame (stream 0, lsb_timestamp=62) | |
474 ... | |
475 main header | |
476 video_stream_header (stream 0, video jpjp, timebase 30, lsb_timestamp_length=8) | |
477 codec_specific_header (stream 0) | |
478 video_stream_header (stream 1 subtitle usen, timebase 30, lsb_timestamp_length=8) | |
479 video_stream_header (stream 2 subtitle atde, timebase 30, lsb_timestamp_length=8) | |
480 audio_stream_header (stream 3, audio jpjp, timebase 1 , lsb_timestamp_length=8) | |
481 audio_stream_header (stream 4, audio usen, timebase 1 , lsb_timestamp_length=8) | |
482 frame (stream 0, lsb_timestamp=255) | |
483 frame (stream 0, msb_timestamp=1 lsb_timestamp=0) | |
484 frame (stream 0, lsb_timestamp=1) | |
485 frame (stream 0, lsb_timestamp=2) | |
486 frame (stream 1, msb_timestamp=1 lsb_timestamp=2) | |
487 frame (stream 2, msb_timestamp=1 lsb_timestamp=2) | |
488 frame (stream 0, lsb_timestamp=3) | |
489 frame (stream 0, lsb_timestamp=4) | |
490 ... | |
491 keyframe (stream 3, msb_timestamp=0, lsb_timestamp=9) | |
492 keyframe (stream 4, msb_timestamp=0, lsb_timestamp=9) | |
493 main header | |
494 video_stream_header (stream 0, video jpjp, timebase 30, lsb_timestamp_length=8) | |
495 codec_specific_header (stream 0) | |
496 video_stream_header (stream 1 subtitle usen, timebase 30, lsb_timestamp_length=8) | |
497 video_stream_header (stream 2 subtitle atde, timebase 30, lsb_timestamp_length=8) | |
498 audio_stream_header (stream 3, audio jpjp, timebase 1 , lsb_timestamp_length=8) | |
499 audio_stream_header (stream 4, audio usen, timebase 1 , lsb_timestamp_length=8) | |
500 index (stream 0) |