9294
|
1 MPlayer container format draft 0.01
|
|
2
|
|
3
|
|
4
|
|
5 Intro:
|
|
6
|
|
7 Features / goals:
|
|
8 (supported by the format, not necessarily by a specific implementation)
|
|
9
|
|
10 Simple
|
|
11 use the same encoding for nearly all fields
|
9295
|
12 simple decoding, so slow cpus can handle it
|
9294
|
13 Extendible
|
|
14 no limit for the possible values for all fields (using universal vlc)
|
|
15 allow adding of new headers in the future
|
|
16 allow adding more fields at the end of headers
|
|
17 Compact
|
|
18 ~0.2% overhead, for normal bitrates
|
|
19 index is <10kb per hour (1 keyframe every 3sec)
|
|
20 Error resistant
|
|
21 seeking / playback without an index
|
|
22 headers & index can be repeated
|
|
23 audio packet reshuffle
|
|
24 checksums to allow quick redownloading of damaged parts
|
|
25
|
|
26
|
|
27
|
|
28 Definitions:
|
|
29
|
|
30 MUST the specific part must be done to conform to this standard
|
|
31 SHOULD it is recommended to be done that way, but it is not strictly required
|
|
32
|
|
33
|
|
34
|
|
35 Syntax:
|
|
36
|
9295
|
37 Type definitions:
|
|
38 v
|
|
39 value=0
|
|
40 do{
|
|
41 more_data u(1)
|
|
42 data u(7)
|
|
43 value= 128*value + data
|
|
44 }while(more_data)
|
|
45
|
|
46 sz (zero terminated string)
|
|
47 for(i=0; next_byte != 0; i++){
|
|
48 string[i] u(8)
|
|
49 }
|
|
50 zero_byte f(8)
|
|
51
|
|
52 f(x) x fixed bits
|
|
53 u(x) unsigned number encoded in x bits in MSB first order
|
|
54
|
|
55
|
|
56 Bitstream syntax:
|
9294
|
57 packet header
|
|
58 forward_ptr v
|
|
59 backward_ptr v
|
|
60
|
|
61 align_byte
|
|
62 while(not byte aligned)
|
|
63 one f(1)
|
|
64
|
|
65 reserved_bytes
|
|
66 for(i=0; i<forward_ptr - length_of_non_reserved; i++)
|
|
67 reserved u(8)
|
|
68
|
|
69 main header:
|
|
70 packet header
|
|
71 main_startcode f(64)
|
|
72 version v
|
|
73 stream_count v
|
|
74 file_size v
|
|
75 length_in_msec v
|
|
76 reserved_bytes
|
|
77 checksum u(32)
|
|
78
|
|
79 stream_header:
|
|
80 packet_header
|
|
81 stream_startcode f(64)
|
|
82 stream_id v
|
|
83 stream_class v
|
|
84 fourcc v
|
|
85 average_bitrate v
|
|
86 language_code v
|
9297
|
87 time_base_nom v
|
|
88 time_base_denom v
|
9294
|
89 lsb_timestamp_length v
|
|
90 fixed_fps u(1)
|
|
91 codec_specific_header_flag u(1)
|
|
92 reserved u(6)
|
|
93
|
|
94 video_stream_header:
|
|
95 stream_header
|
|
96 width v
|
|
97 height v
|
|
98 sample_width v
|
|
99 sample_height v
|
|
100 colorspace_type v
|
|
101 depth v
|
|
102 reserved_bytes
|
|
103 checksum u(32)
|
|
104
|
|
105 audio_stream_header:
|
|
106 stream_header
|
|
107 samplerate v
|
|
108 channel_count v
|
|
109 sub_packet_size v
|
|
110 shuffle_type v
|
|
111 reserved_bytes
|
|
112 checksum u(32)
|
|
113
|
|
114 codec_specific_header:
|
|
115 packet_header
|
|
116 codec_specific_startcode f(64)
|
|
117 stream_id v
|
|
118 codec_specific data
|
|
119 checksum u(32)
|
|
120
|
|
121 frame
|
|
122 packet header
|
|
123 if(keyframe){
|
|
124 keyframe_startcode f(64)
|
|
125 }
|
9299
|
126 lsb_timestamp v
|
|
127 stream_id v
|
9294
|
128 priority u(2)
|
|
129 checksum_flag u(1)
|
|
130 msb_timestamp_flag u(1)
|
9299
|
131 reserved u(4)
|
9294
|
132 if(msb_timestamp_flag)
|
|
133 msb_timestamp v
|
|
134 bitstream
|
|
135 if(checksum_flag)
|
|
136 checksum u(32)
|
|
137
|
|
138 Index:
|
|
139 packet header
|
|
140 index_startcode f(64)
|
|
141 stream_id v
|
|
142 index_length v
|
|
143 for(i=0; i<index_length; i++){
|
|
144 index_timestamp v
|
|
145 index_position v
|
|
146 }
|
|
147 checksum u(32)
|
|
148
|
|
149 info_header: (optional)
|
|
150 packet header
|
|
151 info_startcode f(64)
|
9295
|
152 entry_count v
|
|
153 for(i=0; i<entry_count; i++){
|
|
154 name sz
|
|
155 value sz
|
|
156 }
|
9294
|
157 checksum u(32)
|
|
158
|
|
159
|
|
160 forward_ptr
|
|
161 backward_ptr
|
|
162 pointer to the next / previous packet
|
|
163 Note: a frame with 0 bytes means that it is skipped
|
|
164
|
|
165 version
|
|
166 0 for now
|
|
167
|
|
168 file_size
|
|
169 size in bytes, can be 0 if not meaningful (realtime streams, ...)
|
|
170
|
|
171 length_in_msec
|
|
172 length of the file in milliseconds (can be 0 if realtime or such)
|
|
173
|
|
174 stream_id
|
|
175 Note: streams with a lower relative class MUST have a lower relative id
|
|
176 so a stream with class 0 MUST always have an id which is lower than any
|
|
177 stream with class > 0
|
9295
|
178 streams should use low ids
|
9294
|
179
|
|
180 stream_class
|
|
181 0 video
|
|
182 32 audio
|
|
183 64 subtitles
|
|
184 Note the remaining values are reserved and MUST NOT be used
|
|
185
|
|
186 fourcc
|
|
187 identification for the codec
|
|
188 example: ('h'<<24) + ('2'<<16) + ('6'<<8) + '4'
|
|
189
|
|
190 language_code
|
|
191 something like ('u'<<24) + ('s'<<16) + ('e'<<8) + 'n' (US English), can be 0
|
|
192 if unknown
|
|
193
|
9297
|
194 time_base_nom / time_base_denom = time_base
|
9294
|
195 the number of timer ticks per second, this MUST be equal to the fps
|
|
196 if the fixed_fps is 1
|
9297
|
197 time_base_denom MUST NOT be 0
|
|
198 time_base_nom and time_base_denom MUST be relatively prime
|
|
199 time_base_nom MUST be < 2^15
|
|
200 examples:
|
|
201 fps time_base_nom time_base_denom
|
|
202 30 30 1
|
|
203 29.97 30000 1001
|
|
204 23.976 24000 1001
|
9294
|
205
|
|
206 lsb_timestamp_length
|
|
207 length in bits of the lsb_timestamp
|
|
208 MUST be <16
|
|
209
|
|
210 fixed_fps
|
|
211 1 indicates that the fps is fixed
|
|
212
|
|
213 codec_specific_header_flag
|
|
214 1 indicates that this stream has a codec specific header
|
|
215
|
|
216 msb_timestamp_flag
|
|
217 indicates that the msb_timestamp is coded
|
|
218 MUST be 1 for keyframes
|
|
219
|
|
220 msb_timestamp
|
|
221 most significant bits of the timestamp, SHOULD be 0 for the first frame
|
|
222
|
|
223 lsb_timestamp
|
|
224 least significant bits of the timestamp in time_base precision, with
|
|
225 lsb_timestamp_length bits
|
|
226 Example: IBBP display order
|
|
227 keyframe msb_timestamp=0 lsb_timestamp=0 -> timestamp=0
|
|
228 frame lsb_timestamp=3 -> timestamp=3
|
|
229 frame lsb_timestamp=1 -> timestamp=1
|
|
230 frame lsb_timestamp=2 -> timestamp=2
|
|
231 ...
|
|
232 keyframe msb_timestamp=1 lsb_timestamp=1 -> timestamp=257
|
|
233 frame msb_timestamp=0 lsb_timestamp=255->timestamp=255
|
|
234 frame msb_timestamp=1 lsb_timestamp=0 -> timestamp=256
|
|
235 frame lsb_timestamp=4 -> timestamp=260
|
|
236 frame lsb_timestamp=2 -> timestamp=258
|
|
237 frame lsb_timestamp=3 -> timestamp=259
|
|
238
|
|
239 width/height
|
|
240 MUST be set to the coded width/height
|
|
241
|
|
242 sample_width/sample_height (aspect ratio)
|
|
243 sample_width is the horizontal distance between samples
|
|
244 sample_width and sample_height MUST be relatively prime if not zero
|
|
245 MUST be 0 if unknown
|
|
246
|
|
247 depth
|
|
248 for compatibility with some win32 codecs
|
|
249
|
|
250 priority
|
|
251 if 0 then the frame isn't used as reference (b frame) and can be dropped
|
|
252 MUST be > 0 for keyframes
|
|
253
|
|
254 sub_packet_size
|
|
255 size of an audio packet
|
|
256 Note a subpacket MUST be in exactly one packet, it cannot be split
|
|
257
|
|
258 shuffle_type
|
|
259 audio is often encoded in small fixed size packets, and to increase the
|
|
260 error robustness these can be shuffled
|
|
261 0 -> no shuffle
|
|
262 1-16 -> interleave packets by 2^n
|
|
263
|
|
264 checksum
|
|
265 crc32 checksum using the generator polynomial=0x04c11db7 (same as ogg)
|
|
266
|
|
267 checksum_flag
|
|
268 indicates that the frame_checksum is coded
|
|
269 MUST be 1 for the last non-keyframe before a keyframe
|
|
270
|
|
271 frame_checksum
|
|
272 identical to checksum, but instead of covering just the current
|
|
273 packet, it covers all frames of the same stream id since the last
|
|
274 checksum
|
|
275 this field is only coded if checksum_flag=1
|
|
276
|
|
277 index_timestamp
|
|
278 value in time_base precision, relative to the last index_timestamp
|
|
279
|
|
280 index_position
|
|
281 position in bytes of the first byte of the keyframe header, relative
|
|
282 to the last index_position
|
|
283
|
9295
|
284 name
|
|
285 the name of the info entry, valid names are
|
|
286 "Author","Description","Copyright","Encoder","Title"
|
|
287 Note: if someone needs some others, please tell us about them, so we can
|
|
288 add them to the official standard (if they are sane)
|
|
289
|
|
290 value
|
|
291
|
9294
|
292
|
|
293
|
|
294 Structure:
|
|
295
|
|
296 the headers MUST be in exactly the following order (to simplify demuxer design)
|
|
297 main header
|
|
298 stream_header (id=0)
|
|
299 codec_specific_header (id=0)
|
|
300 stream_header (id=1)
|
|
301 codec_specific_header (id=1)
|
|
302 ...
|
|
303 stream_header (id=n)
|
|
304 codec_specific_header (id=n)
|
|
305
|
|
306 headers may be repeated, but if they are then they MUST all be repeated together
|
|
307 and repeated headers MUST be identical
|
|
308
|
|
309 headers MUST be repeated every 10sec at least ? FIXME
|
9295
|
310
|
|
311 the info_header can be repeated, it can also contain different names & values
|
|
312 each time
|
9294
|
313
|
|
314
|
|
315 Sample code (GPL, & untested)
|
|
316
|
|
/**
 * Byte buffer cursor shared by the sample get_ and put_ functions.
 */
typedef struct BufferContext{
    uint8_t *buf;     /* presumably the start of the buffer; not touched by the sample functions */
    uint8_t *buf_ptr; /* current position; advanced by one for every byte read or written */
}BufferContext;
|
|
321
|
|
322 static inline uint64_t get_bytes(BufferContext *bc, int count){
|
|
323 uint64_t val=0;
|
|
324
|
|
325 assert(count>0 && count<9)
|
|
326
|
|
327 for(i=0; i<count; i++){
|
|
328 val <<=8;
|
|
329 val += *(bc->buf_ptr++);
|
|
330 }
|
|
331
|
|
332 return val;
|
|
333 }
|
|
334
|
|
335 static inline void put_bytes(BufferContext *bc, int count, uint64_t val){
|
|
336 uint64_t val=0;
|
|
337
|
|
338 assert(count>0 && count<9)
|
|
339
|
|
340 for(i=count-1; i>=0; i--){
|
|
341 *(bc->buf_ptr++)= val >> (8*i);
|
|
342 }
|
|
343
|
|
344 return val;
|
|
345 }
|
|
346
|
|
347 static inline uint64_t get_v(BufferContext *bc){
|
|
348 uint64_t val= 0;
|
|
349
|
|
350 for(;;){
|
|
351 int tmp= *(bc->buf_ptr++);
|
|
352 if(tmp&0x80)
|
|
353 val= (val<<7) + tmp - 0x80;
|
|
354 else
|
9299
|
355 return (val<<7) + tmp;
|
9294
|
356 }
|
|
357 }
|
|
358
|
|
359 static inline void put_v(BufferContext *bc, uint64_t val){
|
|
360 int i;
|
|
361
|
|
362 assert(val);
|
|
363
|
|
364 for(i=56;; i-=8){
|
|
365 if(val>>i) break;
|
|
366 }
|
|
367
|
|
368 for(;i>0; i-=8){
|
|
369 *(bc->buf_ptr++)= 0x80 | (val>>i);
|
|
370 }
|
|
371 *(bc->buf_ptr++)= val&0x7F;
|
|
372 }
|
|
373
|
|
374
|
|
375 Example stream
|
|
376
|
|
377 main header
|
|
378 video_stream_header (stream 0, video jpjp, timebase 30, lsb_timestamp_length=8)
|
|
379 codec_specific_header (stream 0)
|
|
380 video_stream_header (stream 1 subtitle usen, timebase 30, lsb_timestamp_length=8)
|
|
381 video_stream_header (stream 2 subtitle atde, timebase 30, lsb_timestamp_length=8)
|
|
382 audio_stream_header (stream 3, audio jpjp, timebase 1 , lsb_timestamp_length=8)
|
|
383 audio_stream_header (stream 4, audio usen, timebase 1 , lsb_timestamp_length=8)
|
|
384 index (stream 0)
|
|
385 keyframe (stream 0, msb_timestamp=0, lsb_timestamp=0)
|
|
386 keyframe (stream 1, msb_timestamp=0, lsb_timestamp=0)
|
|
387 keyframe (stream 2, msb_timestamp=0, lsb_timestamp=0)
|
|
388 keyframe (stream 3, msb_timestamp=0, lsb_timestamp=0)
|
|
389 keyframe (stream 4, msb_timestamp=0, lsb_timestamp=0)
|
|
390 frame (stream 0, lsb_timestamp=1)
|
|
391 frame (stream 0, lsb_timestamp=2)
|
|
392 ...
|
|
393 frame (stream 0, lsb_timestamp=30)
|
|
394 keyframe (stream 3, msb_timestamp=0, lsb_timestamp=1)
|
|
395 keyframe (stream 4, msb_timestamp=0, lsb_timestamp=1)
|
|
396 frame (stream 0, lsb_timestamp=31)
|
|
397 frame (stream 0, lsb_timestamp=32)
|
|
398 ...
|
|
399 frame (stream 0, lsb_timestamp=60)
|
|
400 frame (stream 1, lsb_timestamp=60)
|
|
401 frame (stream 2, lsb_timestamp=60)
|
|
402 keyframe (stream 3, msb_timestamp=0, lsb_timestamp=2)
|
|
403 keyframe (stream 4, msb_timestamp=0, lsb_timestamp=2)
|
|
404 frame (stream 0, lsb_timestamp=61)
|
|
405 frame (stream 0, lsb_timestamp=62)
|
|
406 ...
|
|
407 main header
|
|
408 video_stream_header (stream 0, video jpjp, timebase 30, lsb_timestamp_length=8)
|
|
409 codec_specific_header (stream 0)
|
|
410 video_stream_header (stream 1 subtitle usen, timebase 30, lsb_timestamp_length=8)
|
|
411 video_stream_header (stream 2 subtitle atde, timebase 30, lsb_timestamp_length=8)
|
|
412 audio_stream_header (stream 3, audio jpjp, timebase 1 , lsb_timestamp_length=8)
|
|
413 audio_stream_header (stream 4, audio usen, timebase 1 , lsb_timestamp_length=8)
|
|
414 frame (stream 0, lsb_timestamp=255)
|
|
415 frame (stream 0, msb_timestamp=1 lsb_timestamp=0)
|
|
416 frame (stream 0, lsb_timestamp=1)
|
|
417 frame (stream 0, lsb_timestamp=2)
|
|
418 frame (stream 1, msb_timestamp=1 lsb_timestamp=2)
|
|
419 frame (stream 2, msb_timestamp=1 lsb_timestamp=2)
|
|
420 frame (stream 0, lsb_timestamp=3)
|
|
421 frame (stream 0, lsb_timestamp=4)
|
|
422 ...
|
|
423 keyframe (stream 3, msb_timestamp=0, lsb_timestamp=9)
|
|
424 keyframe (stream 4, msb_timestamp=0, lsb_timestamp=9)
|
|
425 main header
|
|
426 video_stream_header (stream 0, video jpjp, timebase 30, lsb_timestamp_length=8)
|
|
427 codec_specific_header (stream 0)
|
|
428 video_stream_header (stream 1 subtitle usen, timebase 30, lsb_timestamp_length=8)
|
|
429 video_stream_header (stream 2 subtitle atde, timebase 30, lsb_timestamp_length=8)
|
|
430 audio_stream_header (stream 3, audio jpjp, timebase 1 , lsb_timestamp_length=8)
|
|
431 audio_stream_header (stream 4, audio usen, timebase 1 , lsb_timestamp_length=8)
|
|
432 index (stream 0)
|