comparison libmpdemux/ebml.c @ 11807:9a81d7b4c0b6

Added the new C based Matroska demuxer by Aurelien Jacobs.
author mosu
date Mon, 19 Jan 2004 19:16:10 +0000
parents
children d9ff6528514d
comparison
equal deleted inserted replaced
11806:4f14825fd446 11807:9a81d7b4c0b6
1 /*
2 * native ebml reader for the Matroska demuxer
3 * Written by Aurelien Jacobs <aurel@gnuage.org>
4 * Based on the one written by Ronald Bultje for gstreamer
5 * Licence: GPL
6 */
7
8 #include "config.h"
9 #ifdef HAVE_MATROSKA
10
11 #include <stdlib.h>
12
13 #include "stream.h"
14 #include "ebml.h"
15
16
17 /*
18 * Read: the element content data ID.
19 * Return: the ID.
20 */
21 uint32_t
22 ebml_read_id (stream_t *s, int *length)
23 {
24 int i, len_mask = 0x80;
25 uint32_t id;
26
27 for (i=0, id=stream_read_char (s); i<4 && !(id & len_mask); i++)
28 len_mask >>= 1;
29 if (i >= 4)
30 return EBML_ID_INVALID;
31 if (length)
32 *length = i + 1;
33 while (i--)
34 id = (id << 8) | stream_read_char (s);
35 return id;
36 }
37
38 /*
39 * Read a variable length unsigned int.
40 */
41 uint64_t
42 ebml_read_vlen_uint (uint8_t *buffer, int *length)
43 {
44 int i, j, num_ffs = 0, len_mask = 0x80;
45 uint64_t num;
46
47 for (i=0, num=*buffer++; i<8 && !(num & len_mask); i++)
48 len_mask >>= 1;
49 if (i >= 8)
50 return EBML_UINT_INVALID;
51 j = i+1;
52 if (length)
53 *length = j;
54 if (((int)num &= (len_mask - 1)) == len_mask - 1)
55 num_ffs++;
56 while (i--)
57 {
58 num = (num << 8) | *buffer++;
59 if ((num & 0xFF) == 0xFF)
60 num_ffs++;
61 }
62 if (j == num_ffs)
63 return EBML_UINT_INVALID;
64 return num;
65 }
66
67 /*
68 * Read a variable length signed int.
69 */
70 int64_t
71 ebml_read_vlen_int (uint8_t *buffer, int *length)
72 {
73 uint64_t unum;
74 int l;
75
76 /* read as unsigned number first */
77 unum = ebml_read_vlen_uint (buffer, &l);
78 if (unum == EBML_UINT_INVALID)
79 return EBML_INT_INVALID;
80 if (length)
81 *length = l;
82
83 return unum - ((1 << ((7 * l) - 1)) - 1);
84 }
85
86 /*
87 * Read: element content length.
88 */
89 uint64_t
90 ebml_read_length (stream_t *s, int *length)
91 {
92 int i, j, num_ffs = 0, len_mask = 0x80;
93 uint64_t len;
94
95 for (i=0, len=stream_read_char (s); i<8 && !(len & len_mask); i++)
96 len_mask >>= 1;
97 if (i >= 8)
98 return EBML_UINT_INVALID;
99 j = i+1;
100 if (length)
101 *length = j;
102 if (((int)len &= (len_mask - 1)) == len_mask - 1)
103 num_ffs++;
104 while (i--)
105 {
106 len = (len << 8) | stream_read_char (s);
107 if ((len & 0xFF) == 0xFF)
108 num_ffs++;
109 }
110 if (j == num_ffs)
111 return EBML_UINT_INVALID;
112 return len;
113 }
114
115 /*
116 * Read the next element as an unsigned int.
117 */
118 uint64_t
119 ebml_read_uint (stream_t *s, uint64_t *length)
120 {
121 uint64_t len, value = 0;
122 int l;
123
124 len = ebml_read_length (s, &l);
125 if (len == EBML_UINT_INVALID || len < 1 || len > 8)
126 return EBML_UINT_INVALID;
127 if (length)
128 *length = len + l;
129
130 while (len--)
131 value = (value << 8) | stream_read_char (s);
132
133 return value;
134 }
135
136 /*
137 * Read the next element as a signed int.
138 */
139 int64_t
140 ebml_read_int (stream_t *s, uint64_t *length)
141 {
142 int64_t value = 0;
143 uint64_t len;
144 int l;
145
146 len = ebml_read_length (s, &l);
147 if (len == EBML_UINT_INVALID || len < 1 || len > 8)
148 return EBML_INT_INVALID;
149 if (length)
150 *length = len + l;
151
152 len--;
153 l = stream_read_char (s);
154 if (l & 0x80)
155 value = -1;
156 value = (value << 8) | l;
157 while (len--)
158 value = (value << 8) | stream_read_char (s);
159
160 return value;
161 }
162
163 /*
164 * Read the next element as a float.
165 */
166 long double
167 ebml_read_float (stream_t *s, uint64_t *length)
168 {
169 long double value;
170 uint64_t len;
171 int l;
172
173 len = ebml_read_length (s, &l);
174 switch (len)
175 {
176 case 4:
177 {
178 uint32_t i;
179 float *f;
180 #ifndef WORDS_BIGENDIAN
181 i = stream_read_dword (s);
182 #else
183 i = stream_read_dword_le (s);
184 #endif
185 f = (float *) (void *) &i;
186 value = *f;
187 break;
188 }
189
190 case 8:
191 {
192 uint64_t i;
193 double *d;
194 #ifndef WORDS_BIGENDIAN
195 i = stream_read_qword (s);
196 #else
197 i = stream_read_qword_le (s);
198 #endif
199 d = (double *) (void *) &i;
200 value = *d;
201 break;
202 }
203
204 case 10:
205 {
206 uint8_t data[10];
207 #ifdef WORDS_BIGENDIAN
208 int i = 10;
209 #endif
210 if (stream_read (s, data, 10) != 10)
211 return EBML_FLOAT_INVALID;
212 #ifndef WORDS_BIGENDIAN
213 value = * (long double *) data;
214 #else
215 while (i--)
216 ((uint8_t *) &value)[i] = data[9 - i];
217 #endif
218 break;
219 }
220
221 default:
222 return EBML_FLOAT_INVALID;
223 }
224
225 if (length)
226 *length = len + l;
227
228 return value;
229 }
230
231 /*
232 * Read the next element as an ASCII string.
233 */
234 char *
235 ebml_read_ascii (stream_t *s, uint64_t *length)
236 {
237 uint64_t len;
238 char *str;
239 int l;
240
241 len = ebml_read_length (s, &l);
242 if (len == EBML_UINT_INVALID)
243 return NULL;
244 if (length)
245 *length = len + l;
246
247 str = (char *) malloc (len+1);
248 if (stream_read(s, str, len) != (int) len)
249 {
250 free (str);
251 return NULL;
252 }
253 str[len] = '\0';
254
255 return str;
256 }
257
258 /*
259 * Read the next element as a UTF-8 string.
260 */
261 char *
262 ebml_read_utf8 (stream_t *s, uint64_t *length)
263 {
264 return ebml_read_ascii (s, length);
265 }
266
267 /*
268 * Skip the next element.
269 */
270 int
271 ebml_read_skip (stream_t *s, uint64_t *length)
272 {
273 uint64_t len;
274 int l;
275
276 len = ebml_read_length (s, &l);
277 if (len == EBML_UINT_INVALID)
278 return 1;
279 if (length)
280 *length = len + l;
281
282 stream_skip(s, len);
283
284 return 0;
285 }
286
287 /*
288 * Read the next element, but only the header. The contents
289 * are supposed to be sub-elements which can be read separately.
290 */
291 uint32_t
292 ebml_read_master (stream_t *s, uint64_t *length)
293 {
294 uint64_t len;
295 uint32_t id;
296
297 id = ebml_read_id (s, NULL);
298 if (id == EBML_ID_INVALID)
299 return id;
300
301 len = ebml_read_length (s, NULL);
302 if (len == EBML_UINT_INVALID)
303 return EBML_ID_INVALID;
304 if (length)
305 *length = len;
306
307 return id;
308 }
309
310
311 /*
312 * Read an EBML header.
313 */
314 char *
315 ebml_read_header (stream_t *s, int *version)
316 {
317 uint64_t length, l, num;
318 uint32_t id;
319 char *str = NULL;
320
321 if (ebml_read_master (s, &length) != EBML_ID_HEADER)
322 return 0;
323
324 if (version)
325 *version = 1;
326
327 while (length > 0)
328 {
329 id = ebml_read_id (s, NULL);
330 if (id == EBML_ID_INVALID)
331 return NULL;
332 length -= 2;
333
334 switch (id)
335 {
336 /* is our read version uptodate? */
337 case EBML_ID_EBMLREADVERSION:
338 num = ebml_read_uint (s, &l);
339 if (num != EBML_VERSION)
340 return NULL;
341 break;
342
343 /* we only handle 8 byte lengths at max */
344 case EBML_ID_EBMLMAXSIZELENGTH:
345 num = ebml_read_uint (s, &l);
346 if (num != sizeof (uint64_t))
347 return NULL;
348 break;
349
350 /* we handle 4 byte IDs at max */
351 case EBML_ID_EBMLMAXIDLENGTH:
352 num = ebml_read_uint (s, &l);
353 if (num != sizeof (uint32_t))
354 return NULL;
355 break;
356
357 case EBML_ID_DOCTYPE:
358 str = ebml_read_ascii (s, &l);
359 if (str == NULL)
360 return NULL;
361 break;
362
363 case EBML_ID_DOCTYPEREADVERSION:
364 num = ebml_read_uint (s, &l);
365 if (num == EBML_UINT_INVALID)
366 return NULL;
367 if (version)
368 *version = num;
369 break;
370
371 /* we ignore these two, they don't tell us anything we care about */
372 case EBML_ID_VOID:
373 case EBML_ID_EBMLVERSION:
374 case EBML_ID_DOCTYPEVERSION:
375 default:
376 if (ebml_read_skip (s, &l))
377 return NULL;
378 break;
379 }
380 length -= l;
381 }
382
383 return str;
384 }
385
386 #endif /* HAVE_MATROSKA */