Mercurial > mplayer.hg
comparison subreader.c @ 31686:b41cbf02f854
subtitles: convert SRT/MicroDVD markup into ASS markup
As the title says: when libass support is enabled, HTML-style markup in SRT
subtitles (<b>, </i>, </font>, etc.) is converted to ASS tags. MicroDVD-style
markup is converted to ASS tags in the same way.
Patch by ubitux, ubitux AT gmail DOT com.
author | greg |
---|---|
date | Sun, 18 Jul 2010 22:00:48 +0000 |
parents | dc26022e9c27 |
children | 8f7554cf4441 |
comparison
equal
deleted
inserted
replaced
31685:31b6397e3b28 | 31686:b41cbf02f854 |
---|---|
31 | 31 |
32 #include "config.h" | 32 #include "config.h" |
33 #include "mp_msg.h" | 33 #include "mp_msg.h" |
34 #include "mpcommon.h" | 34 #include "mpcommon.h" |
35 #include "subreader.h" | 35 #include "subreader.h" |
36 #include "subassconvert.h" | |
36 #include "libvo/sub.h" | 37 #include "libvo/sub.h" |
37 #include "stream/stream.h" | 38 #include "stream/stream.h" |
38 #include "libavutil/common.h" | 39 #include "libavutil/common.h" |
39 #include "libavutil/avstring.h" | 40 #include "libavutil/avstring.h" |
41 #include "libass/ass_mp.h" | |
40 | 42 |
41 #ifdef CONFIG_ENCA | 43 #ifdef CONFIG_ENCA |
42 #include <enca.h> | 44 #include <enca.h> |
43 #endif | 45 #endif |
44 | 46 |
294 &(current->start), line2) < 2) && | 296 &(current->start), line2) < 2) && |
295 (sscanf (line, | 297 (sscanf (line, |
296 "{%ld}{%ld}%[^\r\n]", | 298 "{%ld}{%ld}%[^\r\n]", |
297 &(current->start), &(current->end), line2) < 3)); | 299 &(current->start), &(current->end), line2) < 3)); |
298 | 300 |
299 p=line2; | 301 if (ass_enabled) { |
302 subassconvert_microdvd(line2, line, LINE_LEN + 1); | |
303 p = line; | |
304 } else | |
305 p = line2; | |
300 | 306 |
301 next=p, i=0; | 307 next=p, i=0; |
302 while ((next =sub_readtext (next, &(current->text[i])))) { | 308 while ((next =sub_readtext (next, &(current->text[i])))) { |
303 if (current->text[i]==ERR) {return ERR;} | 309 if (current->text[i]==ERR) {return ERR;} |
304 i++; | 310 i++; |
363 break; | 369 break; |
364 } | 370 } |
365 return current; | 371 return current; |
366 } | 372 } |
367 | 373 |
374 static subtitle *sub_ass_read_line_subviewer(stream_t *st, subtitle *current, int utf16) | |
375 { | |
376 int h1, m1, s1, ms1, h2, m2, s2, ms2, j = 0; | |
377 | |
378 while (!current->text[0]) { | |
379 char line[LINE_LEN + 1], full_line[LINE_LEN + 1], sep; | |
380 int i; | |
381 | |
382 /* Parse SubRip header */ | |
383 if (!stream_read_line(st, line, LINE_LEN, utf16)) | |
384 return NULL; | |
385 if (sscanf(line, "%d:%d:%d%[,.:]%d --> %d:%d:%d%[,.:]%d", | |
386 &h1, &m1, &s1, &sep, &ms1, &h2, &m2, &s2, &sep, &ms2) < 10) | |
387 continue; | |
388 | |
389 current->start = h1 * 360000 + m1 * 6000 + s1 * 100 + ms1 / 10; | |
390 current->end = h2 * 360000 + m2 * 6000 + s2 * 100 + ms2 / 10; | |
391 | |
392 /* Concat lines */ | |
393 full_line[0] = 0; | |
394 for (i = 0; i < SUB_MAX_TEXT; i++) { | |
395 int blank = 1, len = 0; | |
396 char *p; | |
397 | |
398 if (!stream_read_line(st, line, LINE_LEN, utf16)) | |
399 break; | |
400 | |
401 for (p = line; *p != '\n' && *p != '\r' && *p; p++, len++) | |
402 if (*p != ' ' && *p != '\t') | |
403 blank = 0; | |
404 | |
405 if (blank) | |
406 break; | |
407 | |
408 *p = 0; | |
409 | |
410 if (len >= sizeof(full_line) - j - 2) | |
411 break; | |
412 | |
413 if (j != 0) | |
414 full_line[j++] = '\n'; | |
415 strcpy(&full_line[j], line); | |
416 j += len; | |
417 } | |
418 | |
419 /* Use the ASS/SSA converter to transform the whole lines */ | |
420 if (full_line[0]) { | |
421 char converted_line[LINE_LEN + 1]; | |
422 subassconvert_subrip(full_line, converted_line, LINE_LEN + 1); | |
423 current->text[0] = strdup(converted_line); | |
424 current->lines = 1; | |
425 } | |
426 } | |
427 return current; | |
428 } | |
429 | |
368 static subtitle *sub_read_line_subviewer(stream_t *st,subtitle *current, int utf16) { | 430 static subtitle *sub_read_line_subviewer(stream_t *st,subtitle *current, int utf16) { |
369 char line[LINE_LEN+1]; | 431 char line[LINE_LEN+1]; |
370 int a1,a2,a3,a4,b1,b2,b3,b4; | 432 int a1,a2,a3,a4,b1,b2,b3,b4; |
371 char *p=NULL; | 433 char *p=NULL; |
372 int i,len; | 434 int i,len; |
373 | 435 |
436 if (ass_enabled) | |
437 return sub_ass_read_line_subviewer(st, current, utf16); | |
374 while (!current->text[0]) { | 438 while (!current->text[0]) { |
375 if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL; | 439 if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL; |
376 if ((len=sscanf (line, "%d:%d:%d%[,.:]%d --> %d:%d:%d%[,.:]%d",&a1,&a2,&a3,(char *)&i,&a4,&b1,&b2,&b3,(char *)&i,&b4)) < 10) | 440 if ((len=sscanf (line, "%d:%d:%d%[,.:]%d --> %d:%d:%d%[,.:]%d",&a1,&a2,&a3,(char *)&i,&a4,&b1,&b2,&b3,(char *)&i,&b4)) < 10) |
377 continue; | 441 continue; |
378 current->start = a1*360000+a2*6000+a3*100+a4/10; | 442 current->start = a1*360000+a2*6000+a3*100+a4/10; |
2288 * \brief parse text and append it to subtitle in sub | 2352 * \brief parse text and append it to subtitle in sub |
2289 * \param sub subtitle struct to add text to | 2353 * \param sub subtitle struct to add text to |
2290 * \param txt text to parse | 2354 * \param txt text to parse |
2291 * \param len length of text in txt | 2355 * \param len length of text in txt |
2292 * \param endpts pts at which this subtitle text should be removed again | 2356 * \param endpts pts at which this subtitle text should be removed again |
2357 * \param strip_markup if strip markup is set (!= 0), markup tags like <b></b> are ignored | |
2293 * | 2358 * |
2294 * <> and {} are interpreted as comment delimiters, "\n", "\N", '\n', '\r' | 2359 * <> and {} are interpreted as comment delimiters, "\n", "\N", '\n', '\r' |
2295 * and '\0' are interpreted as newlines, duplicate, leading and trailing | 2360 * and '\0' are interpreted as newlines, duplicate, leading and trailing |
2296 * newlines are ignored. | 2361 * newlines are ignored. |
2297 */ | 2362 */ |
2298 void sub_add_text(subtitle *sub, const char *txt, int len, double endpts) { | 2363 void sub_add_text(subtitle *sub, const char *txt, int len, double endpts, int strip_markup) { |
2299 int comment = 0; | 2364 int comment = 0; |
2300 int double_newline = 1; // ignore newlines at the beginning | 2365 int double_newline = 1; // ignore newlines at the beginning |
2301 int i, pos; | 2366 int i, pos; |
2302 char *buf; | 2367 char *buf; |
2303 #ifdef CONFIG_FRIBIDI | 2368 #ifdef CONFIG_FRIBIDI |
2306 if (sub->lines >= SUB_MAX_TEXT) return; | 2371 if (sub->lines >= SUB_MAX_TEXT) return; |
2307 pos = 0; | 2372 pos = 0; |
2308 buf = malloc(MAX_SUBLINE + 1); | 2373 buf = malloc(MAX_SUBLINE + 1); |
2309 sub->text[sub->lines] = buf; | 2374 sub->text[sub->lines] = buf; |
2310 sub->endpts[sub->lines] = endpts; | 2375 sub->endpts[sub->lines] = endpts; |
2311 for (i = 0; i < len && pos < MAX_SUBLINE; i++) { | 2376 |
2312 char c = txt[i]; | 2377 if (!strip_markup) { |
2313 if (c == '<') comment |= 1; | 2378 subassconvert_subrip(txt, buf, MAX_SUBLINE + 1); |
2314 if (c == '{') comment |= 2; | 2379 sub->text[sub->lines] = buf; |
2315 if (comment) { | 2380 } else { |
2316 if (c == '}') comment &= ~2; | 2381 for (i = 0; i < len && pos < MAX_SUBLINE; i++) { |
2317 if (c == '>') comment &= ~1; | 2382 char c = txt[i]; |
2318 continue; | 2383 if (c == '<') comment |= 1; |
2319 } | 2384 if (c == '{') comment |= 2; |
2320 if (pos == MAX_SUBLINE - 1) { | 2385 if (comment) { |
2321 i--; | 2386 if (c == '}') comment &= ~2; |
2322 c = 0; | 2387 if (c == '>') comment &= ~1; |
2323 } | 2388 continue; |
2324 if (c == '\\' && i + 1 < len) { | |
2325 c = txt[++i]; | |
2326 if (c == 'n' || c == 'N') c = 0; | |
2327 } | |
2328 if (c == '\n' || c == '\r') c = 0; | |
2329 if (c) { | |
2330 double_newline = 0; | |
2331 buf[pos++] = c; | |
2332 } else if (!double_newline) { | |
2333 if (sub->lines >= SUB_MAX_TEXT - 1) { | |
2334 mp_msg(MSGT_VO, MSGL_WARN, "Too many subtitle lines\n"); | |
2335 break; | |
2336 } | 2389 } |
2337 double_newline = 1; | 2390 if (pos == MAX_SUBLINE - 1) { |
2338 buf[pos] = 0; | 2391 i--; |
2339 sub->lines++; | 2392 c = 0; |
2340 pos = 0; | 2393 } |
2341 buf = malloc(MAX_SUBLINE + 1); | 2394 if (c == '\\' && i + 1 < len) { |
2342 sub->text[sub->lines] = buf; | 2395 c = txt[++i]; |
2343 sub->endpts[sub->lines] = endpts; | 2396 if (c == 'n' || c == 'N') c = 0; |
2344 } | 2397 } |
2398 if (c == '\n' || c == '\r') c = 0; | |
2399 if (c) { | |
2400 double_newline = 0; | |
2401 buf[pos++] = c; | |
2402 } else if (!double_newline) { | |
2403 if (sub->lines >= SUB_MAX_TEXT - 1) { | |
2404 mp_msg(MSGT_VO, MSGL_WARN, "Too many subtitle lines\n"); | |
2405 break; | |
2406 } | |
2407 double_newline = 1; | |
2408 buf[pos] = 0; | |
2409 sub->lines++; | |
2410 pos = 0; | |
2411 buf = malloc(MAX_SUBLINE + 1); | |
2412 sub->text[sub->lines] = buf; | |
2413 sub->endpts[sub->lines] = endpts; | |
2414 } | |
2415 } | |
2416 buf[pos] = 0; | |
2345 } | 2417 } |
2346 buf[pos] = 0; | |
2347 if (sub->lines < SUB_MAX_TEXT && | 2418 if (sub->lines < SUB_MAX_TEXT && |
2348 strlen(sub->text[sub->lines])) | 2419 strlen(sub->text[sub->lines])) |
2349 sub->lines++; | 2420 sub->lines++; |
2350 #ifdef CONFIG_FRIBIDI | 2421 #ifdef CONFIG_FRIBIDI |
2351 sub = sub_fribidi(sub, sub_utf8, orig_lines); | 2422 sub = sub_fribidi(sub, sub_utf8, orig_lines); |