Mercurial > mplayer.hg
comparison libass/ass.c @ 19492:c8daf3471201
SSA/ASS parser reworked, with two main results:
support for fonts embedded in the script (uuencoded directly into the script) has been added;
the Matroska interface functions have been given more sensible names.
author | eugeni |
---|---|
date | Tue, 22 Aug 2006 22:11:01 +0000 |
parents | 07209f48e527 |
children | 31ac2e1a5695 |
comparison
equal
deleted
inserted
replaced
19491:10d8f2cae948 | 19492:c8daf3471201 |
---|---|
16 | 16 |
17 #ifdef USE_ICONV | 17 #ifdef USE_ICONV |
18 #include <iconv.h> | 18 #include <iconv.h> |
19 extern char *sub_cp; | 19 extern char *sub_cp; |
20 #endif | 20 #endif |
21 extern int extract_embedded_fonts; | |
21 | 22 |
22 #include "mp_msg.h" | 23 #include "mp_msg.h" |
23 #include "ass.h" | 24 #include "ass.h" |
24 #include "ass_utils.h" | 25 #include "ass_utils.h" |
25 #include "libvo/sub.h" // for utf8_get_char | 26 #include "libvo/sub.h" // for utf8_get_char |
26 | 27 |
27 char *get_path(char *); | 28 char *get_path(char *); |
28 | 29 |
30 struct parser_priv_s { | |
31 enum {PST_UNKNOWN = 0, PST_INFO, PST_STYLES, PST_EVENTS, PST_FONTS} state; | |
32 char* fontname; | |
33 char* fontdata; | |
34 int fontdata_size; | |
35 int fontdata_used; | |
36 }; | |
37 | |
29 #define ASS_STYLES_ALLOC 20 | 38 #define ASS_STYLES_ALLOC 20 |
30 #define ASS_EVENTS_ALLOC 200 | 39 #define ASS_EVENTS_ALLOC 200 |
31 | 40 |
32 void ass_free_track(ass_track_t* track) { | 41 void ass_free_track(ass_track_t* track) { |
33 int i; | 42 int i; |
34 | 43 |
44 if (track->parser_priv) { | |
45 if (track->parser_priv->fontname) | |
46 free(track->parser_priv->fontname); | |
47 if (track->parser_priv->fontdata) | |
48 free(track->parser_priv->fontdata); | |
49 free(track->parser_priv); | |
50 } | |
35 if (track->style_format) | 51 if (track->style_format) |
36 free(track->style_format); | 52 free(track->style_format); |
37 if (track->event_format) | 53 if (track->event_format) |
38 free(track->event_format); | 54 free(track->event_format); |
39 if (track->styles) { | 55 if (track->styles) { |
377 free(format); | 393 free(format); |
378 return 0; | 394 return 0; |
379 | 395 |
380 } | 396 } |
381 | 397 |
398 static int process_styles_line(ass_track_t* track, char *str) | |
399 { | |
400 if (!strncmp(str,"Format:", 7)) { | |
401 char* p = str + 7; | |
402 skip_spaces(&p); | |
403 track->style_format = strdup(p); | |
404 mp_msg(MSGT_GLOBAL, MSGL_DBG2, "Style format: %s\n", track->style_format); | |
405 } else if (!strncmp(str,"Style:", 6)) { | |
406 char* p = str + 6; | |
407 skip_spaces(&p); | |
408 process_style(track, p); | |
409 } | |
410 return 0; | |
411 } | |
412 | |
413 static int process_info_line(ass_track_t* track, char *str) | |
414 { | |
415 if (!strncmp(str, "PlayResX:", 9)) { | |
416 track->PlayResX = atoi(str + 9); | |
417 } else if (!strncmp(str,"PlayResY:", 9)) { | |
418 track->PlayResY = atoi(str + 9); | |
419 } else if (!strncmp(str,"Timer:", 6)) { | |
420 track->Timer = atof(str + 6); | |
421 } else if (!strncmp(str,"WrapStyle:", 10)) { | |
422 track->WrapStyle = atoi(str + 10); | |
423 } | |
424 return 0; | |
425 } | |
426 | |
427 static int process_events_line(ass_track_t* track, char *str) | |
428 { | |
429 if (!strncmp(str, "Format:", 7)) { | |
430 char* p = str + 7; | |
431 skip_spaces(&p); | |
432 track->event_format = strdup(p); | |
433 mp_msg(MSGT_GLOBAL, MSGL_DBG2, "Event format: %s\n", track->event_format); | |
434 } else if (!strncmp(str, "Dialogue:", 9)) { | |
435 // This should never be reached for embedded subtitles. | |
436 // They have slightly different format and are parsed in ass_process_chunk, | |
437 // called directly from demuxer | |
438 int eid; | |
439 ass_event_t* event; | |
440 | |
441 str += 9; | |
442 skip_spaces(&str); | |
443 | |
444 eid = ass_alloc_event(track); | |
445 event = track->events + eid; | |
446 | |
447 process_event_tail(track, event, str, 0); | |
448 } else { | |
449 mp_msg(MSGT_GLOBAL, MSGL_V, "Not understood: %s \n", str); | |
450 } | |
451 return 0; | |
452 } | |
453 | |
454 // Copied from mkvtoolnix | |
455 static unsigned char* decode_chars(unsigned char c1, unsigned char c2, | |
456 unsigned char c3, unsigned char c4, unsigned char* dst, int cnt) | |
457 { | |
458 uint32_t value; | |
459 unsigned char bytes[3]; | |
460 int i; | |
461 | |
462 value = ((c1 - 33) << 18) + ((c2 - 33) << 12) + ((c3 - 33) << 6) + (c4 - 33); | |
463 bytes[2] = value & 0xff; | |
464 bytes[1] = (value & 0xff00) >> 8; | |
465 bytes[0] = (value & 0xff0000) >> 16; | |
466 | |
467 for (i = 0; i < cnt; ++i) | |
468 *dst++ = bytes[i]; | |
469 return dst; | |
470 } | |
471 | |
472 static int decode_font(ass_track_t* track) | |
473 { | |
474 unsigned char* p; | |
475 unsigned char* q; | |
476 int i; | |
477 int size; // original size | |
478 int dsize; // decoded size | |
479 unsigned char* buf = 0; | |
480 | |
481 mp_msg(MSGT_GLOBAL, MSGL_V, "font: %d bytes encoded data \n", track->parser_priv->fontdata_used); | |
482 size = track->parser_priv->fontdata_used; | |
483 if (size % 4 == 1) { | |
484 mp_msg(MSGT_GLOBAL, MSGL_ERR, "bad encoded data size\n"); | |
485 goto error_decode_font; | |
486 } | |
487 buf = malloc(size / 4 * 3 + 2); | |
488 q = buf; | |
489 for (i = 0, p = (unsigned char*)track->parser_priv->fontdata; i < size / 4; i++, p+=4) { | |
490 q = decode_chars(p[0], p[1], p[2], p[3], q, 3); | |
491 } | |
492 if (size % 4 == 2) { | |
493 q = decode_chars(p[0], p[1], 0, 0, q, 1); | |
494 } else if (size % 4 == 3) { | |
495 q = decode_chars(p[0], p[1], p[2], 0, q, 2); | |
496 } | |
497 dsize = q - buf; | |
498 assert(dsize <= size / 4 * 3 + 2); | |
499 | |
500 if (extract_embedded_fonts) | |
501 ass_process_font(track->parser_priv->fontname, (char*)buf, dsize); | |
502 | |
503 error_decode_font: | |
504 if (buf) free(buf); | |
505 free(track->parser_priv->fontname); | |
506 free(track->parser_priv->fontdata); | |
507 track->parser_priv->fontname = 0; | |
508 track->parser_priv->fontdata = 0; | |
509 track->parser_priv->fontdata_size = 0; | |
510 track->parser_priv->fontdata_used = 0; | |
511 return 0; | |
512 } | |
513 | |
514 static char* validate_fname(char* name); | |
515 | |
516 static int process_fonts_line(ass_track_t* track, char *str) | |
517 { | |
518 int len; | |
519 | |
520 if (!strncmp(str, "fontname:", 9)) { | |
521 char* p = str + 9; | |
522 skip_spaces(&p); | |
523 if (track->parser_priv->fontname) { | |
524 decode_font(track); | |
525 } | |
526 track->parser_priv->fontname = validate_fname(p); | |
527 mp_msg(MSGT_GLOBAL, MSGL_V, "fontname: %s\n", track->parser_priv->fontname); | |
528 return 0; | |
529 } | |
530 | |
531 if (!track->parser_priv->fontname) { | |
532 mp_msg(MSGT_GLOBAL, MSGL_V, "Not understood: %s \n", str); | |
533 return 0; | |
534 } | |
535 | |
536 len = strlen(str); | |
537 if (len > 80) { | |
538 mp_msg(MSGT_GLOBAL, MSGL_WARN, "Font line too long: %d, %s\n", len, str); | |
539 return 0; | |
540 } | |
541 if (track->parser_priv->fontdata_used + len > track->parser_priv->fontdata_size) { | |
542 track->parser_priv->fontdata_size += 100 * 1024; | |
543 track->parser_priv->fontdata = realloc(track->parser_priv->fontdata, track->parser_priv->fontdata_size); | |
544 } | |
545 memcpy(track->parser_priv->fontdata + track->parser_priv->fontdata_used, str, len); | |
546 track->parser_priv->fontdata_used += len; | |
547 | |
548 return 0; | |
549 } | |
550 | |
382 /** | 551 /** |
383 * \brief Parse a header line | 552 * \brief Parse a header line |
384 * \param track track | 553 * \param track track |
385 * \param str string to parse, zero-terminated | 554 * \param str string to parse, zero-terminated |
386 */ | 555 */ |
387 static int process_header_line(ass_track_t* track, char *str) | 556 static int process_line(ass_track_t* track, char *str) |
388 { | 557 { |
389 static int events_section_started = 0; | 558 if (strstr(str, "[Script Info]")) { // FIXME: strstr to skip possible BOM at the beginning of the script |
390 | 559 track->parser_priv->state = PST_INFO; |
391 mp_msg(MSGT_GLOBAL, MSGL_DBG2, "=== Header: %s\n", str); | 560 } else if (!strncmp(str, "[V4 Styles]", 11)) { |
392 if (strncmp(str, "PlayResX:", 9)==0) { | 561 track->parser_priv->state = PST_STYLES; |
393 track->PlayResX = atoi(str + 9); | 562 track->track_type = TRACK_TYPE_SSA; |
394 } else if (strncmp(str,"PlayResY:", 9)==0) { | 563 } else if (!strncmp(str, "[V4+ Styles]", 12)) { |
395 track->PlayResY = atoi(str + 9); | 564 track->parser_priv->state = PST_STYLES; |
396 } else if (strncmp(str,"Timer:", 6)==0) { | 565 track->track_type = TRACK_TYPE_ASS; |
397 track->Timer = atof(str + 6); | 566 } else if (!strncmp(str, "[Events]", 8)) { |
398 } else if (strstr(str,"Styles]")) { | 567 track->parser_priv->state = PST_EVENTS; |
399 events_section_started = 0; | 568 } else if (!strncmp(str, "[Fonts]", 7)) { |
400 if (strchr(str, '+')) | 569 track->parser_priv->state = PST_FONTS; |
401 track->track_type = TRACK_TYPE_ASS; | 570 } else { |
402 else | 571 switch (track->parser_priv->state) { |
403 track->track_type = TRACK_TYPE_SSA; | 572 case PST_INFO: |
404 } else if (strncmp(str,"[Events]", 8)==0) { | 573 process_info_line(track, str); |
405 events_section_started = 1; | 574 break; |
406 } else if (strncmp(str,"Format:", 7)==0) { | 575 case PST_STYLES: |
407 char* p = str + 7; | 576 process_styles_line(track, str); |
408 skip_spaces(&p); | 577 break; |
409 if (events_section_started) { | 578 case PST_EVENTS: |
410 track->event_format = strdup(p); | 579 process_events_line(track, str); |
411 mp_msg(MSGT_GLOBAL, MSGL_DBG2, "Event format: %s\n", track->event_format); | 580 break; |
412 } else { | 581 case PST_FONTS: |
413 track->style_format = strdup(p); | 582 process_fonts_line(track, str); |
414 mp_msg(MSGT_GLOBAL, MSGL_DBG2, "Style format: %s\n", track->style_format); | 583 break; |
584 default: | |
585 break; | |
415 } | 586 } |
416 } else if (strncmp(str,"Style:", 6)==0) { | 587 } |
417 char* p = str + 6; | 588 |
418 skip_spaces(&p); | 589 // there is no explicit end-of-font marker in ssa/ass |
419 process_style(track, p); | 590 if ((track->parser_priv->state != PST_FONTS) && (track->parser_priv->fontname)) |
420 } else if (strncmp(str,"WrapStyle:", 10)==0) { | 591 decode_font(track); |
421 track->WrapStyle = atoi(str + 10); | 592 |
422 } | |
423 return 0; | 593 return 0; |
424 } | 594 } |
425 | 595 |
426 /** | 596 static int process_text(ass_track_t* track, char* str) |
427 * \brief Process CodecPrivate section of subtitle stream | 597 { |
428 * \param track track | 598 char* p = str; |
429 * \param data string to parse | |
430 * \param size length of data | |
431 CodecPrivate section contains [Stream Info] and [V4+ Styles] sections | |
432 */ | |
433 void ass_process_chunk(ass_track_t* track, char *data, int size) | |
434 { | |
435 char* str = malloc(size + 1); | |
436 char* p; | |
437 int sid; | |
438 | |
439 memcpy(str, data, size); | |
440 str[size] = '\0'; | |
441 | |
442 p = str; | |
443 while(1) { | 599 while(1) { |
444 char* q; | 600 char* q; |
445 for (;((*p=='\r')||(*p=='\n'));++p) {} | 601 for (;((*p=='\r')||(*p=='\n'));++p) {} |
446 for (q=p; ((*q!='\0')&&(*q!='\r')&&(*q!='\n')); ++q) {}; | 602 for (q=p; ((*q!='\0')&&(*q!='\r')&&(*q!='\n')); ++q) {}; |
447 if (q==p) | 603 if (q==p) |
448 break; | 604 break; |
449 if (*q != '\0') | 605 if (*q != '\0') |
450 *(q++) = '\0'; | 606 *(q++) = '\0'; |
451 process_header_line(track, p); | 607 process_line(track, p); |
452 if (*q == '\0') | 608 if (*q == '\0') |
453 break; | 609 break; |
454 p = q; | 610 p = q; |
455 } | 611 } |
612 return 0; | |
613 } | |
614 | |
615 /** | |
616 * \brief Process CodecPrivate section of subtitle stream | |
617 * \param track track | |
618 * \param data string to parse | |
619 * \param size length of data | |
620 CodecPrivate section contains [Stream Info] and [V4+ Styles] ([V4 Styles] for SSA) sections | |
621 */ | |
622 void ass_process_codec_private(ass_track_t* track, char *data, int size) | |
623 { | |
624 char* str = malloc(size + 1); | |
625 int sid; | |
626 | |
627 memcpy(str, data, size); | |
628 str[size] = '\0'; | |
629 | |
630 process_text(track, str); | |
456 free(str); | 631 free(str); |
457 | 632 |
458 // add "Default" style to the end | 633 // add "Default" style to the end |
459 // will be used if track does not contain a default style (or even does not contain styles at all) | 634 // will be used if track does not contain a default style (or even does not contain styles at all) |
460 sid = ass_alloc_style(track); | 635 sid = ass_alloc_style(track); |
462 track->styles[sid].FontName = strdup("Arial"); | 637 track->styles[sid].FontName = strdup("Arial"); |
463 | 638 |
464 if (!track->event_format) { | 639 if (!track->event_format) { |
465 // probably an mkv produced by ancient mkvtoolnix | 640 // probably an mkv produced by ancient mkvtoolnix |
466 // such files don't have [Events] and Format: headers | 641 // such files don't have [Events] and Format: headers |
642 track->parser_priv->state = PST_EVENTS; | |
467 if (track->track_type == TRACK_TYPE_SSA) | 643 if (track->track_type == TRACK_TYPE_SSA) |
468 track->event_format = strdup("Format: Marked, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text"); | 644 track->event_format = strdup("Format: Marked, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text"); |
469 else | 645 else |
470 track->event_format = strdup("Format: Layer, Start, End, Style, Actor, MarginL, MarginR, MarginV, Effect, Text"); | 646 track->event_format = strdup("Format: Layer, Start, End, Style, Actor, MarginL, MarginR, MarginV, Effect, Text"); |
471 } | 647 } |
486 * \param data string to parse | 662 * \param data string to parse |
487 * \param size length of data | 663 * \param size length of data |
488 * \param timecode starting time of the event (milliseconds) | 664 * \param timecode starting time of the event (milliseconds) |
489 * \param duration duration of the event (milliseconds) | 665 * \param duration duration of the event (milliseconds) |
490 */ | 666 */ |
491 void ass_process_line(ass_track_t* track, char *data, int size, long long timecode, long long duration) | 667 void ass_process_chunk(ass_track_t* track, char *data, int size, long long timecode, long long duration) |
492 { | 668 { |
493 char* str; | 669 char* str; |
494 int eid; | 670 int eid; |
495 char* p; | 671 char* p; |
496 char* token; | 672 char* token; |
531 } while (0); | 707 } while (0); |
532 // some error | 708 // some error |
533 ass_free_event(track, eid); | 709 ass_free_event(track, eid); |
534 track->n_events--; | 710 track->n_events--; |
535 free(str); | 711 free(str); |
536 } | |
537 | |
538 /** | |
539 * \brief Process a line from external file. | |
540 * \param track track | |
541 * \param str string to parse | |
542 * \param size length of data | |
543 */ | |
544 static void ass_process_external_line(ass_track_t* track, char *str, int size) | |
545 { | |
546 int eid; | |
547 ass_event_t* event; | |
548 | |
549 eid = ass_alloc_event(track); | |
550 event = track->events + eid; | |
551 | |
552 if (strncmp("Dialogue:", str, 9) != 0) | |
553 return; | |
554 | |
555 str += 9; | |
556 while (*str == ' ') {++str;} | |
557 | |
558 process_event_tail(track, event, str, 0); | |
559 } | 712 } |
560 | 713 |
561 #ifdef USE_ICONV | 714 #ifdef USE_ICONV |
562 /** \brief recode buffer to utf-8 | 715 /** \brief recode buffer to utf-8 |
563 * constraint: sub_cp != 0 | 716 * constraint: sub_cp != 0 |
639 { | 792 { |
640 int res; | 793 int res; |
641 long sz; | 794 long sz; |
642 long bytes_read; | 795 long bytes_read; |
643 char* buf; | 796 char* buf; |
644 char* p; | |
645 int events_reached; | |
646 ass_track_t* track; | 797 ass_track_t* track; |
647 | 798 |
648 FILE* fp = fopen(fname, "rb"); | 799 FILE* fp = fopen(fname, "rb"); |
649 if (!fp) { | 800 if (!fp) { |
650 mp_msg(MSGT_GLOBAL, MSGL_WARN, "ass_read_file(%s): fopen failed\n", fname); | 801 mp_msg(MSGT_GLOBAL, MSGL_WARN, "ass_read_file(%s): fopen failed\n", fname); |
696 | 847 |
697 track = ass_new_track(); | 848 track = ass_new_track(); |
698 track->name = strdup(fname); | 849 track->name = strdup(fname); |
699 | 850 |
700 // process header | 851 // process header |
701 events_reached = 0; | 852 process_text(track, buf); |
702 p = buf; | 853 |
703 while (p && (*p)) { | 854 // there is no explicit end-of-font marker in ssa/ass |
704 while (*p == '\n') {++p;} | 855 if (track->parser_priv->fontname) |
705 if (strncmp(p, "[Events]", 8) == 0) { | 856 decode_font(track); |
706 events_reached = 1; | 857 |
707 } else if ((strncmp(p, "Format:", 7) == 0) && (events_reached)) { | |
708 p = strchr(p, '\n'); | |
709 if (p == 0) { | |
710 mp_msg(MSGT_GLOBAL, MSGL_WARN, "Incomplete subtitles\n"); | |
711 free(buf); | |
712 return 0; | |
713 } | |
714 ass_process_chunk(track, buf, p - buf + 1); | |
715 ++p; | |
716 break; | |
717 } | |
718 p = strchr(p, '\n'); | |
719 } | |
720 // process events | |
721 while (p && (*p)) { | |
722 char* next; | |
723 int len; | |
724 while (*p == '\n') {++p;} | |
725 next = strchr(p, '\n'); | |
726 len = 0; | |
727 if (next) { | |
728 len = next - p; | |
729 *next = 0; | |
730 } else { | |
731 len = strlen(p); | |
732 } | |
733 ass_process_external_line(track, p, len); | |
734 if (next) { | |
735 p = next + 1; | |
736 continue; | |
737 } else | |
738 break; | |
739 } | |
740 | |
741 free(buf); | 858 free(buf); |
742 | 859 |
743 if (!events_reached) { | 860 if (track->track_type == TRACK_TYPE_UNKNOWN) { |
744 ass_free_track(track); | 861 ass_free_track(track); |
745 return 0; | 862 return 0; |
746 } | 863 } |
747 | 864 |
748 mp_msg(MSGT_GLOBAL, MSGL_INFO, "LIBASS: added subtitle file: %s (%d styles, %d events)\n", fname, track->n_styles, track->n_events); | 865 mp_msg(MSGT_GLOBAL, MSGL_INFO, "LIBASS: added subtitle file: %s (%d styles, %d events)\n", fname, track->n_styles, track->n_events); |
851 return ((long long)track->events[i].Start) - now; | 968 return ((long long)track->events[i].Start) - now; |
852 } | 969 } |
853 | 970 |
854 ass_track_t* ass_new_track(void) { | 971 ass_track_t* ass_new_track(void) { |
855 ass_track_t* track = calloc(1, sizeof(ass_track_t)); | 972 ass_track_t* track = calloc(1, sizeof(ass_track_t)); |
973 track->parser_priv = calloc(1, sizeof(parser_priv_t)); | |
856 return track; | 974 return track; |
857 } | 975 } |
858 | 976 |