comparison libass/ass.c @ 19492:c8daf3471201

SSA/ASS parser reworked, with two main results: support for fonts embedded in the script (uuencoded directly into the script) has been added, and the Matroska interface functions have been given more sensible names.
author eugeni
date Tue, 22 Aug 2006 22:11:01 +0000
parents 07209f48e527
children 31ac2e1a5695
comparison
equal deleted inserted replaced
19491:10d8f2cae948 19492:c8daf3471201
16 16
17 #ifdef USE_ICONV 17 #ifdef USE_ICONV
18 #include <iconv.h> 18 #include <iconv.h>
19 extern char *sub_cp; 19 extern char *sub_cp;
20 #endif 20 #endif
21 extern int extract_embedded_fonts;
21 22
22 #include "mp_msg.h" 23 #include "mp_msg.h"
23 #include "ass.h" 24 #include "ass.h"
24 #include "ass_utils.h" 25 #include "ass_utils.h"
25 #include "libvo/sub.h" // for utf8_get_char 26 #include "libvo/sub.h" // for utf8_get_char
26 27
27 char *get_path(char *); 28 char *get_path(char *);
28 29
30 struct parser_priv_s { // parser state carried from one input line to the next
31 enum {PST_UNKNOWN = 0, PST_INFO, PST_STYLES, PST_EVENTS, PST_FONTS} state; // script section currently being parsed
32 char* fontname; // name of the embedded font being collected; 0 when no font is pending
33 char* fontdata; // encoded font characters gathered so far (not zero-terminated)
34 int fontdata_size; // bytes allocated for fontdata
35 int fontdata_used; // bytes of fontdata actually filled
36 };
37
29 #define ASS_STYLES_ALLOC 20 38 #define ASS_STYLES_ALLOC 20
30 #define ASS_EVENTS_ALLOC 200 39 #define ASS_EVENTS_ALLOC 200
31 40
32 void ass_free_track(ass_track_t* track) { 41 void ass_free_track(ass_track_t* track) {
33 int i; 42 int i;
34 43
44 if (track->parser_priv) {
45 if (track->parser_priv->fontname)
46 free(track->parser_priv->fontname);
47 if (track->parser_priv->fontdata)
48 free(track->parser_priv->fontdata);
49 free(track->parser_priv);
50 }
35 if (track->style_format) 51 if (track->style_format)
36 free(track->style_format); 52 free(track->style_format);
37 if (track->event_format) 53 if (track->event_format)
38 free(track->event_format); 54 free(track->event_format);
39 if (track->styles) { 55 if (track->styles) {
377 free(format); 393 free(format);
378 return 0; 394 return 0;
379 395
380 } 396 }
381 397
398 static int process_styles_line(ass_track_t* track, char *str)
399 {
400 if (!strncmp(str,"Format:", 7)) {
401 char* p = str + 7;
402 skip_spaces(&p);
403 track->style_format = strdup(p);
404 mp_msg(MSGT_GLOBAL, MSGL_DBG2, "Style format: %s\n", track->style_format);
405 } else if (!strncmp(str,"Style:", 6)) {
406 char* p = str + 6;
407 skip_spaces(&p);
408 process_style(track, p);
409 }
410 return 0;
411 }
412
413 static int process_info_line(ass_track_t* track, char *str)
414 {
415 if (!strncmp(str, "PlayResX:", 9)) {
416 track->PlayResX = atoi(str + 9);
417 } else if (!strncmp(str,"PlayResY:", 9)) {
418 track->PlayResY = atoi(str + 9);
419 } else if (!strncmp(str,"Timer:", 6)) {
420 track->Timer = atof(str + 6);
421 } else if (!strncmp(str,"WrapStyle:", 10)) {
422 track->WrapStyle = atoi(str + 10);
423 }
424 return 0;
425 }
426
427 static int process_events_line(ass_track_t* track, char *str)
428 {
429 if (!strncmp(str, "Format:", 7)) {
430 char* p = str + 7;
431 skip_spaces(&p);
432 track->event_format = strdup(p);
433 mp_msg(MSGT_GLOBAL, MSGL_DBG2, "Event format: %s\n", track->event_format);
434 } else if (!strncmp(str, "Dialogue:", 9)) {
435 // This should never be reached for embedded subtitles.
436 // They have slightly different format and are parsed in ass_process_chunk,
437 // called directly from demuxer
438 int eid;
439 ass_event_t* event;
440
441 str += 9;
442 skip_spaces(&str);
443
444 eid = ass_alloc_event(track);
445 event = track->events + eid;
446
447 process_event_tail(track, event, str, 0);
448 } else {
449 mp_msg(MSGT_GLOBAL, MSGL_V, "Not understood: %s \n", str);
450 }
451 return 0;
452 }
453
/**
 * \brief Decode one group of encoded font characters into raw bytes
 * \param c1 first encoded character (most significant 6 bits)
 * \param c2 second encoded character
 * \param c3 third encoded character; ignored in effect when cnt < 2
 * \param c4 fourth encoded character (least significant 6 bits); ignored in effect when cnt < 3
 * \param dst output buffer with room for at least cnt bytes
 * \param cnt number of decoded bytes to store, must be in 0..3
 * \return pointer just past the last byte written
 * Each character carries 6 bits, stored as (bits + 33).
 * Copied from mkvtoolnix.
 */
static unsigned char* decode_chars(unsigned char c1, unsigned char c2,
		unsigned char c3, unsigned char c4, unsigned char* dst, int cnt)
{
	uint32_t value;
	unsigned char bytes[3];
	int i;

	// Do the arithmetic unsigned and keep only the 6 payload bits of every
	// character. With plain int, a character below 33 (e.g. a 0 passed as
	// padding for a short final group) made (c - 33) negative: shifting it
	// is undefined, and adding it borrowed from the neighbouring bits, so
	// the last bytes of a font came out one too low.
	value = ((((uint32_t)c1 - 33u) & 63u) << 18) |
	        ((((uint32_t)c2 - 33u) & 63u) << 12) |
	        ((((uint32_t)c3 - 33u) & 63u) << 6) |
	        (((uint32_t)c4 - 33u) & 63u);

	bytes[0] = (value >> 16) & 0xff;
	bytes[1] = (value >> 8) & 0xff;
	bytes[2] = value & 0xff;

	for (i = 0; i < cnt; ++i)
		*dst++ = bytes[i];
	return dst;
}
471
472 static int decode_font(ass_track_t* track)
473 {
474 unsigned char* p;
475 unsigned char* q;
476 int i;
477 int size; // original size
478 int dsize; // decoded size
479 unsigned char* buf = 0;
480
481 mp_msg(MSGT_GLOBAL, MSGL_V, "font: %d bytes encoded data \n", track->parser_priv->fontdata_used);
482 size = track->parser_priv->fontdata_used;
483 if (size % 4 == 1) {
484 mp_msg(MSGT_GLOBAL, MSGL_ERR, "bad encoded data size\n");
485 goto error_decode_font;
486 }
487 buf = malloc(size / 4 * 3 + 2);
488 q = buf;
489 for (i = 0, p = (unsigned char*)track->parser_priv->fontdata; i < size / 4; i++, p+=4) {
490 q = decode_chars(p[0], p[1], p[2], p[3], q, 3);
491 }
492 if (size % 4 == 2) {
493 q = decode_chars(p[0], p[1], 0, 0, q, 1);
494 } else if (size % 4 == 3) {
495 q = decode_chars(p[0], p[1], p[2], 0, q, 2);
496 }
497 dsize = q - buf;
498 assert(dsize <= size / 4 * 3 + 2);
499
500 if (extract_embedded_fonts)
501 ass_process_font(track->parser_priv->fontname, (char*)buf, dsize);
502
503 error_decode_font:
504 if (buf) free(buf);
505 free(track->parser_priv->fontname);
506 free(track->parser_priv->fontdata);
507 track->parser_priv->fontname = 0;
508 track->parser_priv->fontdata = 0;
509 track->parser_priv->fontdata_size = 0;
510 track->parser_priv->fontdata_used = 0;
511 return 0;
512 }
513
514 static char* validate_fname(char* name);
515
516 static int process_fonts_line(ass_track_t* track, char *str)
517 {
518 int len;
519
520 if (!strncmp(str, "fontname:", 9)) {
521 char* p = str + 9;
522 skip_spaces(&p);
523 if (track->parser_priv->fontname) {
524 decode_font(track);
525 }
526 track->parser_priv->fontname = validate_fname(p);
527 mp_msg(MSGT_GLOBAL, MSGL_V, "fontname: %s\n", track->parser_priv->fontname);
528 return 0;
529 }
530
531 if (!track->parser_priv->fontname) {
532 mp_msg(MSGT_GLOBAL, MSGL_V, "Not understood: %s \n", str);
533 return 0;
534 }
535
536 len = strlen(str);
537 if (len > 80) {
538 mp_msg(MSGT_GLOBAL, MSGL_WARN, "Font line too long: %d, %s\n", len, str);
539 return 0;
540 }
541 if (track->parser_priv->fontdata_used + len > track->parser_priv->fontdata_size) {
542 track->parser_priv->fontdata_size += 100 * 1024;
543 track->parser_priv->fontdata = realloc(track->parser_priv->fontdata, track->parser_priv->fontdata_size);
544 }
545 memcpy(track->parser_priv->fontdata + track->parser_priv->fontdata_used, str, len);
546 track->parser_priv->fontdata_used += len;
547
548 return 0;
549 }
550
382 /** 551 /**
383 * \brief Parse a header line 552 * \brief Parse a header line
384 * \param track track 553 * \param track track
385 * \param str string to parse, zero-terminated 554 * \param str string to parse, zero-terminated
386 */ 555 */
387 static int process_header_line(ass_track_t* track, char *str) 556 static int process_line(ass_track_t* track, char *str)
388 { 557 {
389 static int events_section_started = 0; 558 if (strstr(str, "[Script Info]")) { // FIXME: strstr to skip possible BOM at the beginning of the script
390 559 track->parser_priv->state = PST_INFO;
391 mp_msg(MSGT_GLOBAL, MSGL_DBG2, "=== Header: %s\n", str); 560 } else if (!strncmp(str, "[V4 Styles]", 11)) {
392 if (strncmp(str, "PlayResX:", 9)==0) { 561 track->parser_priv->state = PST_STYLES;
393 track->PlayResX = atoi(str + 9); 562 track->track_type = TRACK_TYPE_SSA;
394 } else if (strncmp(str,"PlayResY:", 9)==0) { 563 } else if (!strncmp(str, "[V4+ Styles]", 12)) {
395 track->PlayResY = atoi(str + 9); 564 track->parser_priv->state = PST_STYLES;
396 } else if (strncmp(str,"Timer:", 6)==0) { 565 track->track_type = TRACK_TYPE_ASS;
397 track->Timer = atof(str + 6); 566 } else if (!strncmp(str, "[Events]", 8)) {
398 } else if (strstr(str,"Styles]")) { 567 track->parser_priv->state = PST_EVENTS;
399 events_section_started = 0; 568 } else if (!strncmp(str, "[Fonts]", 7)) {
400 if (strchr(str, '+')) 569 track->parser_priv->state = PST_FONTS;
401 track->track_type = TRACK_TYPE_ASS; 570 } else {
402 else 571 switch (track->parser_priv->state) {
403 track->track_type = TRACK_TYPE_SSA; 572 case PST_INFO:
404 } else if (strncmp(str,"[Events]", 8)==0) { 573 process_info_line(track, str);
405 events_section_started = 1; 574 break;
406 } else if (strncmp(str,"Format:", 7)==0) { 575 case PST_STYLES:
407 char* p = str + 7; 576 process_styles_line(track, str);
408 skip_spaces(&p); 577 break;
409 if (events_section_started) { 578 case PST_EVENTS:
410 track->event_format = strdup(p); 579 process_events_line(track, str);
411 mp_msg(MSGT_GLOBAL, MSGL_DBG2, "Event format: %s\n", track->event_format); 580 break;
412 } else { 581 case PST_FONTS:
413 track->style_format = strdup(p); 582 process_fonts_line(track, str);
414 mp_msg(MSGT_GLOBAL, MSGL_DBG2, "Style format: %s\n", track->style_format); 583 break;
584 default:
585 break;
415 } 586 }
416 } else if (strncmp(str,"Style:", 6)==0) { 587 }
417 char* p = str + 6; 588
418 skip_spaces(&p); 589 // there is no explicit end-of-font marker in ssa/ass
419 process_style(track, p); 590 if ((track->parser_priv->state != PST_FONTS) && (track->parser_priv->fontname))
420 } else if (strncmp(str,"WrapStyle:", 10)==0) { 591 decode_font(track);
421 track->WrapStyle = atoi(str + 10); 592
422 }
423 return 0; 593 return 0;
424 } 594 }
425 595
426 /** 596 static int process_text(ass_track_t* track, char* str)
427 * \brief Process CodecPrivate section of subtitle stream 597 {
428 * \param track track 598 char* p = str;
429 * \param data string to parse
430 * \param size length of data
431 CodecPrivate section contains [Stream Info] and [V4+ Styles] sections
432 */
433 void ass_process_chunk(ass_track_t* track, char *data, int size)
434 {
435 char* str = malloc(size + 1);
436 char* p;
437 int sid;
438
439 memcpy(str, data, size);
440 str[size] = '\0';
441
442 p = str;
443 while(1) { 599 while(1) {
444 char* q; 600 char* q;
445 for (;((*p=='\r')||(*p=='\n'));++p) {} 601 for (;((*p=='\r')||(*p=='\n'));++p) {}
446 for (q=p; ((*q!='\0')&&(*q!='\r')&&(*q!='\n')); ++q) {}; 602 for (q=p; ((*q!='\0')&&(*q!='\r')&&(*q!='\n')); ++q) {};
447 if (q==p) 603 if (q==p)
448 break; 604 break;
449 if (*q != '\0') 605 if (*q != '\0')
450 *(q++) = '\0'; 606 *(q++) = '\0';
451 process_header_line(track, p); 607 process_line(track, p);
452 if (*q == '\0') 608 if (*q == '\0')
453 break; 609 break;
454 p = q; 610 p = q;
455 } 611 }
612 return 0;
613 }
614
615 /**
616 * \brief Process CodecPrivate section of subtitle stream
617 * \param track track
618 * \param data string to parse
619 * \param size length of data
620 CodecPrivate section contains [Stream Info] and [V4+ Styles] ([V4 Styles] for SSA) sections
621 */
622 void ass_process_codec_private(ass_track_t* track, char *data, int size)
623 {
624 char* str = malloc(size + 1);
625 int sid;
626
627 memcpy(str, data, size);
628 str[size] = '\0';
629
630 process_text(track, str);
456 free(str); 631 free(str);
457 632
458 // add "Default" style to the end 633 // add "Default" style to the end
459 // will be used if track does not contain a default style (or even does not contain styles at all) 634 // will be used if track does not contain a default style (or even does not contain styles at all)
460 sid = ass_alloc_style(track); 635 sid = ass_alloc_style(track);
462 track->styles[sid].FontName = strdup("Arial"); 637 track->styles[sid].FontName = strdup("Arial");
463 638
464 if (!track->event_format) { 639 if (!track->event_format) {
465 // probably an mkv produced by ancient mkvtoolnix 640 // probably an mkv produced by ancient mkvtoolnix
466 // such files don't have [Events] and Format: headers 641 // such files don't have [Events] and Format: headers
642 track->parser_priv->state = PST_EVENTS;
467 if (track->track_type == TRACK_TYPE_SSA) 643 if (track->track_type == TRACK_TYPE_SSA)
468 track->event_format = strdup("Format: Marked, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text"); 644 track->event_format = strdup("Format: Marked, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text");
469 else 645 else
470 track->event_format = strdup("Format: Layer, Start, End, Style, Actor, MarginL, MarginR, MarginV, Effect, Text"); 646 track->event_format = strdup("Format: Layer, Start, End, Style, Actor, MarginL, MarginR, MarginV, Effect, Text");
471 } 647 }
486 * \param data string to parse 662 * \param data string to parse
487 * \param size length of data 663 * \param size length of data
488 * \param timecode starting time of the event (milliseconds) 664 * \param timecode starting time of the event (milliseconds)
489 * \param duration duration of the event (milliseconds) 665 * \param duration duration of the event (milliseconds)
490 */ 666 */
491 void ass_process_line(ass_track_t* track, char *data, int size, long long timecode, long long duration) 667 void ass_process_chunk(ass_track_t* track, char *data, int size, long long timecode, long long duration)
492 { 668 {
493 char* str; 669 char* str;
494 int eid; 670 int eid;
495 char* p; 671 char* p;
496 char* token; 672 char* token;
531 } while (0); 707 } while (0);
532 // some error 708 // some error
533 ass_free_event(track, eid); 709 ass_free_event(track, eid);
534 track->n_events--; 710 track->n_events--;
535 free(str); 711 free(str);
536 }
537
538 /**
539 * \brief Process a line from external file.
540 * \param track track
541 * \param str string to parse
542 * \param size length of data
543 */
544 static void ass_process_external_line(ass_track_t* track, char *str, int size)
545 {
546 int eid;
547 ass_event_t* event;
548
549 eid = ass_alloc_event(track);
550 event = track->events + eid;
551
552 if (strncmp("Dialogue:", str, 9) != 0)
553 return;
554
555 str += 9;
556 while (*str == ' ') {++str;}
557
558 process_event_tail(track, event, str, 0);
559 } 712 }
560 713
561 #ifdef USE_ICONV 714 #ifdef USE_ICONV
562 /** \brief recode buffer to utf-8 715 /** \brief recode buffer to utf-8
563 * constraint: sub_cp != 0 716 * constraint: sub_cp != 0
639 { 792 {
640 int res; 793 int res;
641 long sz; 794 long sz;
642 long bytes_read; 795 long bytes_read;
643 char* buf; 796 char* buf;
644 char* p;
645 int events_reached;
646 ass_track_t* track; 797 ass_track_t* track;
647 798
648 FILE* fp = fopen(fname, "rb"); 799 FILE* fp = fopen(fname, "rb");
649 if (!fp) { 800 if (!fp) {
650 mp_msg(MSGT_GLOBAL, MSGL_WARN, "ass_read_file(%s): fopen failed\n", fname); 801 mp_msg(MSGT_GLOBAL, MSGL_WARN, "ass_read_file(%s): fopen failed\n", fname);
696 847
697 track = ass_new_track(); 848 track = ass_new_track();
698 track->name = strdup(fname); 849 track->name = strdup(fname);
699 850
700 // process header 851 // process header
701 events_reached = 0; 852 process_text(track, buf);
702 p = buf; 853
703 while (p && (*p)) { 854 // there is no explicit end-of-font marker in ssa/ass
704 while (*p == '\n') {++p;} 855 if (track->parser_priv->fontname)
705 if (strncmp(p, "[Events]", 8) == 0) { 856 decode_font(track);
706 events_reached = 1; 857
707 } else if ((strncmp(p, "Format:", 7) == 0) && (events_reached)) {
708 p = strchr(p, '\n');
709 if (p == 0) {
710 mp_msg(MSGT_GLOBAL, MSGL_WARN, "Incomplete subtitles\n");
711 free(buf);
712 return 0;
713 }
714 ass_process_chunk(track, buf, p - buf + 1);
715 ++p;
716 break;
717 }
718 p = strchr(p, '\n');
719 }
720 // process events
721 while (p && (*p)) {
722 char* next;
723 int len;
724 while (*p == '\n') {++p;}
725 next = strchr(p, '\n');
726 len = 0;
727 if (next) {
728 len = next - p;
729 *next = 0;
730 } else {
731 len = strlen(p);
732 }
733 ass_process_external_line(track, p, len);
734 if (next) {
735 p = next + 1;
736 continue;
737 } else
738 break;
739 }
740
741 free(buf); 858 free(buf);
742 859
743 if (!events_reached) { 860 if (track->track_type == TRACK_TYPE_UNKNOWN) {
744 ass_free_track(track); 861 ass_free_track(track);
745 return 0; 862 return 0;
746 } 863 }
747 864
748 mp_msg(MSGT_GLOBAL, MSGL_INFO, "LIBASS: added subtitle file: %s (%d styles, %d events)\n", fname, track->n_styles, track->n_events); 865 mp_msg(MSGT_GLOBAL, MSGL_INFO, "LIBASS: added subtitle file: %s (%d styles, %d events)\n", fname, track->n_styles, track->n_events);
851 return ((long long)track->events[i].Start) - now; 968 return ((long long)track->events[i].Start) - now;
852 } 969 }
853 970
854 ass_track_t* ass_new_track(void) { 971 ass_track_t* ass_new_track(void) {
855 ass_track_t* track = calloc(1, sizeof(ass_track_t)); 972 ass_track_t* track = calloc(1, sizeof(ass_track_t));
973 track->parser_priv = calloc(1, sizeof(parser_priv_t));
856 return track; 974 return track;
857 } 975 }
858 976