Mercurial > mplayer.hg
diff subreader.c @ 12443:ae4ae7ab636c
ENCA support (http://trific.ath.cx/software/enca/)
author | henry |
---|---|
date | Sat, 08 May 2004 17:52:25 +0000 |
parents | eb3ad04675e1 |
children | 44959468c64d |
line wrap: on
line diff
--- a/subreader.c Sat May 08 12:05:14 2004 +0000 +++ b/subreader.c Sat May 08 17:52:25 2004 +0000 @@ -19,6 +19,10 @@ #include "mp_msg.h" #include "subreader.h" +#ifdef HAVE_ENCA +#include <enca.h> +#endif + #define ERR ((void *) -1) #ifdef USE_ICONV @@ -1037,12 +1041,30 @@ #ifdef USE_ICONV static iconv_t icdsc = (iconv_t)(-1); -void subcp_open (void) +#ifdef HAVE_ENCA +void subcp_open_noenca () +{ + char enca_lang[100], enca_fallback[100]; + if (sscanf(sub_cp, "enca:%2s:%s", enca_lang, enca_fallback) == 2 + || sscanf(sub_cp, "ENCA:%2s:%s", enca_lang, enca_fallback) == 2) { + subcp_open(enca_fallback); + } else { + subcp_open(sub_cp); + } +} +#else +void subcp_open_noenca () +{ + subcp_open(sub_cp); +} +#endif + +void subcp_open (char *current_sub_cp) { char *tocp = "UTF-8"; - if (sub_cp){ - if ((icdsc = iconv_open (tocp, sub_cp)) != (iconv_t)(-1)){ + if (current_sub_cp){ + if ((icdsc = iconv_open (tocp, current_sub_cp)) != (iconv_t)(-1)){ mp_msg(MSGT_SUBREADER,MSGL_V,"SUB: opened iconv descriptor.\n"); sub_utf8 = 2; } else @@ -1246,13 +1268,56 @@ const char *name; }; +#ifdef HAVE_ENCA +#define MAX_GUESS_BUFFER_SIZE (256*1024) +void* guess_cp(FILE *fd, char *preferred_language, char *fallback) +{ + const char **languages; + size_t langcnt, buflen; + EncaAnalyser analyser; + EncaEncoding encoding; + unsigned char *buffer; + char *detected_sub_cp = NULL; + int i; + + buffer = (unsigned char*)malloc(MAX_GUESS_BUFFER_SIZE*sizeof(char)); + buflen = fread(buffer, 1, MAX_GUESS_BUFFER_SIZE, fd); + + languages = enca_get_languages(&langcnt); + mp_msg(MSGT_SUBREADER, MSGL_V, "ENCA supported languages: "); + for (i = 0; i < langcnt; i++) { + mp_msg(MSGT_SUBREADER, MSGL_V, "%s ", languages[i]); + } + mp_msg(MSGT_SUBREADER, MSGL_V, "\n"); + + for (i = 0; i < langcnt; i++) { + if (strcasecmp(languages[i], preferred_language) != 0) continue; + analyser = enca_analyser_alloc(languages[i]); + encoding = enca_analyse_const(analyser, buffer, buflen); + mp_msg(MSGT_SUBREADER, MSGL_INFO, "ENCA detected charset: %s\n", enca_charset_name(encoding.charset, ENCA_NAME_STYLE_ICONV)); + detected_sub_cp = strdup(enca_charset_name(encoding.charset, ENCA_NAME_STYLE_ICONV)); + enca_analyser_free(analyser); + } + + free(languages); + free(buffer); + rewind(fd); + + if (!detected_sub_cp) detected_sub_cp = strdup(fallback); + + return detected_sub_cp; +} +#endif + sub_data* sub_read_file (char *filename, float fps) { //filename is assumed to be malloc'ed, free() is used in sub_free() FILE *fd; int n_max, n_first, i, j, sub_first, sub_orig; subtitle *first, *second, *sub, *return_sub; sub_data *subt_data; + char enca_lang[100], enca_fallback[100]; int uses_time = 0, sub_num = 0, sub_errs = 0; + char *current_sub_cp=NULL; struct subreader sr[]= { { sub_read_line_microdvd, NULL, "microdvd" }, @@ -1283,6 +1348,17 @@ rewind (fd); +#ifdef HAVE_ENCA + if (sscanf(sub_cp, "enca:%2s:%s", enca_lang, enca_fallback) == 2 + || sscanf(sub_cp, "ENCA:%2s:%s", enca_lang, enca_fallback) == 2) { + current_sub_cp = guess_cp(fd, enca_lang, enca_fallback); + } else { + current_sub_cp = strdup(sub_cp); + } +#else + current_sub_cp = strdup(sub_cp); +#endif + #ifdef USE_ICONV sub_utf8_prev=sub_utf8; { @@ -1296,9 +1372,10 @@ break; } } - if (k<0) subcp_open(); + if (k<0) subcp_open(current_sub_cp); } #endif + if (current_sub_cp) free(current_sub_cp); sub_num=0;n_max=32; first=(subtitle *)malloc(n_max*sizeof(subtitle)); @@ -1790,7 +1867,11 @@ // does it end with a subtitle extension? found = 0; #ifdef USE_ICONV +#ifdef HAVE_ENCA + for (i = ((sub_cp && strncasecmp(sub_cp, "enca", 4) != 0) ? 3 : 0); sub_exts[i]; i++) { +#else for (i = (sub_cp ? 3 : 0); sub_exts[i]; i++) { +#endif #else for (i = 0; sub_exts[i]; i++) { #endif