diff subreader.c @ 12443:ae4ae7ab636c

ENCA support (http://trific.ath.cx/software/enca/)
author henry
date Sat, 08 May 2004 17:52:25 +0000
parents eb3ad04675e1
children 44959468c64d
line wrap: on
line diff
--- a/subreader.c	Sat May 08 12:05:14 2004 +0000
+++ b/subreader.c	Sat May 08 17:52:25 2004 +0000
@@ -19,6 +19,10 @@
 #include "mp_msg.h"
 #include "subreader.h"
 
+#ifdef HAVE_ENCA
+#include <enca.h>
+#endif
+
 #define ERR ((void *) -1)
 
 #ifdef USE_ICONV
@@ -1037,12 +1041,30 @@
 #ifdef USE_ICONV
 static iconv_t icdsc = (iconv_t)(-1);
 
-void	subcp_open (void)
+/* Open the iconv descriptor for sub_cp without ENCA autodetection: an
+ * "enca:<lang>:<fallback>" value is reduced to its <fallback> charset.
+ * A NULL sub_cp is handed to subcp_open() unchanged. */
+void subcp_open_noenca (void)
+{
+#ifdef HAVE_ENCA
+    /* Field widths keep sscanf() inside the buffers ("%2s" needs 3 bytes). */
+    char enca_lang[3], enca_fallback[100];
+    if (sub_cp
+        && (sscanf(sub_cp, "enca:%2s:%99s", enca_lang, enca_fallback) == 2
+            || sscanf(sub_cp, "ENCA:%2s:%99s", enca_lang, enca_fallback) == 2)) {
+        subcp_open(enca_fallback);
+        return;
+    }
+#endif
+    subcp_open(sub_cp);
+}
+
+void	subcp_open (char *current_sub_cp)
 {
 	char *tocp = "UTF-8";
 
-	if (sub_cp){
-		if ((icdsc = iconv_open (tocp, sub_cp)) != (iconv_t)(-1)){
+	if (current_sub_cp){
+		if ((icdsc = iconv_open (tocp, current_sub_cp)) != (iconv_t)(-1)){
 			mp_msg(MSGT_SUBREADER,MSGL_V,"SUB: opened iconv descriptor.\n");
 			sub_utf8 = 2;
 		} else
@@ -1246,13 +1268,56 @@
     const char *name;
 };
 
+#ifdef HAVE_ENCA
+#define MAX_GUESS_BUFFER_SIZE (256*1024)
+/* Guess the charset of fd with ENCA, analysing at most MAX_GUESS_BUFFER_SIZE
+ * bytes read from the current position as preferred_language text; fd is
+ * rewound afterwards.  Returns a malloc'ed iconv charset name the caller must
+ * free: the detected one, else a copy of fallback (NULL if that copy fails). */
+void* guess_cp(FILE *fd, char *preferred_language, char *fallback)
+{
+    const char **languages, *name;
+    size_t langcnt = 0, buflen, i;
+    EncaAnalyser analyser;
+    EncaEncoding encoding;
+    char *detected_sub_cp = NULL;
+    unsigned char *buffer = malloc(MAX_GUESS_BUFFER_SIZE);
+    buflen = buffer ? fread(buffer, 1, MAX_GUESS_BUFFER_SIZE, fd) : 0;
+    languages = enca_get_languages(&langcnt);
+    mp_msg(MSGT_SUBREADER, MSGL_V, "ENCA supported languages: ");
+    for (i = 0; languages && i < langcnt; i++)
+        mp_msg(MSGT_SUBREADER, MSGL_V, "%s ", languages[i]);
+    mp_msg(MSGT_SUBREADER, MSGL_V, "\n");
+
+    for (i = 0; buflen && languages && i < langcnt; i++) {
+        if (strcasecmp(languages[i], preferred_language) != 0) continue;
+        if (!(analyser = enca_analyser_alloc(languages[i]))) break;
+        encoding = enca_analyse_const(analyser, buffer, buflen);
+        enca_analyser_free(analyser);
+        /* No usable iconv name (undetected charset, "???"): keep the fallback. */
+        name = enca_charset_name(encoding.charset, ENCA_NAME_STYLE_ICONV);
+        if (encoding.charset == ENCA_CS_UNKNOWN || !name) break;
+        mp_msg(MSGT_SUBREADER, MSGL_INFO, "ENCA detected charset: %s\n", name);
+        detected_sub_cp = strdup(name);
+        break;  /* one analysis is enough; a second match would leak the copy */
+    }
+    free(languages);
+    free(buffer);
+    rewind(fd);
+    if (!detected_sub_cp) detected_sub_cp = strdup(fallback);
+    return detected_sub_cp;
+}
+#endif
+
 sub_data* sub_read_file (char *filename, float fps) {
         //filename is assumed to be malloc'ed,  free() is used in sub_free()
     FILE *fd;
     int n_max, n_first, i, j, sub_first, sub_orig;
     subtitle *first, *second, *sub, *return_sub;
     sub_data *subt_data;
+    char enca_lang[100], enca_fallback[100];
     int uses_time = 0, sub_num = 0, sub_errs = 0;
+    char *current_sub_cp=NULL;
     struct subreader sr[]=
     {
 	    { sub_read_line_microdvd, NULL, "microdvd" },
@@ -1283,6 +1348,17 @@
     
     rewind (fd);
 
+#ifdef HAVE_ENCA
+    if (sscanf(sub_cp, "enca:%2s:%s", enca_lang, enca_fallback) == 2
+	|| sscanf(sub_cp, "ENCA:%2s:%s", enca_lang, enca_fallback) == 2) {
+	current_sub_cp = guess_cp(fd, enca_lang, enca_fallback);
+    } else {
+	current_sub_cp = strdup(sub_cp);
+    }
+#else
+    current_sub_cp = strdup(sub_cp);
+#endif
+
 #ifdef USE_ICONV
     sub_utf8_prev=sub_utf8;
     {
@@ -1296,9 +1372,10 @@
 			    break;
 			}
 	    }
-	    if (k<0) subcp_open();
+	    if (k<0) subcp_open(current_sub_cp);
     }
 #endif
+    if (current_sub_cp) free(current_sub_cp);
 
     sub_num=0;n_max=32;
     first=(subtitle *)malloc(n_max*sizeof(subtitle));
@@ -1790,7 +1867,11 @@
 		// does it end with a subtitle extension?
 		found = 0;
 #ifdef USE_ICONV
+#ifdef HAVE_ENCA
+		for (i = ((sub_cp && strncasecmp(sub_cp, "enca", 4) != 0) ? 3 : 0); sub_exts[i]; i++) {
+#else
 		for (i = (sub_cp ? 3 : 0); sub_exts[i]; i++) {
+#endif
 #else
 		for (i = 0; sub_exts[i]; i++) {
 #endif