changeset 12443:ae4ae7ab636c

ENCA support (http://trific.ath.cx/software/enca/)
author henry
date Sat, 08 May 2004 17:52:25 +0000
parents a5fdd848c0b7
children 0971849c04b6
files Makefile configure libmpdemux/demux_mkv.c libmpdemux/demux_ogg.c subreader.c subreader.h
diffstat 6 files changed, 123 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/Makefile	Sat May 08 12:05:14 2004 +0000
+++ b/Makefile	Sat May 08 17:52:25 2004 +0000
@@ -35,7 +35,7 @@
 VO_LIBS = $(AA_LIB) $(X_LIB) $(SDL_LIB) $(GGI_LIB) $(MP1E_LIB) $(MLIB_LIB) $(SVGA_LIB) $(DIRECTFB_LIB) $(CACA_LIB)
 AO_LIBS = $(ARTS_LIB) $(ESD_LIB) $(NAS_LIB) $(SGIAUDIO_LIB)
 CODEC_LIBS = $(AV_LIB) $(FAME_LIB) $(MAD_LIB) $(VORBIS_LIB) $(THEORA_LIB) $(FAAD_LIB) $(LIBLZO_LIB) $(DECORE_LIB) $(XVID_LIB) $(PNG_LIB) $(Z_LIB) $(JPEG_LIB) $(ALSA_LIB) $(XMMS_LIB) $(MATROSKA_LIB) 
-COMMON_LIBS = libmpcodecs/libmpcodecs.a mp3lib/libMP3.a liba52/liba52.a libmpeg2/libmpeg2.a $(W32_LIB) $(DS_LIB) libaf/libaf.a libmpdemux/libmpdemux.a input/libinput.a postproc/libswscale.a osdep/libosdep.a $(DVDREAD_LIB) $(CODEC_LIBS) $(FREETYPE_LIB) $(TERMCAP_LIB) $(CDPARANOIA_LIB) $(MPLAYER_NETWORK_LIB) $(WIN32_LIB) $(GIF_LIB) $(MACOSX_FRAMEWORKS) $(SMBSUPPORT_LIB) $(FRIBIDI_LIB) $(FONTCONFIG_LIB)
+COMMON_LIBS = libmpcodecs/libmpcodecs.a mp3lib/libMP3.a liba52/liba52.a libmpeg2/libmpeg2.a $(W32_LIB) $(DS_LIB) libaf/libaf.a libmpdemux/libmpdemux.a input/libinput.a postproc/libswscale.a osdep/libosdep.a $(DVDREAD_LIB) $(CODEC_LIBS) $(FREETYPE_LIB) $(TERMCAP_LIB) $(CDPARANOIA_LIB) $(MPLAYER_NETWORK_LIB) $(WIN32_LIB) $(GIF_LIB) $(MACOSX_FRAMEWORKS) $(SMBSUPPORT_LIB) $(FRIBIDI_LIB) $(FONTCONFIG_LIB) $(ENCA_LIB)
 
 CFLAGS = $(OPTFLAGS) -Ilibmpdemux -Iloader -Ilibvo $(FREETYPE_INC) $(EXTRA_INC) $(CDPARANOIA_INC) $(SDL_INC) $(X11_INC) $(FRIBIDI_INC) $(DVB_INC) $(XVID_INC) $(FONTCONFIG_INC) # -Wall
 
--- a/configure	Sat May 08 12:05:14 2004 +0000
+++ b/configure	Sat May 08 17:52:25 2004 +0000
@@ -178,6 +178,7 @@
   --enable-menu          Enable OSD menu support (NOT DVD MENU) [disabled]
   --disable-sortsub      Disable subtitles sorting [enabled]
   --enable-fribidi       Enable using the FriBiDi libs [disabled]
+  --disable-enca         Disable using ENCA charset oracle library [autodetect]
   --disable-macosx       Disable Mac OS X specific features [autodetect]
   --disable-inet6        Disable IPv6 support [autodetect]
   --disable-gethostbyname2  gethostbyname() function is not provided by the C
@@ -1248,6 +1249,7 @@
 _freetypeconfig='freetype-config'
 _fribidi=no
 _fribidiconfig='fribidi-config'
+_enca=auto
 _inet6=auto
 _gethostbyname2=auto
 _ftp=yes
@@ -1472,6 +1474,9 @@
   --enable-fribidi)     _fribidi=yes    ;;
   --disable-fribidi)    _fribidi=no     ;;
 
+  --enable-enca)        _enca=yes    ;;
+  --disable-enca)       _enca=no     ;;
+
   --enable-inet6)	_inet6=yes	;;
   --disable-inet6)	_inet6=no	;;
 
@@ -4440,6 +4445,30 @@
 echores "$_fribidi"
 
 
+echocheck "ENCA"
+if test "$_enca" = auto ; then
+    cat > $TMPC << EOF
+#include <enca.h>
+int main()
+{
+    const char **langs;
+    size_t langcnt;
+    langs = enca_get_languages(&langcnt);
+    return 0;
+}
+EOF
+    _enca=no
+    cc_check -lenca && _enca=yes
+fi
+if test "$_enca" = yes ; then	# outside the auto block so --enable-enca sets these too
+    _def_enca='#define HAVE_ENCA 1'
+    _ld_enca='-lenca'
+else
+    _def_enca='#undef HAVE_ENCA'
+fi
+echores "$_enca"
+
+
 echocheck "zlib"
 cat > $TMPC << EOF
 #include <zlib.h>
@@ -5966,6 +5995,7 @@
 HAVE_MLIB = $_mlib
 WIN32_LIB = $_ld_win32libs
 STATIC_LIB = $_ld_static
+ENCA_LIB = $_ld_enca
 
 X11_INC = $_inc_x11
 X11DIR = $_ld_x11
@@ -6515,6 +6545,9 @@
 /* enable FriBiDi usage */
 $_def_fribidi
 
+/* enable ENCA usage */
+$_def_enca
+
 /* liblzo support */
 $_def_liblzo
 
--- a/libmpdemux/demux_mkv.c	Sat May 08 12:05:14 2004 +0000
+++ b/libmpdemux/demux_mkv.c	Sat May 08 17:52:25 2004 +0000
@@ -1832,7 +1832,7 @@
   char *str;
 
 #ifdef USE_ICONV
-  subcp_open();
+  subcp_open_noenca();
 #endif
 
   stream_seek(s, s->start_pos);
--- a/libmpdemux/demux_ogg.c	Sat May 08 12:05:14 2004 +0000
+++ b/libmpdemux/demux_ogg.c	Sat May 08 17:52:25 2004 +0000
@@ -664,7 +664,7 @@
   sh_video_t* sh_v;
 
 #ifdef USE_ICONV
-  subcp_open();
+  subcp_open_noenca();
 #endif
 
   clear_sub = -1;
--- a/subreader.c	Sat May 08 12:05:14 2004 +0000
+++ b/subreader.c	Sat May 08 17:52:25 2004 +0000
@@ -19,6 +19,10 @@
 #include "mp_msg.h"
 #include "subreader.h"
 
+#ifdef HAVE_ENCA
+#include <enca.h>
+#endif
+
 #define ERR ((void *) -1)
 
 #ifdef USE_ICONV
@@ -1037,12 +1041,30 @@
 #ifdef USE_ICONV
 static iconv_t icdsc = (iconv_t)(-1);
 
-void	subcp_open (void)
+#ifdef HAVE_ENCA
+/* Open iconv without ENCA guessing: for "enca:LANG:FALLBACK" use FALLBACK directly. */
+void	subcp_open_noenca ()
+{
+    char enca_lang[100], enca_fallback[100];
+    /* sub_cp may be NULL (subcp_open tests for it); %99s bounds the write to enca_fallback. */
+    int is_enca = sub_cp && (sscanf(sub_cp, "enca:%2s:%99s", enca_lang, enca_fallback) == 2
+	|| sscanf(sub_cp, "ENCA:%2s:%99s", enca_lang, enca_fallback) == 2);
+    subcp_open(is_enca ? enca_fallback : sub_cp);
+}
+#else
+/* Without ENCA, sub_cp is handed to subcp_open unchanged. */
+void	subcp_open_noenca ()
+{
+    subcp_open(sub_cp);
+}
+#endif
+
+void	subcp_open (char *current_sub_cp)
 {
 	char *tocp = "UTF-8";
 
-	if (sub_cp){
-		if ((icdsc = iconv_open (tocp, sub_cp)) != (iconv_t)(-1)){
+	if (current_sub_cp){
+		if ((icdsc = iconv_open (tocp, current_sub_cp)) != (iconv_t)(-1)){
 			mp_msg(MSGT_SUBREADER,MSGL_V,"SUB: opened iconv descriptor.\n");
 			sub_utf8 = 2;
 		} else
@@ -1246,13 +1268,56 @@
     const char *name;
 };
 
+#ifdef HAVE_ENCA
+#define MAX_GUESS_BUFFER_SIZE (256*1024)
+/* Guess fd's charset with ENCA; returns a malloc'ed name (copy of fallback if none); rewinds fd. */
+void* guess_cp(FILE *fd, char *preferred_language, char *fallback)
+{
+    const char **languages;
+    const char *name;
+    size_t langcnt, buflen, i;
+    EncaAnalyser analyser;
+    EncaEncoding encoding;
+    unsigned char *buffer;
+    char *detected_sub_cp = NULL;
+
+    buffer = (unsigned char*)malloc(MAX_GUESS_BUFFER_SIZE);
+    buflen = buffer ? fread(buffer, 1, MAX_GUESS_BUFFER_SIZE, fd) : 0;
+    languages = enca_get_languages(&langcnt);
+    mp_msg(MSGT_SUBREADER, MSGL_V, "ENCA supported languages: ");
+    for (i = 0; languages && i < langcnt; i++)
+	mp_msg(MSGT_SUBREADER, MSGL_V, "%s ", languages[i]);
+    mp_msg(MSGT_SUBREADER, MSGL_V, "\n");
+
+    for (i = 0; buflen && languages && i < langcnt; i++) {
+	if (strcasecmp(languages[i], preferred_language) != 0) continue;
+	if (!(analyser = enca_analyser_alloc(languages[i]))) break;
+	encoding = enca_analyse_const(analyser, buffer, buflen);
+	name = enca_charset_name(encoding.charset, ENCA_NAME_STYLE_ICONV);
+	if (name && encoding.charset != ENCA_CS_UNKNOWN) { /* else stay NULL so fallback is used */
+	    mp_msg(MSGT_SUBREADER, MSGL_INFO, "ENCA detected charset: %s\n", name);
+	    detected_sub_cp = strdup(name);
+	}
+	enca_analyser_free(analyser);
+	break;
+    }
+    free(languages);
+    free(buffer);
+    rewind(fd);
+    if (!detected_sub_cp) detected_sub_cp = strdup(fallback);
+    return detected_sub_cp;
+}
+#endif
+
 sub_data* sub_read_file (char *filename, float fps) {
         //filename is assumed to be malloc'ed,  free() is used in sub_free()
     FILE *fd;
     int n_max, n_first, i, j, sub_first, sub_orig;
     subtitle *first, *second, *sub, *return_sub;
     sub_data *subt_data;
+    char enca_lang[100], enca_fallback[100];
     int uses_time = 0, sub_num = 0, sub_errs = 0;
+    char *current_sub_cp=NULL;
     struct subreader sr[]=
     {
 	    { sub_read_line_microdvd, NULL, "microdvd" },
@@ -1283,6 +1348,17 @@
     
     rewind (fd);
 
+#ifdef HAVE_ENCA
+    if (sub_cp && (sscanf(sub_cp, "enca:%2s:%99s", enca_lang, enca_fallback) == 2
+	|| sscanf(sub_cp, "ENCA:%2s:%99s", enca_lang, enca_fallback) == 2)) {
+	current_sub_cp = guess_cp(fd, enca_lang, enca_fallback);
+    } else {
+	current_sub_cp = sub_cp ? strdup(sub_cp) : NULL; /* sub_cp may be NULL */
+    }
+#else
+    current_sub_cp = sub_cp ? strdup(sub_cp) : NULL; /* sub_cp may be NULL */
+#endif
+
 #ifdef USE_ICONV
     sub_utf8_prev=sub_utf8;
     {
@@ -1296,9 +1372,10 @@
 			    break;
 			}
 	    }
-	    if (k<0) subcp_open();
+	    if (k<0) subcp_open(current_sub_cp);
     }
 #endif
+    if (current_sub_cp) free(current_sub_cp);
 
     sub_num=0;n_max=32;
     first=(subtitle *)malloc(n_max*sizeof(subtitle));
@@ -1790,7 +1867,11 @@
 		// does it end with a subtitle extension?
 		found = 0;
 #ifdef USE_ICONV
+#ifdef HAVE_ENCA
+		for (i = ((sub_cp && strncasecmp(sub_cp, "enca", 4) != 0) ? 3 : 0); sub_exts[i]; i++) {
+#else
 		for (i = (sub_cp ? 3 : 0); sub_exts[i]; i++) {
+#endif
 #else
 		for (i = 0; sub_exts[i]; i++) {
 #endif
--- a/subreader.h	Sat May 08 12:05:14 2004 +0000
+++ b/subreader.h	Sat May 08 17:52:25 2004 +0000
@@ -53,7 +53,8 @@
 
 sub_data* sub_read_file (char *filename, float pts);
 subtitle* subcp_recode1 (subtitle *sub);
-void subcp_open (void); /* for demux_ogg.c */
+void subcp_open (char *current_sub_cp); /* for demux_ogg.c */
+void subcp_open_noenca (); /* for demux_ogg.c and demux_mkv.c */
 void subcp_close (void); /* for demux_ogg.c */
 char ** sub_filenames(char *path, char *fname);
 void list_sub_file(sub_data* subd);