comparison subreader.c @ 12443:ae4ae7ab636c

ENCA support (http://trific.ath.cx/software/enca/)
author henry
date Sat, 08 May 2004 17:52:25 +0000
parents eb3ad04675e1
children 44959468c64d
comparison
equal deleted inserted replaced
12442:a5fdd848c0b7 12443:ae4ae7ab636c
16 #include <dirent.h> 16 #include <dirent.h>
17 17
18 #include "config.h" 18 #include "config.h"
19 #include "mp_msg.h" 19 #include "mp_msg.h"
20 #include "subreader.h" 20 #include "subreader.h"
21
22 #ifdef HAVE_ENCA
23 #include <enca.h>
24 #endif
21 25
22 #define ERR ((void *) -1) 26 #define ERR ((void *) -1)
23 27
24 #ifdef USE_ICONV 28 #ifdef USE_ICONV
25 #include <iconv.h> 29 #include <iconv.h>
1035 extern float sub_fps; 1039 extern float sub_fps;
1036 1040
1037 #ifdef USE_ICONV 1041 #ifdef USE_ICONV
1038 static iconv_t icdsc = (iconv_t)(-1); 1042 static iconv_t icdsc = (iconv_t)(-1);
1039 1043
1040 void subcp_open (void) 1044 #ifdef HAVE_ENCA
1045 void subcp_open_noenca ()
1046 {
1047 char enca_lang[100], enca_fallback[100];
1048 if (sscanf(sub_cp, "enca:%2s:%s", enca_lang, enca_fallback) == 2
1049 || sscanf(sub_cp, "ENCA:%2s:%s", enca_lang, enca_fallback) == 2) {
1050 subcp_open(enca_fallback);
1051 } else {
1052 subcp_open(sub_cp);
1053 }
1054 }
1055 #else
1056 void subcp_open_noenca ()
1057 {
1058 subcp_open(sub_cp);
1059 }
1060 #endif
1061
1062 void subcp_open (char *current_sub_cp)
1041 { 1063 {
1042 char *tocp = "UTF-8"; 1064 char *tocp = "UTF-8";
1043 1065
1044 if (sub_cp){ 1066 if (current_sub_cp){
1045 if ((icdsc = iconv_open (tocp, sub_cp)) != (iconv_t)(-1)){ 1067 if ((icdsc = iconv_open (tocp, current_sub_cp)) != (iconv_t)(-1)){
1046 mp_msg(MSGT_SUBREADER,MSGL_V,"SUB: opened iconv descriptor.\n"); 1068 mp_msg(MSGT_SUBREADER,MSGL_V,"SUB: opened iconv descriptor.\n");
1047 sub_utf8 = 2; 1069 sub_utf8 = 2;
1048 } else 1070 } else
1049 mp_msg(MSGT_SUBREADER,MSGL_ERR,"SUB: error opening iconv descriptor.\n"); 1071 mp_msg(MSGT_SUBREADER,MSGL_ERR,"SUB: error opening iconv descriptor.\n");
1050 } 1072 }
1244 subtitle * (*read)(FILE *fd,subtitle *dest); 1266 subtitle * (*read)(FILE *fd,subtitle *dest);
1245 void (*post)(subtitle *dest); 1267 void (*post)(subtitle *dest);
1246 const char *name; 1268 const char *name;
1247 }; 1269 };
1248 1270
1271 #ifdef HAVE_ENCA
1272 #define MAX_GUESS_BUFFER_SIZE (256*1024)
1273 void* guess_cp(FILE *fd, char *preferred_language, char *fallback)
1274 {
1275 const char **languages;
1276 size_t langcnt, buflen;
1277 EncaAnalyser analyser;
1278 EncaEncoding encoding;
1279 unsigned char *buffer;
1280 char *detected_sub_cp = NULL;
1281 int i;
1282
1283 buffer = (unsigned char*)malloc(MAX_GUESS_BUFFER_SIZE*sizeof(char));
1284 buflen = fread(buffer, 1, MAX_GUESS_BUFFER_SIZE, fd);
1285
1286 languages = enca_get_languages(&langcnt);
1287 mp_msg(MSGT_SUBREADER, MSGL_V, "ENCA supported languages: ");
1288 for (i = 0; i < langcnt; i++) {
1289 mp_msg(MSGT_SUBREADER, MSGL_V, "%s ", languages[i]);
1290 }
1291 mp_msg(MSGT_SUBREADER, MSGL_V, "\n");
1292
1293 for (i = 0; i < langcnt; i++) {
1294 if (strcasecmp(languages[i], preferred_language) != 0) continue;
1295 analyser = enca_analyser_alloc(languages[i]);
1296 encoding = enca_analyse_const(analyser, buffer, buflen);
1297 mp_msg(MSGT_SUBREADER, MSGL_INFO, "ENCA detected charset: %s\n", enca_charset_name(encoding.charset, ENCA_NAME_STYLE_ICONV));
1298 detected_sub_cp = strdup(enca_charset_name(encoding.charset, ENCA_NAME_STYLE_ICONV));
1299 enca_analyser_free(analyser);
1300 }
1301
1302 free(languages);
1303 free(buffer);
1304 rewind(fd);
1305
1306 if (!detected_sub_cp) detected_sub_cp = strdup(fallback);
1307
1308 return detected_sub_cp;
1309 }
1310 #endif
1311
1249 sub_data* sub_read_file (char *filename, float fps) { 1312 sub_data* sub_read_file (char *filename, float fps) {
1250 //filename is assumed to be malloc'ed, free() is used in sub_free() 1313 //filename is assumed to be malloc'ed, free() is used in sub_free()
1251 FILE *fd; 1314 FILE *fd;
1252 int n_max, n_first, i, j, sub_first, sub_orig; 1315 int n_max, n_first, i, j, sub_first, sub_orig;
1253 subtitle *first, *second, *sub, *return_sub; 1316 subtitle *first, *second, *sub, *return_sub;
1254 sub_data *subt_data; 1317 sub_data *subt_data;
1318 char enca_lang[100], enca_fallback[100];
1255 int uses_time = 0, sub_num = 0, sub_errs = 0; 1319 int uses_time = 0, sub_num = 0, sub_errs = 0;
1320 char *current_sub_cp=NULL;
1256 struct subreader sr[]= 1321 struct subreader sr[]=
1257 { 1322 {
1258 { sub_read_line_microdvd, NULL, "microdvd" }, 1323 { sub_read_line_microdvd, NULL, "microdvd" },
1259 { sub_read_line_subrip, NULL, "subrip" }, 1324 { sub_read_line_subrip, NULL, "subrip" },
1260 { sub_read_line_subviewer, NULL, "subviewer" }, 1325 { sub_read_line_subviewer, NULL, "subviewer" },
1281 srp=sr+sub_format; 1346 srp=sr+sub_format;
1282 mp_msg(MSGT_SUBREADER,MSGL_INFO,"SUB: Detected subtitle file format: %s\n", srp->name); 1347 mp_msg(MSGT_SUBREADER,MSGL_INFO,"SUB: Detected subtitle file format: %s\n", srp->name);
1283 1348
1284 rewind (fd); 1349 rewind (fd);
1285 1350
1351 #ifdef HAVE_ENCA
1352 if (sscanf(sub_cp, "enca:%2s:%s", enca_lang, enca_fallback) == 2
1353 || sscanf(sub_cp, "ENCA:%2s:%s", enca_lang, enca_fallback) == 2) {
1354 current_sub_cp = guess_cp(fd, enca_lang, enca_fallback);
1355 } else {
1356 current_sub_cp = strdup(sub_cp);
1357 }
1358 #else
1359 current_sub_cp = strdup(sub_cp);
1360 #endif
1361
1286 #ifdef USE_ICONV 1362 #ifdef USE_ICONV
1287 sub_utf8_prev=sub_utf8; 1363 sub_utf8_prev=sub_utf8;
1288 { 1364 {
1289 int l,k; 1365 int l,k;
1290 k = -1; 1366 k = -1;
1294 if (!strcasecmp(filename+(l - strlen(exts[k])), exts[k])){ 1370 if (!strcasecmp(filename+(l - strlen(exts[k])), exts[k])){
1295 sub_utf8 = 1; 1371 sub_utf8 = 1;
1296 break; 1372 break;
1297 } 1373 }
1298 } 1374 }
1299 if (k<0) subcp_open(); 1375 if (k<0) subcp_open(current_sub_cp);
1300 } 1376 }
1301 #endif 1377 #endif
1378 if (current_sub_cp) free(current_sub_cp);
1302 1379
1303 sub_num=0;n_max=32; 1380 sub_num=0;n_max=32;
1304 first=(subtitle *)malloc(n_max*sizeof(subtitle)); 1381 first=(subtitle *)malloc(n_max*sizeof(subtitle));
1305 if(!first){ 1382 if(!first){
1306 #ifdef USE_ICONV 1383 #ifdef USE_ICONV
1788 strcpy_trim(tmp_fname_trim, tmp_fname_noext); 1865 strcpy_trim(tmp_fname_trim, tmp_fname_noext);
1789 1866
1790 // does it end with a subtitle extension? 1867 // does it end with a subtitle extension?
1791 found = 0; 1868 found = 0;
1792 #ifdef USE_ICONV 1869 #ifdef USE_ICONV
1870 #ifdef HAVE_ENCA
1871 for (i = ((sub_cp && strncasecmp(sub_cp, "enca", 4) != 0) ? 3 : 0); sub_exts[i]; i++) {
1872 #else
1793 for (i = (sub_cp ? 3 : 0); sub_exts[i]; i++) { 1873 for (i = (sub_cp ? 3 : 0); sub_exts[i]; i++) {
1874 #endif
1794 #else 1875 #else
1795 for (i = 0; sub_exts[i]; i++) { 1876 for (i = 0; sub_exts[i]; i++) {
1796 #endif 1877 #endif
1797 if (strcmp(sub_exts[i], tmp_fname_ext) == 0) { 1878 if (strcmp(sub_exts[i], tmp_fname_ext) == 0) {
1798 found = 1; 1879 found = 1;