Mercurial > mplayer.hg
comparison subreader.c @ 12443:ae4ae7ab636c
ENCA support (http://trific.ath.cx/software/enca/)
author | henry |
---|---|
date | Sat, 08 May 2004 17:52:25 +0000 |
parents | eb3ad04675e1 |
children | 44959468c64d |
comparison
Legend (row types in the table below):
- equal
- deleted
- inserted
- replaced
12442:a5fdd848c0b7 | 12443:ae4ae7ab636c |
---|---|
16 #include <dirent.h> | 16 #include <dirent.h> |
17 | 17 |
18 #include "config.h" | 18 #include "config.h" |
19 #include "mp_msg.h" | 19 #include "mp_msg.h" |
20 #include "subreader.h" | 20 #include "subreader.h" |
21 | |
22 #ifdef HAVE_ENCA | |
23 #include <enca.h> | |
24 #endif | |
21 | 25 |
22 #define ERR ((void *) -1) | 26 #define ERR ((void *) -1) |
23 | 27 |
24 #ifdef USE_ICONV | 28 #ifdef USE_ICONV |
25 #include <iconv.h> | 29 #include <iconv.h> |
1035 extern float sub_fps; | 1039 extern float sub_fps; |
1036 | 1040 |
1037 #ifdef USE_ICONV | 1041 #ifdef USE_ICONV |
1038 static iconv_t icdsc = (iconv_t)(-1); | 1042 static iconv_t icdsc = (iconv_t)(-1); |
1039 | 1043 |
1040 void subcp_open (void) | 1044 #ifdef HAVE_ENCA |
1045 void subcp_open_noenca () | |
1046 { | |
1047 char enca_lang[100], enca_fallback[100]; | |
1048 if (sscanf(sub_cp, "enca:%2s:%s", enca_lang, enca_fallback) == 2 | |
1049 || sscanf(sub_cp, "ENCA:%2s:%s", enca_lang, enca_fallback) == 2) { | |
1050 subcp_open(enca_fallback); | |
1051 } else { | |
1052 subcp_open(sub_cp); | |
1053 } | |
1054 } | |
1055 #else | |
1056 void subcp_open_noenca () | |
1057 { | |
1058 subcp_open(sub_cp); | |
1059 } | |
1060 #endif | |
1061 | |
1062 void subcp_open (char *current_sub_cp) | |
1041 { | 1063 { |
1042 char *tocp = "UTF-8"; | 1064 char *tocp = "UTF-8"; |
1043 | 1065 |
1044 if (sub_cp){ | 1066 if (current_sub_cp){ |
1045 if ((icdsc = iconv_open (tocp, sub_cp)) != (iconv_t)(-1)){ | 1067 if ((icdsc = iconv_open (tocp, current_sub_cp)) != (iconv_t)(-1)){ |
1046 mp_msg(MSGT_SUBREADER,MSGL_V,"SUB: opened iconv descriptor.\n"); | 1068 mp_msg(MSGT_SUBREADER,MSGL_V,"SUB: opened iconv descriptor.\n"); |
1047 sub_utf8 = 2; | 1069 sub_utf8 = 2; |
1048 } else | 1070 } else |
1049 mp_msg(MSGT_SUBREADER,MSGL_ERR,"SUB: error opening iconv descriptor.\n"); | 1071 mp_msg(MSGT_SUBREADER,MSGL_ERR,"SUB: error opening iconv descriptor.\n"); |
1050 } | 1072 } |
1244 subtitle * (*read)(FILE *fd,subtitle *dest); | 1266 subtitle * (*read)(FILE *fd,subtitle *dest); |
1245 void (*post)(subtitle *dest); | 1267 void (*post)(subtitle *dest); |
1246 const char *name; | 1268 const char *name; |
1247 }; | 1269 }; |
1248 | 1270 |
1271 #ifdef HAVE_ENCA | |
1272 #define MAX_GUESS_BUFFER_SIZE (256*1024) | |
1273 void* guess_cp(FILE *fd, char *preferred_language, char *fallback) | |
1274 { | |
1275 const char **languages; | |
1276 size_t langcnt, buflen; | |
1277 EncaAnalyser analyser; | |
1278 EncaEncoding encoding; | |
1279 unsigned char *buffer; | |
1280 char *detected_sub_cp = NULL; | |
1281 int i; | |
1282 | |
1283 buffer = (unsigned char*)malloc(MAX_GUESS_BUFFER_SIZE*sizeof(char)); | |
1284 buflen = fread(buffer, 1, MAX_GUESS_BUFFER_SIZE, fd); | |
1285 | |
1286 languages = enca_get_languages(&langcnt); | |
1287 mp_msg(MSGT_SUBREADER, MSGL_V, "ENCA supported languages: "); | |
1288 for (i = 0; i < langcnt; i++) { | |
1289 mp_msg(MSGT_SUBREADER, MSGL_V, "%s ", languages[i]); | |
1290 } | |
1291 mp_msg(MSGT_SUBREADER, MSGL_V, "\n"); | |
1292 | |
1293 for (i = 0; i < langcnt; i++) { | |
1294 if (strcasecmp(languages[i], preferred_language) != 0) continue; | |
1295 analyser = enca_analyser_alloc(languages[i]); | |
1296 encoding = enca_analyse_const(analyser, buffer, buflen); | |
1297 mp_msg(MSGT_SUBREADER, MSGL_INFO, "ENCA detected charset: %s\n", enca_charset_name(encoding.charset, ENCA_NAME_STYLE_ICONV)); | |
1298 detected_sub_cp = strdup(enca_charset_name(encoding.charset, ENCA_NAME_STYLE_ICONV)); | |
1299 enca_analyser_free(analyser); | |
1300 } | |
1301 | |
1302 free(languages); | |
1303 free(buffer); | |
1304 rewind(fd); | |
1305 | |
1306 if (!detected_sub_cp) detected_sub_cp = strdup(fallback); | |
1307 | |
1308 return detected_sub_cp; | |
1309 } | |
1310 #endif | |
1311 | |
1249 sub_data* sub_read_file (char *filename, float fps) { | 1312 sub_data* sub_read_file (char *filename, float fps) { |
1250 //filename is assumed to be malloc'ed, free() is used in sub_free() | 1313 //filename is assumed to be malloc'ed, free() is used in sub_free() |
1251 FILE *fd; | 1314 FILE *fd; |
1252 int n_max, n_first, i, j, sub_first, sub_orig; | 1315 int n_max, n_first, i, j, sub_first, sub_orig; |
1253 subtitle *first, *second, *sub, *return_sub; | 1316 subtitle *first, *second, *sub, *return_sub; |
1254 sub_data *subt_data; | 1317 sub_data *subt_data; |
1318 char enca_lang[100], enca_fallback[100]; | |
1255 int uses_time = 0, sub_num = 0, sub_errs = 0; | 1319 int uses_time = 0, sub_num = 0, sub_errs = 0; |
1320 char *current_sub_cp=NULL; | |
1256 struct subreader sr[]= | 1321 struct subreader sr[]= |
1257 { | 1322 { |
1258 { sub_read_line_microdvd, NULL, "microdvd" }, | 1323 { sub_read_line_microdvd, NULL, "microdvd" }, |
1259 { sub_read_line_subrip, NULL, "subrip" }, | 1324 { sub_read_line_subrip, NULL, "subrip" }, |
1260 { sub_read_line_subviewer, NULL, "subviewer" }, | 1325 { sub_read_line_subviewer, NULL, "subviewer" }, |
1281 srp=sr+sub_format; | 1346 srp=sr+sub_format; |
1282 mp_msg(MSGT_SUBREADER,MSGL_INFO,"SUB: Detected subtitle file format: %s\n", srp->name); | 1347 mp_msg(MSGT_SUBREADER,MSGL_INFO,"SUB: Detected subtitle file format: %s\n", srp->name); |
1283 | 1348 |
1284 rewind (fd); | 1349 rewind (fd); |
1285 | 1350 |
1351 #ifdef HAVE_ENCA | |
1352 if (sscanf(sub_cp, "enca:%2s:%s", enca_lang, enca_fallback) == 2 | |
1353 || sscanf(sub_cp, "ENCA:%2s:%s", enca_lang, enca_fallback) == 2) { | |
1354 current_sub_cp = guess_cp(fd, enca_lang, enca_fallback); | |
1355 } else { | |
1356 current_sub_cp = strdup(sub_cp); | |
1357 } | |
1358 #else | |
1359 current_sub_cp = strdup(sub_cp); | |
1360 #endif | |
1361 | |
1286 #ifdef USE_ICONV | 1362 #ifdef USE_ICONV |
1287 sub_utf8_prev=sub_utf8; | 1363 sub_utf8_prev=sub_utf8; |
1288 { | 1364 { |
1289 int l,k; | 1365 int l,k; |
1290 k = -1; | 1366 k = -1; |
1294 if (!strcasecmp(filename+(l - strlen(exts[k])), exts[k])){ | 1370 if (!strcasecmp(filename+(l - strlen(exts[k])), exts[k])){ |
1295 sub_utf8 = 1; | 1371 sub_utf8 = 1; |
1296 break; | 1372 break; |
1297 } | 1373 } |
1298 } | 1374 } |
1299 if (k<0) subcp_open(); | 1375 if (k<0) subcp_open(current_sub_cp); |
1300 } | 1376 } |
1301 #endif | 1377 #endif |
1378 if (current_sub_cp) free(current_sub_cp); | |
1302 | 1379 |
1303 sub_num=0;n_max=32; | 1380 sub_num=0;n_max=32; |
1304 first=(subtitle *)malloc(n_max*sizeof(subtitle)); | 1381 first=(subtitle *)malloc(n_max*sizeof(subtitle)); |
1305 if(!first){ | 1382 if(!first){ |
1306 #ifdef USE_ICONV | 1383 #ifdef USE_ICONV |
1788 strcpy_trim(tmp_fname_trim, tmp_fname_noext); | 1865 strcpy_trim(tmp_fname_trim, tmp_fname_noext); |
1789 | 1866 |
1790 // does it end with a subtitle extension? | 1867 // does it end with a subtitle extension? |
1791 found = 0; | 1868 found = 0; |
1792 #ifdef USE_ICONV | 1869 #ifdef USE_ICONV |
1870 #ifdef HAVE_ENCA | |
1871 for (i = ((sub_cp && strncasecmp(sub_cp, "enca", 4) != 0) ? 3 : 0); sub_exts[i]; i++) { | |
1872 #else | |
1793 for (i = (sub_cp ? 3 : 0); sub_exts[i]; i++) { | 1873 for (i = (sub_cp ? 3 : 0); sub_exts[i]; i++) { |
1874 #endif | |
1794 #else | 1875 #else |
1795 for (i = 0; sub_exts[i]; i++) { | 1876 for (i = 0; sub_exts[i]; i++) { |
1796 #endif | 1877 #endif |
1797 if (strcmp(sub_exts[i], tmp_fname_ext) == 0) { | 1878 if (strcmp(sub_exts[i], tmp_fname_ext) == 0) { |
1798 found = 1; | 1879 found = 1; |