comparison subreader.c @ 23425:098e38f17263

Recode the whole subtitle file to UTF-8 before parsing. This allows reading subtitles encoded in UCS-2.
author eugeni
date Fri, 01 Jun 2007 19:58:29 +0000
parents 3a30fc845a9c
children af3ae78477b3
comparison
equal deleted inserted replaced
23424:7286d245bf33 23425:098e38f17263
17 17
18 #include "config.h" 18 #include "config.h"
19 #include "mp_msg.h" 19 #include "mp_msg.h"
20 #include "subreader.h" 20 #include "subreader.h"
21 #include "stream/stream.h" 21 #include "stream/stream.h"
22 #include "libass/ass.h"
22 23
23 #ifdef HAVE_ENCA 24 #ifdef HAVE_ENCA
24 #include <enca.h> 25 #include <enca.h>
25 #endif 26 #endif
26 27
1334 #undef MAX_GUESS_BUFFER_SIZE 1335 #undef MAX_GUESS_BUFFER_SIZE
1335 #endif 1336 #endif
1336 1337
1337 sub_data* sub_read_file (char *filename, float fps) { 1338 sub_data* sub_read_file (char *filename, float fps) {
1338 stream_t* fd; 1339 stream_t* fd;
1340 unsigned char* subfile_buf;
1341 size_t subfile_size;
1339 int n_max, n_first, i, j, sub_first, sub_orig; 1342 int n_max, n_first, i, j, sub_first, sub_orig;
1340 subtitle *first, *second, *sub, *return_sub; 1343 subtitle *first, *second, *sub, *return_sub;
1341 sub_data *subt_data; 1344 sub_data *subt_data;
1342 int uses_time = 0, sub_num = 0, sub_errs = 0; 1345 int uses_time = 0, sub_num = 0, sub_errs = 0;
1343 struct subreader sr[]= 1346 struct subreader sr[]=
1358 { sub_read_line_mpl2, NULL, "mpl2" } 1361 { sub_read_line_mpl2, NULL, "mpl2" }
1359 }; 1362 };
1360 struct subreader *srp; 1363 struct subreader *srp;
1361 1364
1362 if(filename==NULL) return NULL; //qnx segfault 1365 if(filename==NULL) return NULL; //qnx segfault
1366
1363 i = 0; 1367 i = 0;
1364 fd=open_stream (filename, NULL, &i); if (!fd) return NULL; 1368 subfile_buf = read_file_recode(filename, sub_cp, &subfile_size);
1369 if (!subfile_buf) return 0;
1370 fd = new_memory_stream(subfile_buf, subfile_size);
1371 if (!fd) {
1372 free(subfile_buf);
1373 return 0;
1374 }
1365 1375
1366 sub_format=sub_autodetect (fd, &uses_time); 1376 sub_format=sub_autodetect (fd, &uses_time);
1367 mpsub_multiplier = (uses_time ? 100.0 : 1.0); 1377 mpsub_multiplier = (uses_time ? 100.0 : 1.0);
1368 if (sub_format==SUB_INVALID) {mp_msg(MSGT_SUBREADER,MSGL_WARN,"SUB: Could not determine file format\n");return NULL;} 1378 if (sub_format==SUB_INVALID) {mp_msg(MSGT_SUBREADER,MSGL_WARN,"SUB: Could not determine file format\n");return NULL;}
1369 srp=sr+sub_format; 1379 srp=sr+sub_format;
1370 mp_msg(MSGT_SUBREADER,MSGL_INFO,"SUB: Detected subtitle file format: %s\n", srp->name); 1380 mp_msg(MSGT_SUBREADER,MSGL_INFO,"SUB: Detected subtitle file format: %s\n", srp->name);
1371 1381
1372 stream_reset(fd); 1382 stream_reset(fd);
1373 stream_seek(fd,0); 1383 stream_seek(fd,0);
1374 1384
1375 #ifdef USE_ICONV
1376 sub_utf8_prev=sub_utf8; 1385 sub_utf8_prev=sub_utf8;
1377 { 1386 sub_utf8 = 1;
1378 int l,k;
1379 k = -1;
1380 if ((l=strlen(filename))>4){
1381 char *exts[] = {".utf", ".utf8", ".utf-8" };
1382 for (k=3;--k>=0;)
1383 if (l >= strlen(exts[k]) && !strcasecmp(filename+(l - strlen(exts[k])), exts[k])){
1384 sub_utf8 = 1;
1385 break;
1386 }
1387 }
1388 if (k<0) subcp_open(fd);
1389 }
1390 #endif
1391 1387
1392 sub_num=0;n_max=32; 1388 sub_num=0;n_max=32;
1393 first=malloc(n_max*sizeof(subtitle)); 1389 first=malloc(n_max*sizeof(subtitle));
1394 if(!first){ 1390 if(!first){
1395 #ifdef USE_ICONV 1391 sub_utf8 = sub_utf8_prev;
1396 subcp_close(); 1392 free(subfile_buf);
1397 sub_utf8=sub_utf8_prev;
1398 #endif
1399 return NULL; 1393 return NULL;
1400 } 1394 }
1401 1395
1402 #ifdef USE_SORTSUB 1396 #ifdef USE_SORTSUB
1403 sub = malloc(sizeof(subtitle)); 1397 sub = malloc(sizeof(subtitle));
1414 sub = &first[sub_num]; 1408 sub = &first[sub_num];
1415 #endif 1409 #endif
1416 memset(sub, '\0', sizeof(subtitle)); 1410 memset(sub, '\0', sizeof(subtitle));
1417 sub=srp->read(fd,sub); 1411 sub=srp->read(fd,sub);
1418 if(!sub) break; // EOF 1412 if(!sub) break; // EOF
1419 #ifdef USE_ICONV
1420 if ((sub!=ERR) && (sub_utf8 & 2)) sub=subcp_recode(sub);
1421 #endif
1422 #ifdef USE_FRIBIDI 1413 #ifdef USE_FRIBIDI
1423 if (sub!=ERR) sub=sub_fribidi(sub,sub_utf8); 1414 if (sub!=ERR) sub=sub_fribidi(sub,sub_utf8);
1424 #endif 1415 #endif
1425 if ( sub == ERR ) 1416 if ( sub == ERR )
1426 { 1417 {
1427 #ifdef USE_ICONV
1428 subcp_close();
1429 #endif
1430 if ( first ) free(first); 1418 if ( first ) free(first);
1419 free(subfile_buf);
1431 return NULL; 1420 return NULL;
1432 } 1421 }
1433 // Apply any post processing that needs recoding first 1422 // Apply any post processing that needs recoding first
1434 if ((sub!=ERR) && !sub_no_text_pp && srp->post) srp->post(sub); 1423 if ((sub!=ERR) && !sub_no_text_pp && srp->post) srp->post(sub);
1435 #ifdef USE_SORTSUB 1424 #ifdef USE_SORTSUB
1474 #endif 1463 #endif
1475 if(sub==ERR) ++sub_errs; else ++sub_num; // Error vs. Valid 1464 if(sub==ERR) ++sub_errs; else ++sub_num; // Error vs. Valid
1476 } 1465 }
1477 1466
1478 free_stream(fd); 1467 free_stream(fd);
1479 1468 free(subfile_buf);
1480 #ifdef USE_ICONV
1481 subcp_close();
1482 #endif
1483 1469
1484 // printf ("SUB: Subtitle format %s time.\n", uses_time?"uses":"doesn't use"); 1470 // printf ("SUB: Subtitle format %s time.\n", uses_time?"uses":"doesn't use");
1485 mp_msg(MSGT_SUBREADER,MSGL_INFO,"SUB: Read %i subtitles", sub_num); 1471 mp_msg(MSGT_SUBREADER,MSGL_INFO,"SUB: Read %i subtitles", sub_num);
1486 if (sub_errs) mp_msg(MSGT_SUBREADER,MSGL_INFO,", %i bad line(s).\n", sub_errs); 1472 if (sub_errs) mp_msg(MSGT_SUBREADER,MSGL_INFO,", %i bad line(s).\n", sub_errs);
1487 else mp_msg(MSGT_SUBREADER,MSGL_INFO,".\n"); 1473 else mp_msg(MSGT_SUBREADER,MSGL_INFO,".\n");