comparison subreader.c @ 31686:b41cbf02f854

subtitles: convert SRT/MicroDVD markup into ASS markup. As the title says: if libass support is enabled, HTML-style markup in SRT subs (<b>, </i>, </font>, etc.) is converted to ASS tags. MicroDVD-style markup is converted similarly. Patch by ubitux, ubitux AT gmail DOT com.
author greg
date Sun, 18 Jul 2010 22:00:48 +0000
parents dc26022e9c27
children 8f7554cf4441
comparison
equal deleted inserted replaced
31685:31b6397e3b28 31686:b41cbf02f854
31 31
32 #include "config.h" 32 #include "config.h"
33 #include "mp_msg.h" 33 #include "mp_msg.h"
34 #include "mpcommon.h" 34 #include "mpcommon.h"
35 #include "subreader.h" 35 #include "subreader.h"
36 #include "subassconvert.h"
36 #include "libvo/sub.h" 37 #include "libvo/sub.h"
37 #include "stream/stream.h" 38 #include "stream/stream.h"
38 #include "libavutil/common.h" 39 #include "libavutil/common.h"
39 #include "libavutil/avstring.h" 40 #include "libavutil/avstring.h"
41 #include "libass/ass_mp.h"
40 42
41 #ifdef CONFIG_ENCA 43 #ifdef CONFIG_ENCA
42 #include <enca.h> 44 #include <enca.h>
43 #endif 45 #endif
44 46
294 &(current->start), line2) < 2) && 296 &(current->start), line2) < 2) &&
295 (sscanf (line, 297 (sscanf (line,
296 "{%ld}{%ld}%[^\r\n]", 298 "{%ld}{%ld}%[^\r\n]",
297 &(current->start), &(current->end), line2) < 3)); 299 &(current->start), &(current->end), line2) < 3));
298 300
299 p=line2; 301 if (ass_enabled) {
302 subassconvert_microdvd(line2, line, LINE_LEN + 1);
303 p = line;
304 } else
305 p = line2;
300 306
301 next=p, i=0; 307 next=p, i=0;
302 while ((next =sub_readtext (next, &(current->text[i])))) { 308 while ((next =sub_readtext (next, &(current->text[i])))) {
303 if (current->text[i]==ERR) {return ERR;} 309 if (current->text[i]==ERR) {return ERR;}
304 i++; 310 i++;
363 break; 369 break;
364 } 370 }
365 return current; 371 return current;
366 } 372 }
367 373
374 static subtitle *sub_ass_read_line_subviewer(stream_t *st, subtitle *current, int utf16)
375 {
376 int h1, m1, s1, ms1, h2, m2, s2, ms2, j = 0;
377
378 while (!current->text[0]) {
379 char line[LINE_LEN + 1], full_line[LINE_LEN + 1], sep;
380 int i;
381
382 /* Parse SubRip header */
383 if (!stream_read_line(st, line, LINE_LEN, utf16))
384 return NULL;
385 if (sscanf(line, "%d:%d:%d%[,.:]%d --> %d:%d:%d%[,.:]%d",
386 &h1, &m1, &s1, &sep, &ms1, &h2, &m2, &s2, &sep, &ms2) < 10)
387 continue;
388
389 current->start = h1 * 360000 + m1 * 6000 + s1 * 100 + ms1 / 10;
390 current->end = h2 * 360000 + m2 * 6000 + s2 * 100 + ms2 / 10;
391
392 /* Concat lines */
393 full_line[0] = 0;
394 for (i = 0; i < SUB_MAX_TEXT; i++) {
395 int blank = 1, len = 0;
396 char *p;
397
398 if (!stream_read_line(st, line, LINE_LEN, utf16))
399 break;
400
401 for (p = line; *p != '\n' && *p != '\r' && *p; p++, len++)
402 if (*p != ' ' && *p != '\t')
403 blank = 0;
404
405 if (blank)
406 break;
407
408 *p = 0;
409
410 if (len >= sizeof(full_line) - j - 2)
411 break;
412
413 if (j != 0)
414 full_line[j++] = '\n';
415 strcpy(&full_line[j], line);
416 j += len;
417 }
418
419 /* Use the ASS/SSA converter to transform the whole lines */
420 if (full_line[0]) {
421 char converted_line[LINE_LEN + 1];
422 subassconvert_subrip(full_line, converted_line, LINE_LEN + 1);
423 current->text[0] = strdup(converted_line);
424 current->lines = 1;
425 }
426 }
427 return current;
428 }
429
368 static subtitle *sub_read_line_subviewer(stream_t *st,subtitle *current, int utf16) { 430 static subtitle *sub_read_line_subviewer(stream_t *st,subtitle *current, int utf16) {
369 char line[LINE_LEN+1]; 431 char line[LINE_LEN+1];
370 int a1,a2,a3,a4,b1,b2,b3,b4; 432 int a1,a2,a3,a4,b1,b2,b3,b4;
371 char *p=NULL; 433 char *p=NULL;
372 int i,len; 434 int i,len;
373 435
436 if (ass_enabled)
437 return sub_ass_read_line_subviewer(st, current, utf16);
374 while (!current->text[0]) { 438 while (!current->text[0]) {
375 if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL; 439 if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL;
376 if ((len=sscanf (line, "%d:%d:%d%[,.:]%d --> %d:%d:%d%[,.:]%d",&a1,&a2,&a3,(char *)&i,&a4,&b1,&b2,&b3,(char *)&i,&b4)) < 10) 440 if ((len=sscanf (line, "%d:%d:%d%[,.:]%d --> %d:%d:%d%[,.:]%d",&a1,&a2,&a3,(char *)&i,&a4,&b1,&b2,&b3,(char *)&i,&b4)) < 10)
377 continue; 441 continue;
378 current->start = a1*360000+a2*6000+a3*100+a4/10; 442 current->start = a1*360000+a2*6000+a3*100+a4/10;
2288 * \brief parse text and append it to subtitle in sub 2352 * \brief parse text and append it to subtitle in sub
2289 * \param sub subtitle struct to add text to 2353 * \param sub subtitle struct to add text to
2290 * \param txt text to parse 2354 * \param txt text to parse
2291 * \param len length of text in txt 2355 * \param len length of text in txt
2292 * \param endpts pts at which this subtitle text should be removed again 2356 * \param endpts pts at which this subtitle text should be removed again
2357 * \param strip_markup if non-zero, markup tags like <b></b> are stripped from the text; if zero, the text is passed to subassconvert_subrip() instead
2293 * 2358 *
2294 * <> and {} are interpreted as comment delimiters, "\n", "\N", '\n', '\r' 2359 * <> and {} are interpreted as comment delimiters, "\n", "\N", '\n', '\r'
2295 * and '\0' are interpreted as newlines, duplicate, leading and trailing 2360 * and '\0' are interpreted as newlines, duplicate, leading and trailing
2296 * newlines are ignored. 2361 * newlines are ignored.
2297 */ 2362 */
2298 void sub_add_text(subtitle *sub, const char *txt, int len, double endpts) { 2363 void sub_add_text(subtitle *sub, const char *txt, int len, double endpts, int strip_markup) {
2299 int comment = 0; 2364 int comment = 0;
2300 int double_newline = 1; // ignore newlines at the beginning 2365 int double_newline = 1; // ignore newlines at the beginning
2301 int i, pos; 2366 int i, pos;
2302 char *buf; 2367 char *buf;
2303 #ifdef CONFIG_FRIBIDI 2368 #ifdef CONFIG_FRIBIDI
2306 if (sub->lines >= SUB_MAX_TEXT) return; 2371 if (sub->lines >= SUB_MAX_TEXT) return;
2307 pos = 0; 2372 pos = 0;
2308 buf = malloc(MAX_SUBLINE + 1); 2373 buf = malloc(MAX_SUBLINE + 1);
2309 sub->text[sub->lines] = buf; 2374 sub->text[sub->lines] = buf;
2310 sub->endpts[sub->lines] = endpts; 2375 sub->endpts[sub->lines] = endpts;
2311 for (i = 0; i < len && pos < MAX_SUBLINE; i++) { 2376
2312 char c = txt[i]; 2377 if (!strip_markup) {
2313 if (c == '<') comment |= 1; 2378 subassconvert_subrip(txt, buf, MAX_SUBLINE + 1);
2314 if (c == '{') comment |= 2; 2379 sub->text[sub->lines] = buf;
2315 if (comment) { 2380 } else {
2316 if (c == '}') comment &= ~2; 2381 for (i = 0; i < len && pos < MAX_SUBLINE; i++) {
2317 if (c == '>') comment &= ~1; 2382 char c = txt[i];
2318 continue; 2383 if (c == '<') comment |= 1;
2319 } 2384 if (c == '{') comment |= 2;
2320 if (pos == MAX_SUBLINE - 1) { 2385 if (comment) {
2321 i--; 2386 if (c == '}') comment &= ~2;
2322 c = 0; 2387 if (c == '>') comment &= ~1;
2323 } 2388 continue;
2324 if (c == '\\' && i + 1 < len) {
2325 c = txt[++i];
2326 if (c == 'n' || c == 'N') c = 0;
2327 }
2328 if (c == '\n' || c == '\r') c = 0;
2329 if (c) {
2330 double_newline = 0;
2331 buf[pos++] = c;
2332 } else if (!double_newline) {
2333 if (sub->lines >= SUB_MAX_TEXT - 1) {
2334 mp_msg(MSGT_VO, MSGL_WARN, "Too many subtitle lines\n");
2335 break;
2336 } 2389 }
2337 double_newline = 1; 2390 if (pos == MAX_SUBLINE - 1) {
2338 buf[pos] = 0; 2391 i--;
2339 sub->lines++; 2392 c = 0;
2340 pos = 0; 2393 }
2341 buf = malloc(MAX_SUBLINE + 1); 2394 if (c == '\\' && i + 1 < len) {
2342 sub->text[sub->lines] = buf; 2395 c = txt[++i];
2343 sub->endpts[sub->lines] = endpts; 2396 if (c == 'n' || c == 'N') c = 0;
2344 } 2397 }
2398 if (c == '\n' || c == '\r') c = 0;
2399 if (c) {
2400 double_newline = 0;
2401 buf[pos++] = c;
2402 } else if (!double_newline) {
2403 if (sub->lines >= SUB_MAX_TEXT - 1) {
2404 mp_msg(MSGT_VO, MSGL_WARN, "Too many subtitle lines\n");
2405 break;
2406 }
2407 double_newline = 1;
2408 buf[pos] = 0;
2409 sub->lines++;
2410 pos = 0;
2411 buf = malloc(MAX_SUBLINE + 1);
2412 sub->text[sub->lines] = buf;
2413 sub->endpts[sub->lines] = endpts;
2414 }
2415 }
2416 buf[pos] = 0;
2345 } 2417 }
2346 buf[pos] = 0;
2347 if (sub->lines < SUB_MAX_TEXT && 2418 if (sub->lines < SUB_MAX_TEXT &&
2348 strlen(sub->text[sub->lines])) 2419 strlen(sub->text[sub->lines]))
2349 sub->lines++; 2420 sub->lines++;
2350 #ifdef CONFIG_FRIBIDI 2421 #ifdef CONFIG_FRIBIDI
2351 sub = sub_fribidi(sub, sub_utf8, orig_lines); 2422 sub = sub_fribidi(sub, sub_utf8, orig_lines);