comparison subassconvert.c @ 31686:b41cbf02f854

subtitles: convert SRT/MicroDVD markup into ASS markup. As the title says: if libass support is enabled, HTML-style markup in SRT subs (<b>, </i>, </font>, etc.) is converted to ASS tags. Similarly, MicroDVD-style markup is converted. Patch by ubitux, ubitux AT gmail DOT com.
author greg
date Sun, 18 Jul 2010 22:00:48 +0000
parents
children f2b40b588792
comparison
equal deleted inserted replaced
31685:31b6397e3b28 31686:b41cbf02f854
1 /*
2 * Subtitles converter to SSA/ASS in order to allow special formatting
3 *
4 * This file is part of MPlayer.
5 *
6 * MPlayer is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * MPlayer is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 */
20
21 #include <string.h>
22 #include <stdint.h>
23 #include <stdlib.h>
24 #include <stdio.h>
25 #include <stdarg.h>
26
27 #include "mp_msg.h"
28 #include "help_mp.h"
29 #include "bstr.h"
30 #include "subassconvert.h"
31 #include "libavutil/common.h"
32
/* Bounded output buffer that the converters append ASS markup to. */
struct line {
    char *buf;       /* destination storage, provided by the caller */
    size_t bufsize;  /* capacity of buf in bytes, terminator included */
    size_t len;      /* bytes written so far, terminator excluded */
};

#ifdef __GNUC__
static void append_text(struct line *dst, const char *fmt, ...) __attribute__ ((format(printf, 2, 3)));
#endif

/**
 * \brief Append printf-style formatted text to a bounded line buffer.
 * \param dst line buffer to append to; dst->len is advanced by the number
 *            of characters actually stored.
 * \param fmt printf-style format string, followed by its arguments.
 *
 * The text is truncated when it does not fit. After the call dst->len is
 * always strictly smaller than dst->bufsize (for a non-empty buffer), so
 * callers can safely store a terminator at dst->buf[dst->len].
 */
static void append_text(struct line *dst, const char *fmt, ...)
{
    va_list va;
    size_t avail;
    int ret;

    /* Buffer already full (or empty-sized): nothing can be stored. */
    if (dst->len >= dst->bufsize)
        return;
    avail = dst->bufsize - dst->len;

    va_start(va, fmt);
    ret = vsnprintf(dst->buf + dst->len, avail, fmt, va);
    va_end(va);

    if (ret < 0)
        return;

    /* vsnprintf returns the length the full text would have had. On
     * truncation only avail - 1 characters plus the terminator were
     * stored, so len must stop one short of bufsize. */
    if ((size_t)ret >= avail)
        dst->len = dst->bufsize - 1;
    else
        dst->len += ret;
}
57
/*
 * Return the offset of the first occurrence of character c in string s,
 * or -1 when c does not occur. As with strchr(), searching for 0 finds
 * the terminator, i.e. yields the string length.
 */
static int indexof(const char *s, int c)
{
    const char *hit = strchr(s, c);

    if (!hit)
        return -1;
    return hit - s;
}
63
64
65
66 /*
67 * SubRip
68 *
69 * Support basic tags (italic, bold, underline, strike-through)
70 * and font tag with size, color and face attributes.
71 *
72 */
73
74 struct font_tag {
75 struct bstr face;
76 int size;
77 uint32_t color;
78 };
79
/* Literal SubRip sequences and the ASS text each one is replaced with. */
static const struct tag_conv {
    const char *from;   /* text to look for in the SubRip input */
    const char *to;     /* ASS replacement */
} subrip_basic_tags[] = {
    /* italic */
    {"<i>",  "{\\i1}"},
    {"</i>", "{\\i0}"},
    /* bold */
    {"<b>",  "{\\b1}"},
    {"</b>", "{\\b0}"},
    /* underline */
    {"<u>",  "{\\u1}"},
    {"</u>", "{\\u0}"},
    /* strike-through */
    {"<s>",  "{\\s1}"},
    {"</s>", "{\\s0}"},
    /* braces are escaped with a backslash in the output */
    {"{",    "\\{"},
    {"}",    "\\}"},
    /* line break */
    {"\n",   "\\N"}
};
91
/* The 16 named HTML colors, stored in BGR order (0xBBGGRR). */
static const struct {
    const char *s;  /* color name as written in the color="..." attribute */
    uint32_t v;     /* BGR value */
} subrip_web_colors[] = {
    {"red",     0x0000ff},
    {"blue",    0xff0000},
    {"lime",    0x00ff00},
    {"aqua",    0xffff00},
    {"purple",  0x800080},
    {"yellow",  0x00ffff},
    {"fuchsia", 0xff00ff},
    {"white",   0xffffff},
    {"gray",    0x808080},
    {"maroon",  0x000080},
    {"olive",   0x008080},
    {"black",   0x000000},
    {"silver",  0xc0c0c0},
    {"teal",    0x808000},
    {"green",   0x008000},
    {"navy",    0x800000}
};
104
/* Number of slots in the <font> stack; slot 0 holds the defaults. */
#define SUBRIP_MAX_STACKED_FONT_TAGS 16
/* Bit set in font_tag.color (above the 24 colour bits) when a colour is set. */
#define SUBRIP_FLAG_COLOR 0x01000000
107
108 /**
109 * \brief Convert SubRip lines into ASS markup
110 * \param orig original SubRip lines. The content will remain untouched.
111 * \param dest ASS markup destination buffer.
112 * \param dest_buffer_size maximum size for the destination buffer.
113 */
114 void subassconvert_subrip(const char *orig, char *dest, size_t dest_buffer_size)
115 {
116 /* line is not const to avoid warnings with strtol, etc.
117 * orig content won't be changed */
118 char *line = (char *)orig;
119 struct line new_line = {
120 .buf = dest,
121 .bufsize = dest_buffer_size,
122 };
123 struct font_tag font_stack[SUBRIP_MAX_STACKED_FONT_TAGS];
124 int sp = 0;
125
126 font_stack[0] = (struct font_tag){}; // type with all defaults
127 while (*line && new_line.len < new_line.bufsize - 1) {
128 char *orig_line = line;
129
130 for (int i = 0; i < FF_ARRAY_ELEMS(subrip_basic_tags); i++) {
131 const struct tag_conv *tag = &subrip_basic_tags[i];
132 int from_len = strlen(tag->from);
133 if (strncmp(line, tag->from, from_len) == 0) {
134 append_text(&new_line, "%s", tag->to);
135 line += from_len;
136 }
137 }
138
139 if (strncmp(line, "</font>", 7) == 0) {
140 /* Closing font tag */
141 line += 7;
142
143 if (sp > 0) {
144 struct font_tag *tag = &font_stack[sp];
145 struct font_tag *last_tag = &tag[-1];
146 sp--;
147
148 if (tag->size) {
149 if (!last_tag->size)
150 append_text(&new_line, "{\\fs}");
151 else if (last_tag->size != tag->size)
152 append_text(&new_line, "{\\fs%d}", last_tag->size);
153 }
154
155 if (tag->color & SUBRIP_FLAG_COLOR) {
156 if (!(last_tag->color & SUBRIP_FLAG_COLOR))
157 append_text(&new_line, "{\\c}");
158 else if (last_tag->color != tag->color)
159 append_text(&new_line, "{\\c&H%06X&}",
160 last_tag->color & 0xffffff);
161 }
162
163 if (tag->face.len) {
164 if (!last_tag->face.len)
165 append_text(&new_line, "{\\fn}");
166 else if (bstrcmp(last_tag->face, tag->face) != 0)
167 append_text(&new_line, "{\\fn%.*s}",
168 BSTR_P(last_tag->face));
169 }
170 }
171 } else if (strncmp(line, "<font ", 6) == 0
172 && sp + 1 < FF_ARRAY_ELEMS(font_stack)) {
173 /* Opening font tag */
174 char *potential_font_tag_start = line;
175 int len_backup = new_line.len;
176 struct font_tag *tag = &font_stack[sp + 1];
177 int has_valid_attr = 0;
178
179 *tag = tag[-1]; // keep values from previous tag
180 line += 6;
181
182 while (*line && *line != '>') {
183 if (strncmp(line, "size=\"", 6) == 0) {
184 line += 6;
185 tag->size = strtol(line, &line, 10);
186 if (*line != '"' || !tag->size)
187 break;
188 append_text(&new_line, "{\\fs%d}", tag->size);
189 has_valid_attr = 1;
190 } else if (strncmp(line, "color=\"", 7) == 0) {
191 line += 7;
192 if (*line == '#') {
193 // #RRGGBB format
194 line++;
195 tag->color = strtol(line, &line, 16) & 0x00ffffff;
196 if (*line != '"')
197 break;
198 tag->color = ((tag->color & 0xff) << 16) |
199 (tag->color & 0xff00) |
200 ((tag->color & 0xff0000) >> 16) |
201 SUBRIP_FLAG_COLOR;
202 } else {
203 // Standard web colors
204 int i, len = indexof(line, '"');
205 if (len <= 0)
206 break;
207 for (i = 0; i < FF_ARRAY_ELEMS(subrip_web_colors); i++) {
208 const char *color = subrip_web_colors[i].s;
209 if (strlen(color) == len
210 && strncasecmp(line, color, len) == 0) {
211 tag->color = SUBRIP_FLAG_COLOR | subrip_web_colors[i].v;
212 break;
213 }
214 }
215
216 if (i == FF_ARRAY_ELEMS(subrip_web_colors)) {
217 /* We didn't find any matching color */
218 line = strchr(line, '"'); // can't be NULL, see above
219 mp_msg(MSGT_SUBREADER, MSGL_WARN,
220 MSGTR_SUBTITLES_SubRip_UnknownFontColor, orig);
221 append_text(&new_line, "{\\c}");
222 line += 2;
223 continue;
224 }
225
226 line += len;
227 }
228 append_text(&new_line, "{\\c&H%06X&}", tag->color & 0xffffff);
229 has_valid_attr = 1;
230 } else if (strncmp(line, "face=\"", 6) == 0) {
231 /* Font face attribute */
232 int len;
233 line += 6;
234 len = indexof(line, '"');
235 if (len <= 0)
236 break;
237 tag->face.start = line;
238 tag->face.len = len;
239 line += len;
240 append_text(&new_line, "{\\fn%.*s}", BSTR_P(tag->face));
241 has_valid_attr = 1;
242 }
243 line++;
244 }
245
246 if (!has_valid_attr || *line != '>') { /* Not valid font tag */
247 line = potential_font_tag_start;
248 new_line.len = len_backup;
249 } else {
250 sp++;
251 line++;
252 }
253 }
254
255 /* Tag conversion code didn't match */
256 if (line == orig_line)
257 new_line.buf[new_line.len++] = *line++;
258 }
259 new_line.buf[new_line.len] = 0;
260 }
261
262
263 /*
264 * MicroDVD
265 *
266 * Based on the specifications found here:
267 * https://trac.videolan.org/vlc/ticket/1825#comment:6
268 */
269
270 struct microdvd_tag {
271 char key;
272 int persistent;
273 uint32_t data1;
274 uint32_t data2;
275 struct bstr data_string;
276 };
277
/* Persistence state of a tag: */
#define MICRODVD_PERSISTENT_OFF     0   /* not persistent */
#define MICRODVD_PERSISTENT_ON      1   /* persistent, not yet written out */
#define MICRODVD_PERSISTENT_OPENED  2   /* persistent, already written out */

// Color, Font, Size, cHarset, stYle, Position, cOordinate
// The position of a key in this string is its slot in the tags array.
#define MICRODVD_TAGS "cfshyYpo"
284
285 static void microdvd_set_tag(struct microdvd_tag *tags, struct microdvd_tag tag)
286 {
287 int tag_index = indexof(MICRODVD_TAGS, tag.key);
288
289 if (tag_index < 0)
290 return;
291 memcpy(&tags[tag_index], &tag, sizeof(tag));
292 }
293
// italic, bold, underline, strike-through
// The position of a letter in this string is its bit number in the style mask.
#define MICRODVD_STYLES "ibus"
296
297 static char *microdvd_load_tags(struct microdvd_tag *tags, char *s)
298 {
299 while (*s == '{') {
300 char *start = s;
301 char tag_char = *(s + 1);
302 struct microdvd_tag tag = {};
303
304 if (!tag_char || *(s + 2) != ':')
305 break;
306 s += 3;
307
308 switch (tag_char) {
309
310 /* Style */
311 case 'Y':
312 tag.persistent = MICRODVD_PERSISTENT_ON;
313 case 'y':
314 while (*s && *s != '}') {
315 int style_index = indexof(MICRODVD_STYLES, *s);
316
317 if (style_index >= 0)
318 tag.data1 |= (1 << style_index);
319 s++;
320 }
321 if (*s != '}')
322 break;
323 /* We must distinguish persistent and non-persistent styles
324 * to handle this kind of style tags: {y:ib}{Y:us} */
325 tag.key = tag_char;
326 break;
327
328 /* Color */
329 case 'C':
330 tag.persistent = MICRODVD_PERSISTENT_ON;
331 case 'c':
332 tag.data1 = strtol(s, &s, 16) & 0x00ffffff;
333 if (*s != '}')
334 break;
335 tag.key = 'c';
336 break;
337
338 /* Font name */
339 case 'F':
340 tag.persistent = MICRODVD_PERSISTENT_ON;
341 case 'f':
342 {
343 int len = indexof(s, '}');
344 if (len < 0)
345 break;
346 tag.data_string.start = s;
347 tag.data_string.len = len;
348 s += len;
349 tag.key = 'f';
350 break;
351 }
352
353 /* Font size */
354 case 'S':
355 tag.persistent = MICRODVD_PERSISTENT_ON;
356 case 's':
357 tag.data1 = strtol(s, &s, 10);
358 if (*s != '}')
359 break;
360 tag.key = 's';
361 break;
362
363 /* Charset */
364 case 'H':
365 {
366 //TODO: not yet handled, just parsed.
367 int len = indexof(s, '}');
368 if (len < 0)
369 break;
370 tag.data_string.start = s;
371 tag.data_string.len = len;
372 s += len;
373 tag.key = 'h';
374 break;
375 }
376
377 /* Position */
378 case 'P':
379 tag.persistent = MICRODVD_PERSISTENT_ON;
380 tag.data1 = (*s++ == '1');
381 if (*s != '}')
382 break;
383 tag.key = 'p';
384 break;
385
386 /* Coordinates */
387 case 'o':
388 tag.persistent = MICRODVD_PERSISTENT_ON;
389 tag.data1 = strtol(s, &s, 10);
390 if (*s != ',')
391 break;
392 s++;
393 tag.data2 = strtol(s, &s, 10);
394 if (*s != '}')
395 break;
396 tag.key = 'o';
397 break;
398
399 default: /* Unknown tag, we consider it's text */
400 break;
401 }
402
403 if (tag.key == 0)
404 return start;
405
406 microdvd_set_tag(tags, tag);
407 s++;
408 }
409 return s;
410 }
411
412 static void microdvd_open_tags(struct line *new_line, struct microdvd_tag *tags)
413 {
414 for (int i = 0; i < sizeof(MICRODVD_TAGS) - 1; i++) {
415 if (tags[i].persistent == MICRODVD_PERSISTENT_OPENED)
416 continue;
417 switch (tags[i].key) {
418 case 'Y':
419 case 'y':
420 for (int sidx = 0; sidx < sizeof(MICRODVD_STYLES) - 1; sidx++)
421 if (tags[i].data1 & (1 << sidx))
422 append_text(new_line, "{\\%c1}", MICRODVD_STYLES[sidx]);
423 break;
424
425 case 'c':
426 append_text(new_line, "{\\c&H%06X&}", tags[i].data1);
427 break;
428
429 case 'f':
430 append_text(new_line, "{\\fn%.*s}", BSTR_P(tags[i].data_string));
431 break;
432
433 case 's':
434 append_text(new_line, "{\\fs%d}", tags[i].data1);
435 break;
436
437 case 'p':
438 if (tags[i].data1 == 0)
439 append_text(new_line, "{\\an8}");
440 break;
441
442 case 'o':
443 append_text(new_line, "{\\pos(%d,%d)}",
444 tags[i].data1, tags[i].data2);
445 break;
446 }
447 if (tags[i].persistent == MICRODVD_PERSISTENT_ON)
448 tags[i].persistent = MICRODVD_PERSISTENT_OPENED;
449 }
450 }
451
452 static void microdvd_close_no_persistent_tags(struct line *new_line,
453 struct microdvd_tag *tags)
454 {
455 int i;
456
457 for (i = sizeof(MICRODVD_TAGS) - 2; i; i--) {
458 if (tags[i].persistent != MICRODVD_PERSISTENT_OFF)
459 continue;
460 switch (tags[i].key) {
461
462 case 'y':
463 for (int sidx = sizeof(MICRODVD_STYLES) - 2; sidx >= 0; sidx--)
464 if (tags[i].data1 & (1 << sidx))
465 append_text(new_line, "{\\%c0}", MICRODVD_STYLES[sidx]);
466 break;
467
468 case 'c':
469 append_text(new_line, "{\\c}");
470 break;
471
472 case 'f':
473 append_text(new_line, "{\\fn}");
474 break;
475
476 case 's':
477 append_text(new_line, "{\\fs}");
478 break;
479 }
480 tags[i].key = 0;
481 }
482 }
483
484 /**
485 * \brief Convert MicroDVD lines into ASS markup
486 * \param orig original MicroDVD line. The content will remain untouched.
487 * \param dest ASS markup destination buffer.
488 * \param dest_buffer_size maximum size for the destination buffer.
489 */
490 void subassconvert_microdvd(const char *orig, char *dest, size_t dest_buffer_size)
491 {
492 /* line is not const to avoid warnings with strtol, etc.
493 * orig content won't be changed */
494 char *line = (char *)orig;
495 struct line new_line = {
496 .buf = dest,
497 .bufsize = dest_buffer_size,
498 };
499 struct microdvd_tag tags[sizeof(MICRODVD_TAGS) - 1] = {};
500
501 while (*line) {
502 line = microdvd_load_tags(tags, line);
503 microdvd_open_tags(&new_line, tags);
504
505 while (*line && *line != '|')
506 new_line.buf[new_line.len++] = *line++;
507
508 if (*line == '|') {
509 microdvd_close_no_persistent_tags(&new_line, tags);
510 append_text(&new_line, "\\N");
511 line++;
512 }
513 }
514 new_line.buf[new_line.len] = 0;
515 }