Mercurial > mplayer.hg
comparison subassconvert.c @ 31686:b41cbf02f854
subtitles: convert SRT/MicroDVD markup into ASS markup
As the title says. If libass support is enabled, in SRT subs, HTML-style
markup (<b>, </i>, </font>, etc.) is converted to ASS tags. Similarly,
MicroDVD-style markup is converted.
Patch by ubitux, ubitux AT gmail DOT com.
author | greg |
---|---|
date | Sun, 18 Jul 2010 22:00:48 +0000 |
parents | |
children | f2b40b588792 |
comparison
equal
deleted
inserted
replaced
31685:31b6397e3b28 | 31686:b41cbf02f854 |
---|---|
1 /* | |
2 * Subtitles converter to SSA/ASS in order to allow special formatting | |
3 * | |
4 * This file is part of MPlayer. | |
5 * | |
6 * MPlayer is free software; you can redistribute it and/or modify | |
7 * it under the terms of the GNU General Public License as published by | |
8 * the Free Software Foundation; either version 2 of the License, or | |
9 * (at your option) any later version. | |
10 * | |
11 * MPlayer is distributed in the hope that it will be useful, | |
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 * GNU General Public License for more details. | |
15 * | |
16 * You should have received a copy of the GNU General Public License along | |
17 * with MPlayer; if not, write to the Free Software Foundation, Inc., | |
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | |
19 */ | |
20 | |
21 #include <string.h> | |
22 #include <stdint.h> | |
23 #include <stdlib.h> | |
24 #include <stdio.h> | |
25 #include <stdarg.h> | |
26 | |
27 #include "mp_msg.h" | |
28 #include "help_mp.h" | |
29 #include "bstr.h" | |
30 #include "subassconvert.h" | |
31 #include "libavutil/common.h" | |
32 | |
/**
 * \brief Output buffer descriptor used while building a converted line.
 */
struct line {
    char *buf;      /* destination storage supplied by the caller */
    size_t bufsize; /* capacity of buf in bytes, terminator included */
    size_t len;     /* number of bytes written so far */
};

#ifdef __GNUC__
static void append_text(struct line *dst, const char *fmt, ...) __attribute__ ((format(printf, 2, 3)));
#endif

/**
 * \brief Append printf-style formatted text to a line buffer.
 *
 * The text is truncated when it does not fit. After the call dst->len never
 * exceeds dst->bufsize - 1, so dst->buf[dst->len] is always a valid position
 * for the terminating NUL.
 *
 * \param dst destination line; dst->len is advanced by the number of
 *            characters actually stored.
 * \param fmt printf-style format string, followed by its arguments.
 */
static void append_text(struct line *dst, const char *fmt, ...)
{
    va_list va;
    size_t avail;
    int ret;

    /* No room left at all (this also covers bufsize == 0). */
    if (dst->len >= dst->bufsize)
        return;
    avail = dst->bufsize - dst->len;

    va_start(va, fmt);
    ret = vsnprintf(dst->buf + dst->len, avail, fmt, va);
    va_end(va);

    if (ret < 0)
        return;

    /* vsnprintf() returns the length the full text would have had; when that
     * does not fit, only avail - 1 characters plus the NUL were stored. */
    if ((size_t)ret >= avail)
        dst->len = dst->bufsize - 1;
    else
        dst->len += ret;
}
57 | |
/**
 * \brief Locate the first occurrence of a character in a string.
 * \param s NUL-terminated string to search.
 * \param c character to look for, as accepted by strchr().
 * \return zero-based offset of the first match, or -1 if there is none.
 */
static int indexof(const char *s, int c)
{
    const char *hit = strchr(s, c);

    if (!hit)
        return -1;
    return hit - s;
}
63 | |
64 | |
65 | |
66 /* | |
67 * SubRip | |
68 * | |
69 * Support basic tags (italic, bold, underline, strike-through) | |
70 * and font tag with size, color and face attributes. | |
71 * | |
72 */ | |
73 | |
74 struct font_tag { | |
75 struct bstr face; | |
76 int size; | |
77 uint32_t color; | |
78 }; | |
79 | |
/* Literal SubRip sequences and the ASS text each one is replaced with. */
static const struct tag_conv {
    const char *from;   /* sequence looked for in the SubRip text */
    const char *to;     /* replacement written to the ASS output */
} subrip_basic_tags[] = {
    /* italic */
    { "<i>",  "{\\i1}" },
    { "</i>", "{\\i0}" },
    /* bold */
    { "<b>",  "{\\b1}" },
    { "</b>", "{\\b0}" },
    /* underline */
    { "<u>",  "{\\u1}" },
    { "</u>", "{\\u0}" },
    /* strike-through */
    { "<s>",  "{\\s1}" },
    { "</s>", "{\\s0}" },
    /* braces start ASS override blocks, so literal ones are escaped */
    { "{",    "\\{" },
    { "}",    "\\}" },
    /* line break */
    { "\n",   "\\N" },
};
91 | |
/* The 16 named HTML colors; values are stored in BGR order. */
static const struct {
    const char *s;  /* color name */
    uint32_t v;     /* 0xBBGGRR value */
} subrip_web_colors[] = {
    { "red",     0x0000ff },
    { "blue",    0xff0000 },
    { "lime",    0x00ff00 },
    { "aqua",    0xffff00 },
    { "purple",  0x800080 },
    { "yellow",  0x00ffff },
    { "fuchsia", 0xff00ff },
    { "white",   0xffffff },
    { "gray",    0x808080 },
    { "maroon",  0x000080 },
    { "olive",   0x008080 },
    { "black",   0x000000 },
    { "silver",  0xc0c0c0 },
    { "teal",    0x808000 },
    { "green",   0x008000 },
    { "navy",    0x800000 },
};
104 | |
105 #define SUBRIP_MAX_STACKED_FONT_TAGS 16 | |
106 #define SUBRIP_FLAG_COLOR 0x01000000 | |
107 | |
108 /** | |
109 * \brief Convert SubRip lines into ASS markup | |
110 * \param orig original SubRip lines. The content will remain untouched. | |
111 * \param dest ASS markup destination buffer. | |
112 * \param dest_buffer_size maximum size for the destination buffer. | |
113 */ | |
114 void subassconvert_subrip(const char *orig, char *dest, size_t dest_buffer_size) | |
115 { | |
116 /* line is not const to avoid warnings with strtol, etc. | |
117 * orig content won't be changed */ | |
118 char *line = (char *)orig; | |
119 struct line new_line = { | |
120 .buf = dest, | |
121 .bufsize = dest_buffer_size, | |
122 }; | |
123 struct font_tag font_stack[SUBRIP_MAX_STACKED_FONT_TAGS]; | |
124 int sp = 0; | |
125 | |
126 font_stack[0] = (struct font_tag){}; // type with all defaults | |
127 while (*line && new_line.len < new_line.bufsize - 1) { | |
128 char *orig_line = line; | |
129 | |
130 for (int i = 0; i < FF_ARRAY_ELEMS(subrip_basic_tags); i++) { | |
131 const struct tag_conv *tag = &subrip_basic_tags[i]; | |
132 int from_len = strlen(tag->from); | |
133 if (strncmp(line, tag->from, from_len) == 0) { | |
134 append_text(&new_line, "%s", tag->to); | |
135 line += from_len; | |
136 } | |
137 } | |
138 | |
139 if (strncmp(line, "</font>", 7) == 0) { | |
140 /* Closing font tag */ | |
141 line += 7; | |
142 | |
143 if (sp > 0) { | |
144 struct font_tag *tag = &font_stack[sp]; | |
145 struct font_tag *last_tag = &tag[-1]; | |
146 sp--; | |
147 | |
148 if (tag->size) { | |
149 if (!last_tag->size) | |
150 append_text(&new_line, "{\\fs}"); | |
151 else if (last_tag->size != tag->size) | |
152 append_text(&new_line, "{\\fs%d}", last_tag->size); | |
153 } | |
154 | |
155 if (tag->color & SUBRIP_FLAG_COLOR) { | |
156 if (!(last_tag->color & SUBRIP_FLAG_COLOR)) | |
157 append_text(&new_line, "{\\c}"); | |
158 else if (last_tag->color != tag->color) | |
159 append_text(&new_line, "{\\c&H%06X&}", | |
160 last_tag->color & 0xffffff); | |
161 } | |
162 | |
163 if (tag->face.len) { | |
164 if (!last_tag->face.len) | |
165 append_text(&new_line, "{\\fn}"); | |
166 else if (bstrcmp(last_tag->face, tag->face) != 0) | |
167 append_text(&new_line, "{\\fn%.*s}", | |
168 BSTR_P(last_tag->face)); | |
169 } | |
170 } | |
171 } else if (strncmp(line, "<font ", 6) == 0 | |
172 && sp + 1 < FF_ARRAY_ELEMS(font_stack)) { | |
173 /* Opening font tag */ | |
174 char *potential_font_tag_start = line; | |
175 int len_backup = new_line.len; | |
176 struct font_tag *tag = &font_stack[sp + 1]; | |
177 int has_valid_attr = 0; | |
178 | |
179 *tag = tag[-1]; // keep values from previous tag | |
180 line += 6; | |
181 | |
182 while (*line && *line != '>') { | |
183 if (strncmp(line, "size=\"", 6) == 0) { | |
184 line += 6; | |
185 tag->size = strtol(line, &line, 10); | |
186 if (*line != '"' || !tag->size) | |
187 break; | |
188 append_text(&new_line, "{\\fs%d}", tag->size); | |
189 has_valid_attr = 1; | |
190 } else if (strncmp(line, "color=\"", 7) == 0) { | |
191 line += 7; | |
192 if (*line == '#') { | |
193 // #RRGGBB format | |
194 line++; | |
195 tag->color = strtol(line, &line, 16) & 0x00ffffff; | |
196 if (*line != '"') | |
197 break; | |
198 tag->color = ((tag->color & 0xff) << 16) | | |
199 (tag->color & 0xff00) | | |
200 ((tag->color & 0xff0000) >> 16) | | |
201 SUBRIP_FLAG_COLOR; | |
202 } else { | |
203 // Standard web colors | |
204 int i, len = indexof(line, '"'); | |
205 if (len <= 0) | |
206 break; | |
207 for (i = 0; i < FF_ARRAY_ELEMS(subrip_web_colors); i++) { | |
208 const char *color = subrip_web_colors[i].s; | |
209 if (strlen(color) == len | |
210 && strncasecmp(line, color, len) == 0) { | |
211 tag->color = SUBRIP_FLAG_COLOR | subrip_web_colors[i].v; | |
212 break; | |
213 } | |
214 } | |
215 | |
216 if (i == FF_ARRAY_ELEMS(subrip_web_colors)) { | |
217 /* We didn't find any matching color */ | |
218 line = strchr(line, '"'); // can't be NULL, see above | |
219 mp_msg(MSGT_SUBREADER, MSGL_WARN, | |
220 MSGTR_SUBTITLES_SubRip_UnknownFontColor, orig); | |
221 append_text(&new_line, "{\\c}"); | |
222 line += 2; | |
223 continue; | |
224 } | |
225 | |
226 line += len; | |
227 } | |
228 append_text(&new_line, "{\\c&H%06X&}", tag->color & 0xffffff); | |
229 has_valid_attr = 1; | |
230 } else if (strncmp(line, "face=\"", 6) == 0) { | |
231 /* Font face attribute */ | |
232 int len; | |
233 line += 6; | |
234 len = indexof(line, '"'); | |
235 if (len <= 0) | |
236 break; | |
237 tag->face.start = line; | |
238 tag->face.len = len; | |
239 line += len; | |
240 append_text(&new_line, "{\\fn%.*s}", BSTR_P(tag->face)); | |
241 has_valid_attr = 1; | |
242 } | |
243 line++; | |
244 } | |
245 | |
246 if (!has_valid_attr || *line != '>') { /* Not valid font tag */ | |
247 line = potential_font_tag_start; | |
248 new_line.len = len_backup; | |
249 } else { | |
250 sp++; | |
251 line++; | |
252 } | |
253 } | |
254 | |
255 /* Tag conversion code didn't match */ | |
256 if (line == orig_line) | |
257 new_line.buf[new_line.len++] = *line++; | |
258 } | |
259 new_line.buf[new_line.len] = 0; | |
260 } | |
261 | |
262 | |
263 /* | |
264 * MicroDVD | |
265 * | |
266 * Based on the specifications found here: | |
267 * https://trac.videolan.org/vlc/ticket/1825#comment:6 | |
268 */ | |
269 | |
270 struct microdvd_tag { | |
271 char key; | |
272 int persistent; | |
273 uint32_t data1; | |
274 uint32_t data2; | |
275 struct bstr data_string; | |
276 }; | |
277 | |
278 #define MICRODVD_PERSISTENT_OFF 0 | |
279 #define MICRODVD_PERSISTENT_ON 1 | |
280 #define MICRODVD_PERSISTENT_OPENED 2 | |
281 | |
282 // Color, Font, Size, cHarset, stYle, Position, cOordinate | |
283 #define MICRODVD_TAGS "cfshyYpo" | |
284 | |
285 static void microdvd_set_tag(struct microdvd_tag *tags, struct microdvd_tag tag) | |
286 { | |
287 int tag_index = indexof(MICRODVD_TAGS, tag.key); | |
288 | |
289 if (tag_index < 0) | |
290 return; | |
291 memcpy(&tags[tag_index], &tag, sizeof(tag)); | |
292 } | |
293 | |
294 // italic, bold, underline, strike-through | |
295 #define MICRODVD_STYLES "ibus" | |
296 | |
297 static char *microdvd_load_tags(struct microdvd_tag *tags, char *s) | |
298 { | |
299 while (*s == '{') { | |
300 char *start = s; | |
301 char tag_char = *(s + 1); | |
302 struct microdvd_tag tag = {}; | |
303 | |
304 if (!tag_char || *(s + 2) != ':') | |
305 break; | |
306 s += 3; | |
307 | |
308 switch (tag_char) { | |
309 | |
310 /* Style */ | |
311 case 'Y': | |
312 tag.persistent = MICRODVD_PERSISTENT_ON; | |
313 case 'y': | |
314 while (*s && *s != '}') { | |
315 int style_index = indexof(MICRODVD_STYLES, *s); | |
316 | |
317 if (style_index >= 0) | |
318 tag.data1 |= (1 << style_index); | |
319 s++; | |
320 } | |
321 if (*s != '}') | |
322 break; | |
323 /* We must distinguish persistent and non-persistent styles | |
324 * to handle this kind of style tags: {y:ib}{Y:us} */ | |
325 tag.key = tag_char; | |
326 break; | |
327 | |
328 /* Color */ | |
329 case 'C': | |
330 tag.persistent = MICRODVD_PERSISTENT_ON; | |
331 case 'c': | |
332 tag.data1 = strtol(s, &s, 16) & 0x00ffffff; | |
333 if (*s != '}') | |
334 break; | |
335 tag.key = 'c'; | |
336 break; | |
337 | |
338 /* Font name */ | |
339 case 'F': | |
340 tag.persistent = MICRODVD_PERSISTENT_ON; | |
341 case 'f': | |
342 { | |
343 int len = indexof(s, '}'); | |
344 if (len < 0) | |
345 break; | |
346 tag.data_string.start = s; | |
347 tag.data_string.len = len; | |
348 s += len; | |
349 tag.key = 'f'; | |
350 break; | |
351 } | |
352 | |
353 /* Font size */ | |
354 case 'S': | |
355 tag.persistent = MICRODVD_PERSISTENT_ON; | |
356 case 's': | |
357 tag.data1 = strtol(s, &s, 10); | |
358 if (*s != '}') | |
359 break; | |
360 tag.key = 's'; | |
361 break; | |
362 | |
363 /* Charset */ | |
364 case 'H': | |
365 { | |
366 //TODO: not yet handled, just parsed. | |
367 int len = indexof(s, '}'); | |
368 if (len < 0) | |
369 break; | |
370 tag.data_string.start = s; | |
371 tag.data_string.len = len; | |
372 s += len; | |
373 tag.key = 'h'; | |
374 break; | |
375 } | |
376 | |
377 /* Position */ | |
378 case 'P': | |
379 tag.persistent = MICRODVD_PERSISTENT_ON; | |
380 tag.data1 = (*s++ == '1'); | |
381 if (*s != '}') | |
382 break; | |
383 tag.key = 'p'; | |
384 break; | |
385 | |
386 /* Coordinates */ | |
387 case 'o': | |
388 tag.persistent = MICRODVD_PERSISTENT_ON; | |
389 tag.data1 = strtol(s, &s, 10); | |
390 if (*s != ',') | |
391 break; | |
392 s++; | |
393 tag.data2 = strtol(s, &s, 10); | |
394 if (*s != '}') | |
395 break; | |
396 tag.key = 'o'; | |
397 break; | |
398 | |
399 default: /* Unknown tag, we consider it's text */ | |
400 break; | |
401 } | |
402 | |
403 if (tag.key == 0) | |
404 return start; | |
405 | |
406 microdvd_set_tag(tags, tag); | |
407 s++; | |
408 } | |
409 return s; | |
410 } | |
411 | |
412 static void microdvd_open_tags(struct line *new_line, struct microdvd_tag *tags) | |
413 { | |
414 for (int i = 0; i < sizeof(MICRODVD_TAGS) - 1; i++) { | |
415 if (tags[i].persistent == MICRODVD_PERSISTENT_OPENED) | |
416 continue; | |
417 switch (tags[i].key) { | |
418 case 'Y': | |
419 case 'y': | |
420 for (int sidx = 0; sidx < sizeof(MICRODVD_STYLES) - 1; sidx++) | |
421 if (tags[i].data1 & (1 << sidx)) | |
422 append_text(new_line, "{\\%c1}", MICRODVD_STYLES[sidx]); | |
423 break; | |
424 | |
425 case 'c': | |
426 append_text(new_line, "{\\c&H%06X&}", tags[i].data1); | |
427 break; | |
428 | |
429 case 'f': | |
430 append_text(new_line, "{\\fn%.*s}", BSTR_P(tags[i].data_string)); | |
431 break; | |
432 | |
433 case 's': | |
434 append_text(new_line, "{\\fs%d}", tags[i].data1); | |
435 break; | |
436 | |
437 case 'p': | |
438 if (tags[i].data1 == 0) | |
439 append_text(new_line, "{\\an8}"); | |
440 break; | |
441 | |
442 case 'o': | |
443 append_text(new_line, "{\\pos(%d,%d)}", | |
444 tags[i].data1, tags[i].data2); | |
445 break; | |
446 } | |
447 if (tags[i].persistent == MICRODVD_PERSISTENT_ON) | |
448 tags[i].persistent = MICRODVD_PERSISTENT_OPENED; | |
449 } | |
450 } | |
451 | |
452 static void microdvd_close_no_persistent_tags(struct line *new_line, | |
453 struct microdvd_tag *tags) | |
454 { | |
455 int i; | |
456 | |
457 for (i = sizeof(MICRODVD_TAGS) - 2; i; i--) { | |
458 if (tags[i].persistent != MICRODVD_PERSISTENT_OFF) | |
459 continue; | |
460 switch (tags[i].key) { | |
461 | |
462 case 'y': | |
463 for (int sidx = sizeof(MICRODVD_STYLES) - 2; sidx >= 0; sidx--) | |
464 if (tags[i].data1 & (1 << sidx)) | |
465 append_text(new_line, "{\\%c0}", MICRODVD_STYLES[sidx]); | |
466 break; | |
467 | |
468 case 'c': | |
469 append_text(new_line, "{\\c}"); | |
470 break; | |
471 | |
472 case 'f': | |
473 append_text(new_line, "{\\fn}"); | |
474 break; | |
475 | |
476 case 's': | |
477 append_text(new_line, "{\\fs}"); | |
478 break; | |
479 } | |
480 tags[i].key = 0; | |
481 } | |
482 } | |
483 | |
484 /** | |
485 * \brief Convert MicroDVD lines into ASS markup | |
486 * \param orig original MicroDVD line. The content will remain untouched. | |
487 * \param dest ASS markup destination buffer. | |
488 * \param dest_buffer_size maximum size for the destination buffer. | |
489 */ | |
490 void subassconvert_microdvd(const char *orig, char *dest, size_t dest_buffer_size) | |
491 { | |
492 /* line is not const to avoid warnings with strtol, etc. | |
493 * orig content won't be changed */ | |
494 char *line = (char *)orig; | |
495 struct line new_line = { | |
496 .buf = dest, | |
497 .bufsize = dest_buffer_size, | |
498 }; | |
499 struct microdvd_tag tags[sizeof(MICRODVD_TAGS) - 1] = {}; | |
500 | |
501 while (*line) { | |
502 line = microdvd_load_tags(tags, line); | |
503 microdvd_open_tags(&new_line, tags); | |
504 | |
505 while (*line && *line != '|') | |
506 new_line.buf[new_line.len++] = *line++; | |
507 | |
508 if (*line == '|') { | |
509 microdvd_close_no_persistent_tags(&new_line, tags); | |
510 append_text(&new_line, "\\N"); | |
511 line++; | |
512 } | |
513 } | |
514 new_line.buf[new_line.len] = 0; | |
515 } |