Mercurial > mplayer.hg
annotate subreader.c @ 1934:b024d0c98457
increased max msg size
author | arpi |
---|---|
date | Sat, 22 Sep 2001 15:43:52 +0000 |
parents | a9e51734ea00 |
children | 885c9c802373 |
rev | line source |
---|---|
258 | 1 /* |
2 * Subtitle reader with format autodetection | |
3 * | |
4 * Written by laaz | |
5 * Some code cleanup & realloc() by A'rpi/ESP-team | |
1081 | 6 * dunnowhat sub format by szabi |
258 | 7 */ |
8 | |
9 | |
10 #include <stdio.h> | |
11 #include <stdlib.h> | |
12 #include <string.h> | |
706
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
13 #include <ctype.h> |
258 | 14 |
15 #include "subreader.h" | |
16 | |
17 #define ERR (void *)-1 | |
18 | |
19 | |
/* Set by sub_autodetect(): 0 for formats 0 and 7, 1 for every other format. */
int sub_uses_time = 0;

/* Count of entries for which a line reader returned ERR (see sub_read_file). */
int sub_errs = 0;

/* Number of subtitle structs. */
int sub_num = 0;

/*
 * Detected subtitle format, -1 while unknown:
 *   0  microdvd
 *   1  SubRip
 *   2  SubViewer
 *   3  SAMI (smi)
 *   4  vplayer format
 *   5  RT format
 *   6  ssa (Sub Station Alpha)
 *   7  ... erm ... dunnowhat. tell me if you know
 */
int sub_format = -1;
624 | 31 |
/* Returns 1 when p terminates a line (CR, LF or NUL), 0 otherwise. */
int eol(char p) {
    switch (p) {
    case '\r':
    case '\n':
    case '\0':
        return 1;
    default:
        return 0;
    }
}
35 | |
706
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
/*
 * Strip leading and trailing whitespace from s, in place.
 *
 * The kept text is moved to the front of the buffer with a single
 * memmove() (source and destination overlap, so strcpy() must not be used),
 * and characters are converted to unsigned char before isspace() so that
 * bytes >= 0x80 are not passed as negative values.
 */
static inline void trail_space(char *s) {
    char *first = s;
    size_t len;

    /* skip leading whitespace */
    while (isspace((unsigned char)*first)) first++;

    /* drop trailing whitespace */
    len = strlen(first);
    while (len > 0 && isspace((unsigned char)first[len - 1])) len--;

    if (first != s) memmove(s, first, len);
    s[len] = '\0';
}
624 | 42 |
43 subtitle *sub_read_line_sami(FILE *fd, subtitle *current) { | |
706
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
44 static char line[1001]; |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
45 static char *s = NULL; |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
46 char text[1000], *p, *q; |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
47 int state; |
624 | 48 |
706
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
49 current->lines = current->start = current->end = 0; |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
50 state = 0; |
624 | 51 |
706
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
52 /* read the first line */ |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
53 if (!s) |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
54 if (!(s = fgets(line, 1000, fd))) return 0; |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
55 |
624 | 56 do { |
706
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
57 switch (state) { |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
58 |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
59 case 0: /* find "START=" */ |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
60 s = strstr (s, "Start="); |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
61 if (s) { |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
62 current->start = strtol (s + 6, &s, 0) / 10; |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
63 state = 1; continue; |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
64 } |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
65 break; |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
66 |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
67 case 1: /* find "<P" */ |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
68 if ((s = strstr (s, "<P"))) { s += 2; state = 2; continue; } |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
69 break; |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
70 |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
71 case 2: /* find ">" */ |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
72 if ((s = strchr (s, '>'))) { s++; state = 3; p = text; continue; } |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
73 break; |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
74 |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
75 case 3: /* get all text until '<' appears */ |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
76 if (*s == '\0') { break; } |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
77 else if (*s == '<') { state = 4; } |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
78 else if (!strncasecmp (s, " ", 6)) { *p++ = ' '; s += 6; } |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
79 else if (*s == '\r') { s++; } |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
80 else if (!strncasecmp (s, "<br>", 4) || *s == '\n') { |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
81 *p = '\0'; p = text; trail_space (text); |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
82 if (text[0] != '\0') |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
83 current->text[current->lines++] = strdup (text); |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
84 if (*s == '\n') s++; else s += 4; |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
85 } |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
86 else *p++ = *s++; |
624 | 87 continue; |
706
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
88 |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
89 case 4: /* get current->end or skip <TAG> */ |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
90 q = strstr (s, "Start="); |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
91 if (q) { |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
92 current->end = strtol (q + 6, &q, 0) / 10 - 1; |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
93 *p = '\0'; trail_space (text); |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
94 if (text[0] != '\0') |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
95 current->text[current->lines++] = strdup (text); |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
96 if (current->lines > 0) { state = 99; break; } |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
97 state = 0; continue; |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
98 } |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
99 s = strchr (s, '>'); |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
100 if (s) { s++; state = 3; continue; } |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
101 break; |
624 | 102 } |
103 | |
706
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
104 /* read next line */ |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
105 if (state != 99 && !(s = fgets (line, 1000, fd))) return 0; |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
106 |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
107 } while (state != 99); |
624 | 108 |
109 return current; | |
110 } | |
258 | 111 |
112 | |
113 char *sub_readtext(char *source, char **dest) { | |
114 int len=0; | |
932 | 115 char *p=source; |
258 | 116 |
932 | 117 while ( !eol(*p) && *p!= '|' ) { |
118 p++,len++; | |
119 } | |
258 | 120 |
121 *dest= (char *)malloc (len+1); | |
122 if (!dest) {return ERR;} | |
123 | |
124 strncpy(*dest, source, len); | |
125 (*dest)[len]=0; | |
126 | |
127 while (*p=='\r' || *p=='\n' || *p=='|') p++; | |
128 | |
129 if (*p) return p; // not-last text field | |
130 else return NULL; // last text field | |
131 } | |
132 | |
133 subtitle *sub_read_line_microdvd(FILE *fd,subtitle *current) { | |
134 char line[1001]; | |
135 char line2[1001]; | |
136 char *p, *next; | |
137 int i; | |
138 | |
1764 | 139 bzero (current, sizeof(subtitle)); |
258 | 140 |
141 do { | |
142 if (!fgets (line, 1000, fd)) return NULL; | |
932 | 143 } while (sscanf (line, "{%ld}{%ld}%[^\r\n]", &(current->start), &(current->end),line2) <3); |
258 | 144 |
932 | 145 p=line2; |
258 | 146 |
147 next=p, i=0; | |
1081 | 148 while ((next =sub_readtext (next, &(current->text[i])))) { |
270 | 149 if (current->text[i]==ERR) {return ERR;} |
258 | 150 i++; |
1081 | 151 if (i>=SUB_MAX_TEXT) { printf ("Too many lines in a subtitle\n");current->lines=i;return current;} |
258 | 152 } |
932 | 153 current->lines= ++i; |
258 | 154 |
155 return current; | |
156 } | |
157 | |
158 subtitle *sub_read_line_subrip(FILE *fd, subtitle *current) { | |
159 char line[1001]; | |
160 int a1,a2,a3,a4,b1,b2,b3,b4; | |
161 char *p=NULL, *q=NULL; | |
162 int len; | |
163 | |
1764 | 164 bzero (current, sizeof(subtitle)); |
258 | 165 |
1764 | 166 while (1) { |
258 | 167 if (!fgets (line, 1000, fd)) return NULL; |
269 | 168 if (sscanf (line, "%d:%d:%d.%d,%d:%d:%d.%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4) < 8) continue; |
258 | 169 current->start = a1*360000+a2*6000+a3*100+a4; |
170 current->end = b1*360000+b2*6000+b3*100+b4; | |
171 | |
172 if (!fgets (line, 1000, fd)) return NULL; | |
173 | |
174 p=q=line; | |
175 for (current->lines=1; current->lines < SUB_MAX_TEXT; current->lines++) { | |
176 for (q=p,len=0; *p && *p!='\r' && *p!='\n' && strncmp(p,"[br]",4); p++,len++); | |
177 current->text[current->lines-1]=(char *)malloc (len+1); | |
178 if (!current->text[current->lines-1]) return ERR; | |
179 strncpy (current->text[current->lines-1], q, len); | |
270 | 180 current->text[current->lines-1][len]='\0'; |
258 | 181 if (!*p || *p=='\r' || *p=='\n') break; |
182 while (*p++!=']'); | |
183 } | |
1764 | 184 break; |
258 | 185 } |
186 return current; | |
187 } | |
188 | |
189 subtitle *sub_read_line_third(FILE *fd,subtitle *current) { | |
190 char line[1001]; | |
191 int a1,a2,a3,a4,b1,b2,b3,b4; | |
192 char *p=NULL; | |
193 int i,len; | |
194 | |
1764 | 195 bzero (current, sizeof(subtitle)); |
258 | 196 |
197 while (!current->text[0]) { | |
198 if (!fgets (line, 1000, fd)) return NULL; | |
269 | 199 if ((len=sscanf (line, "%d:%d:%d,%d --> %d:%d:%d,%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4)) < 8) |
258 | 200 continue; |
201 current->start = a1*360000+a2*6000+a3*100+a4/10; | |
202 current->end = b1*360000+b2*6000+b3*100+b4/10; | |
203 for (i=0; i<SUB_MAX_TEXT;) { | |
269 | 204 if (!fgets (line, 1000, fd)) break; |
258 | 205 len=0; |
206 for (p=line; *p!='\n' && *p!='\r' && *p; p++,len++); | |
207 if (len) { | |
208 current->text[i]=(char *)malloc (len+1); | |
209 if (!current->text[i]) return ERR; | |
270 | 210 strncpy (current->text[i], line, len); current->text[i][len]='\0'; |
258 | 211 i++; |
212 } else { | |
213 break; | |
214 } | |
215 } | |
216 current->lines=i; | |
217 } | |
218 return current; | |
219 } | |
220 | |
818 | 221 subtitle *sub_read_line_vplayer(FILE *fd,subtitle *current) { |
222 char line[1001]; | |
223 char line2[1001]; | |
224 int a1,a2,a3,b1,b2,b3; | |
1081 | 225 char *p=NULL, *next; |
858 | 226 int i,len,len2,plen; |
818 | 227 |
1764 | 228 bzero (current, sizeof(subtitle)); |
818 | 229 |
230 while (!current->text[0]) { | |
231 if (!fgets (line, 1000, fd)) return NULL; | |
858 | 232 if ((len=sscanf (line, "%d:%d:%d:%n",&a1,&a2,&a3,&plen)) < 3) |
818 | 233 continue; |
234 if (!fgets (line2, 1000, fd)) return NULL; | |
235 if ((len2=sscanf (line2, "%d:%d:%d:",&b1,&b2,&b3)) < 3) | |
236 continue; | |
237 // przewiñ o linijkê do ty³u: | |
238 fseek(fd,-strlen(line2),SEEK_CUR); | |
239 | |
240 current->start = a1*360000+a2*6000+a3*100; | |
241 current->end = b1*360000+b2*6000+b3*100; | |
896
d46de26aef48
there is another format that get detected as vplayers.
eyck
parents:
892
diff
changeset
|
242 if ((current->end - current->start) > 1000) {current->end = current->start + 1000;} // not too long though. |
818 | 243 // teraz czas na wkopiowanie stringu |
1640
cbedcfab877b
Fixup to vplayer subtitle submitted to sourceforge by Igor Wojnicki
eyck
parents:
1501
diff
changeset
|
244 p=line; |
cbedcfab877b
Fixup to vplayer subtitle submitted to sourceforge by Igor Wojnicki
eyck
parents:
1501
diff
changeset
|
245 // finds the body of the subtitle |
cbedcfab877b
Fixup to vplayer subtitle submitted to sourceforge by Igor Wojnicki
eyck
parents:
1501
diff
changeset
|
246 for (i=0; i<3; i++){ |
cbedcfab877b
Fixup to vplayer subtitle submitted to sourceforge by Igor Wojnicki
eyck
parents:
1501
diff
changeset
|
247 p=strchr(p,':')+1; |
cbedcfab877b
Fixup to vplayer subtitle submitted to sourceforge by Igor Wojnicki
eyck
parents:
1501
diff
changeset
|
248 } |
cbedcfab877b
Fixup to vplayer subtitle submitted to sourceforge by Igor Wojnicki
eyck
parents:
1501
diff
changeset
|
249 i=0; |
cbedcfab877b
Fixup to vplayer subtitle submitted to sourceforge by Igor Wojnicki
eyck
parents:
1501
diff
changeset
|
250 |
818 | 251 if (*p!='|') { |
252 // | |
253 next = p,i=0; | |
254 while ((next =sub_readtext (next, &(current->text[i])))) { | |
255 if (current->text[i]==ERR) {return ERR;} | |
256 i++; | |
1081 | 257 if (i>=SUB_MAX_TEXT) { printf ("Too many lines in a subtitle\n");current->lines=i;return current;} |
818 | 258 } |
259 current->lines=i+1; | |
260 } | |
261 } | |
262 return current; | |
263 } | |
264 | |
850 | 265 subtitle *sub_read_line_rt(FILE *fd,subtitle *current) { |
266 //TODO: This format uses quite rich (sub/super)set of xhtml | |
267 // I couldn't check it since DTD is not included. | |
268 // WARNING: full XML parses can be required for proper parsing | |
269 char line[1001]; | |
270 int a1,a2,a3,a4,b1,b2,b3,b4; | |
271 char *p=NULL,*next=NULL; | |
272 int i,len,plen; | |
273 | |
1764 | 274 bzero (current, sizeof(subtitle)); |
850 | 275 |
276 while (!current->text[0]) { | |
277 if (!fgets (line, 1000, fd)) return NULL; | |
278 //TODO: it seems that format of time is not easily determined, it may be 1:12, 1:12.0 or 0:1:12.0 | |
279 //to describe the same moment in time. Maybe there are even more formats in use. | |
280 //if ((len=sscanf (line, "<Time Begin=\"%d:%d:%d.%d\" End=\"%d:%d:%d.%d\"",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4)) < 8) | |
281 plen=a1=a2=a3=a4=b1=b2=b3=b4=0; | |
282 if ( | |
283 ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d\" %*[Ee]nd=\"%d:%d\"%*[^<]<clear/>%n",&a2,&a3,&b2,&b3,&plen)) < 4) && | |
284 ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d\" %*[Ee]nd=\"%d:%d.%d\"%*[^<]<clear/>%n",&a2,&a3,&b2,&b3,&b4,&plen)) < 5) && | |
285 // ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d.%d\" %*[Ee]nd=\"%d:%d\"%*[^<]<clear/>%n",&a2,&a3,&a4,&b2,&b3,&plen)) < 5) && | |
286 ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d.%d\" %*[Ee]nd=\"%d:%d.%d\"%*[^<]<clear/>%n",&a2,&a3,&a4,&b2,&b3,&b4,&plen)) < 6) && | |
287 ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d:%d.%d\" %*[Ee]nd=\"%d:%d:%d.%d\"%*[^<]<clear/>%n",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4,&plen)) < 8) | |
288 ) | |
289 continue; | |
290 current->start = a1*360000+a2*6000+a3*100+a4/10; | |
291 current->end = b1*360000+b2*6000+b3*100+b4/10; | |
292 p=line; p+=plen;i=0; | |
293 // TODO: I don't know what kind of convention is here for marking multiline subs, maybe <br/> like in xml? | |
294 next = strstr(line,"<clear/>")+8;i=0; | |
295 while ((next =sub_readtext (next, &(current->text[i])))) { | |
296 if (current->text[i]==ERR) {return ERR;} | |
297 i++; | |
1081 | 298 if (i>=SUB_MAX_TEXT) { printf ("Too many lines in a subtitle\n");current->lines=i;return current;} |
850 | 299 } |
300 current->lines=i+1; | |
301 } | |
302 return current; | |
303 } | |
304 | |
921 | 305 subtitle *sub_read_line_ssa(FILE *fd,subtitle *current) { |
306 int hour1, min1, sec1, hunsec1, | |
307 hour2, min2, sec2, hunsec2, nothing; | |
308 | |
309 char line[1000], | |
310 line2[1000]; | |
311 do { | |
312 if (!fgets (line, 1000, fd)) return NULL; | |
313 } while (sscanf (line, "Dialogue: Marked=%d,%d:%d:%d.%d,%d:%d:%d.%d," | |
314 "*Default,%d,%d,%d,%d,,%[^\n\r]", ¬hing, &hour1, &min1, | |
315 &sec1, &hunsec1, | |
316 &hour2, &min2, &sec2, &hunsec2, ¬hing, | |
317 ¬hing, ¬hing, ¬hing, line2) < 14); | |
318 current->lines=1; | |
319 current->start = 360000*hour1 + 6000*min1 + 100*sec1 + hunsec1; | |
320 current->end = 360000*hour2 + 6000*min2 + 100*sec2 + hunsec2; | |
321 current->text[0]=(char *) malloc(strlen(line2)+1); | |
322 strcpy(current->text[0],line2); | |
818 | 323 |
921 | 324 return current; |
325 } | |
258 | 326 |
1081 | 327 subtitle *sub_read_line_dunnowhat(FILE *fd,subtitle *current) { |
328 char line[1001]; | |
329 char text[1001]; | |
330 | |
1764 | 331 bzero (current, sizeof(subtitle)); |
1081 | 332 |
333 if (!fgets (line, 1000, fd)) | |
334 return NULL; | |
335 if (sscanf (line, "%ld,%ld,\"%[^\"]", &(current->start), | |
336 &(current->end), text) <3) | |
337 return ERR; | |
338 current->text[0] = strdup(text); | |
339 current->lines = 1; | |
340 | |
341 return current; | |
342 } | |
343 | |
258 | 344 int sub_autodetect (FILE *fd) { |
345 char line[1001]; | |
346 int i,j=0; | |
347 // char *p; | |
348 | |
624 | 349 while (j < 100) { |
258 | 350 j++; |
351 if (!fgets (line, 1000, fd)) | |
352 return -1; | |
353 | |
624 | 354 if (sscanf (line, "{%d}{%d}", &i, &i)==2) |
258 | 355 {sub_uses_time=0;return 0;} |
269 | 356 if (sscanf (line, "%d:%d:%d.%d,%d:%d:%d.%d", &i, &i, &i, &i, &i, &i, &i, &i)==8) |
258 | 357 {sub_uses_time=1;return 1;} |
269 | 358 if (sscanf (line, "%d:%d:%d,%d --> %d:%d:%d,%d", &i, &i, &i, &i, &i, &i, &i, &i)==8) |
258 | 359 {sub_uses_time=1;return 2;} |
624 | 360 if (strstr (line, "<SAMI>")) |
706
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
361 {sub_uses_time=1; return 3;} |
818 | 362 if (sscanf (line, "%d:%d:%d:", &i, &i, &i )==3) |
363 {sub_uses_time=1;return 4;} | |
850 | 364 //TODO: just checking if first line of sub starts with "<" is WAY |
913
18c43d261c35
corrected strcmp() bug, now it works again with every subs (it was broken)
laaz
parents:
896
diff
changeset
|
365 // too weak test for RT |
18c43d261c35
corrected strcmp() bug, now it works again with every subs (it was broken)
laaz
parents:
896
diff
changeset
|
366 // Please someone who knows the format of RT... FIX IT!!! |
921 | 367 // It may conflict with other sub formats in the future (actually it doesn't) |
913
18c43d261c35
corrected strcmp() bug, now it works again with every subs (it was broken)
laaz
parents:
896
diff
changeset
|
368 if ( *line == '<' ) |
850 | 369 {sub_uses_time=1;return 5;} |
921 | 370 |
371 // I have only seen only 1 piece of .ssa file. | |
372 // It may be not correct (tell me if it's not) | |
373 if (!memcmp(line, "Dialogue: Marked", 16)) | |
374 {sub_uses_time=1; return 6;} | |
1081 | 375 if (sscanf (line, "%d,%d,\"%c", &i, &i, (char *) &i) == 3) |
376 {sub_uses_time=0;return 7;} | |
258 | 377 } |
624 | 378 |
379 return -1; // too many bad lines | |
258 | 380 } |
381 | |
382 | |
383 subtitle* sub_read_file (char *filename) { | |
384 FILE *fd; | |
385 int n_max; | |
386 subtitle *first; | |
1081 | 387 subtitle * (*func[])(FILE *fd,subtitle *dest)= |
258 | 388 { |
389 sub_read_line_microdvd, | |
390 sub_read_line_subrip, | |
624 | 391 sub_read_line_third, |
818 | 392 sub_read_line_sami, |
850 | 393 sub_read_line_vplayer, |
921 | 394 sub_read_line_rt, |
1081 | 395 sub_read_line_ssa, |
396 sub_read_line_dunnowhat | |
258 | 397 }; |
398 | |
399 fd=fopen (filename, "r"); if (!fd) return NULL; | |
400 | |
401 sub_format=sub_autodetect (fd); | |
402 if (sub_format==-1) {printf ("SUB: Could not determine file format\n");return NULL;} | |
624 | 403 printf ("SUB: Detected subtitle file format: %d\n",sub_format); |
258 | 404 |
405 rewind (fd); | |
406 | |
407 sub_num=0;n_max=32; | |
408 first=(subtitle *)malloc(n_max*sizeof(subtitle)); | |
409 if(!first) return NULL; | |
410 | |
411 while(1){ | |
412 subtitle *sub; | |
413 if(sub_num>=n_max){ | |
414 n_max+=16; | |
415 first=realloc(first,n_max*sizeof(subtitle)); | |
416 } | |
417 sub=func[sub_format](fd,&first[sub_num]); | |
418 if(!sub) break; // EOF | |
419 if(sub==ERR) ++sub_errs; else ++sub_num; // Error vs. Valid | |
420 } | |
421 | |
422 fclose(fd); | |
423 | |
424 // printf ("SUB: Subtitle format %s time.\n", sub_uses_time?"uses":"doesn't use"); | |
269 | 425 printf ("SUB: Read %i subtitles", sub_num); |
624 | 426 if (sub_errs) printf (", %i bad line(s).\n", sub_errs); |
269 | 427 else printf (".\n"); |
258 | 428 |
429 return first; | |
430 } | |
431 | |
892 | 432 #if 0 |
/*
 * Overwrite the first occurrence of what inside in with whereof, in place.
 *
 * No text is shifted: whereof is copied over the match byte for byte.  When
 * whereof is shorter than what, the string is terminated right after the
 * copied text (the rest of in is cut off); when it is longer, the characters
 * following the match are overwritten, so the caller must make sure the
 * buffer is large enough.
 *
 * Returns in, or NULL when an argument is NULL or what does not occur in in.
 */
char * strreplace( char * in,char * what,char * whereof )
{
    size_t what_len, with_len;
    char * tmp;

    if ( ( in == NULL )||( what == NULL )||( whereof == NULL )||( ( tmp=strstr( in,what ) ) == NULL ) ) return NULL;

    /* measure once instead of on every loop iteration */
    what_len=strlen( what );
    with_len=strlen( whereof );

    memmove( tmp,whereof,with_len );
    if ( what_len > with_len ) tmp[with_len]=0;
    return in;
}
892 | 443 #endif |
509 | 444 |
/*
 * Look for a subtitle file that belongs to the movie file fname.
 *
 * Two candidate names are tried, each with every known extension in turn:
 *   1. path followed by the base name of fname
 *   2. fname itself
 * For every candidate the extension of the movie name is replaced by the
 * subtitle extension and the file is opened for reading as a test.
 * sub_utf8 is set to 1 when the match has a ".utf"/".UTF" extension.
 *
 * Returns a malloc()ed string with the name of the first file found (the
 * caller presumably owns it and has to free() it), or NULL when fname is
 * NULL, has no '.', memory runs out, or no file is found.  A NULL path is
 * treated as an empty prefix.
 */
char * sub_filename(char* path, char * fname )
{
    extern int sub_utf8;
    char * sub_name1;
    char * sub_name2;
    char * aviptr1, * aviptr2, * tmp;
    int i,j;
    FILE * f;
    char * sub_exts[] =
    {   ".utf",
        ".UTF",
        ".sub",
        ".SUB",
        ".srt",
        ".SRT",
        ".smi",
        ".SMI",
        ".rt",
        ".RT",
        ".txt",
        ".TXT",
        ".ssa",
        ".SSA"};

    if ( fname == NULL ) return NULL;
    if ( !strrchr(fname,'.') ) return NULL;
    if ( path == NULL ) path = "";

    /* +8 leaves room for the longest extension and the terminator */
    sub_name1=malloc(strlen(fname)+8);
    sub_name2=malloc(strlen(path)+strlen(fname)+8);
    if (!sub_name1 || !sub_name2) {
        free(sub_name1);
        free(sub_name2);
        return NULL;
    }

    strcpy(sub_name1,fname);

    if ((tmp=strrchr(fname,'/')))
        sprintf (sub_name2, "%s%s", path, tmp+1);
    else
        sprintf (sub_name2, "%s%s", path, fname);

    aviptr1=strrchr(sub_name1,'.');
    /* Only look for the extension in the base name, never inside path; if
     * the base name has none (the '.' was in a directory part of fname),
     * append the extension instead. */
    aviptr2=strrchr(sub_name2+strlen(path),'.');
    if (!aviptr2) aviptr2=sub_name2+strlen(sub_name2);

    for(j=0;j<=1;j++){
        char* sub_name=j?sub_name1:sub_name2;
        char* ext_pos=j?aviptr1:aviptr2;
        for ( i=0;i<(int)(sizeof(sub_exts)/sizeof(char*));i++ ) {
            strcpy(ext_pos,sub_exts[i]);
            if((f=fopen( sub_name,"rt" ))) {
                fclose( f );
                printf( "SUB: Detected sub file: %s\n",sub_name );
                if (i<2) sub_utf8=1;
                /* the other candidate is no longer needed */
                free(j?sub_name2:sub_name1);
                return sub_name;
            }
        }
    }

    free(sub_name1);
    free(sub_name2);
    return NULL;
}
505 | |
1761 | 506 void list_sub_file(subtitle* subs){ |
507 int i,j; | |
508 | |
509 for(j=0;j<sub_num;j++){ | |
510 subtitle* egysub=&subs[j]; | |
511 printf ("%i line%c (%li-%li) ", | |
512 egysub->lines, | |
513 (1==egysub->lines)?' ':'s', | |
514 egysub->start, | |
515 egysub->end); | |
516 for (i=0; i<egysub->lines; i++) { | |
517 printf ("%s%s",egysub->text[i], i==egysub->lines-1?"":" <BREAK> "); | |
518 } | |
519 printf ("\n"); | |
520 } | |
521 | |
522 printf ("Subtitle format %s time.\n", sub_uses_time?"uses":"doesn't use"); | |
523 printf ("Read %i subtitles, %i errors.\n", sub_num, sub_errs); | |
524 | |
525 } | |
526 | |
625 | 527 #if 0 |
258 | 528 int main(int argc, char **argv) { // for testing |
529 | |
530 int i,j; | |
531 subtitle *subs; | |
532 subtitle *egysub; | |
533 | |
534 if(argc<2){ | |
535 printf("\nUsage: subreader filename.sub\n\n"); | |
536 exit(1); | |
537 } | |
538 | |
624 | 539 subs=sub_read_file(argv[1]); |
258 | 540 if(!subs){ |
541 printf("Couldn't load file... let's write a bugreport :)\n"); | |
542 exit(1); | |
543 } | |
1761 | 544 |
545 list_sub_file(subs); | |
258 | 546 |
547 return 0; | |
548 } | |
706
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
549 #endif |