Mercurial > mplayer.hg
annotate subreader.c @ 817:5fccfe90850c
updated
author | arpi_esp |
---|---|
date | Thu, 17 May 2001 01:07:56 +0000 |
parents | 8a7666a78f83 |
children | 16ba80b47ec4 |
rev | line source |
---|---|
258 | 1 /* |
2 * Subtitle reader with format autodetection | |
3 * | |
4 * Written by laaz | |
5 * Some code cleanup & realloc() by A'rpi/ESP-team | |
6 */ | |
7 | |
8 | |
9 #include <stdio.h> | |
10 #include <stdlib.h> | |
11 #include <string.h> | |
706
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
12 #include <ctype.h> |
258 | 13 |
14 #include "subreader.h" | |
15 | |
/* Sentinel pointer returned by the line readers for an entry they could not parse. */
#define ERR ((void *)-1)


/* 1 when the detected format is time based, 0 when it is frame based (set by sub_autodetect). */
int sub_uses_time = 0;
/* number of entries for which a line reader returned ERR */
int sub_errs = 0;
/* number of subtitle structs */
int sub_num = 0;
/*
 * Detected subtitle format, -1 while unknown:
 *   0  microdvd
 *   1  SubRip
 *   2  the third format (what's this?)
 *   3  SAMI (smi)
 */
int sub_format = -1;

/* Return 1 when p ends a line (carriage return, line feed or NUL), 0 otherwise. */
int eol(char p) {
    switch (p) {
    case '\r':
    case '\n':
    case '\0':
        return 1;
    default:
        return 0;
    }
}
30 | |
706
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
/*
 * Strip leading and trailing whitespace from the NUL-terminated string s,
 * in place. A string consisting only of whitespace becomes "".
 */
static inline void trail_space(char *s) {
    char *first = s;
    size_t len;

    /* The cast matters: handing isspace() a negative char (any byte >= 0x80
     * of a two-byte character where char is signed) is undefined behaviour. */
    while (isspace((unsigned char)*first)) first++;

    len = strlen(first);
    while (len > 0 && isspace((unsigned char)first[len - 1])) len--;

    /* One memmove instead of repeated strcpy(s, s + 1): strcpy must not be
     * used on overlapping buffers, and shifting once keeps this linear. */
    if (first != s) memmove(s, first, len);
    s[len] = '\0';
}
624 | 37 |
38 subtitle *sub_read_line_sami(FILE *fd, subtitle *current) { | |
706
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
39 static char line[1001]; |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
40 static char *s = NULL; |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
41 char text[1000], *p, *q; |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
42 int state; |
624 | 43 |
706
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
44 current->lines = current->start = current->end = 0; |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
45 state = 0; |
624 | 46 |
706
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
47 /* read the first line */ |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
48 if (!s) |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
49 if (!(s = fgets(line, 1000, fd))) return 0; |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
50 |
624 | 51 do { |
706
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
52 switch (state) { |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
53 |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
54 case 0: /* find "START=" */ |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
55 s = strstr (s, "Start="); |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
56 if (s) { |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
57 current->start = strtol (s + 6, &s, 0) / 10; |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
58 state = 1; continue; |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
59 } |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
60 break; |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
61 |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
62 case 1: /* find "<P" */ |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
63 if ((s = strstr (s, "<P"))) { s += 2; state = 2; continue; } |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
64 break; |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
65 |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
66 case 2: /* find ">" */ |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
67 if ((s = strchr (s, '>'))) { s++; state = 3; p = text; continue; } |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
68 break; |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
69 |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
70 case 3: /* get all text until '<' appears */ |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
71 if (*s == '\0') { break; } |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
72 else if (*s == '<') { state = 4; } |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
73 else if (!strncasecmp (s, " ", 6)) { *p++ = ' '; s += 6; } |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
74 else if (*s == '\r') { s++; } |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
75 else if (!strncasecmp (s, "<br>", 4) || *s == '\n') { |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
76 *p = '\0'; p = text; trail_space (text); |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
77 if (text[0] != '\0') |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
78 current->text[current->lines++] = strdup (text); |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
79 if (*s == '\n') s++; else s += 4; |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
80 } |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
81 else *p++ = *s++; |
624 | 82 continue; |
706
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
83 |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
84 case 4: /* get current->end or skip <TAG> */ |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
85 q = strstr (s, "Start="); |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
86 if (q) { |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
87 current->end = strtol (q + 6, &q, 0) / 10 - 1; |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
88 *p = '\0'; trail_space (text); |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
89 if (text[0] != '\0') |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
90 current->text[current->lines++] = strdup (text); |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
91 if (current->lines > 0) { state = 99; break; } |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
92 state = 0; continue; |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
93 } |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
94 s = strchr (s, '>'); |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
95 if (s) { s++; state = 3; continue; } |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
96 break; |
624 | 97 } |
98 | |
706
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
99 /* read next line */ |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
100 if (state != 99 && !(s = fgets (line, 1000, fd))) return 0; |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
101 |
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
102 } while (state != 99); |
624 | 103 |
104 return current; | |
105 } | |
258 | 106 |
107 | |
108 char *sub_readtext(char *source, char **dest) { | |
109 int len=0; | |
110 char *p; | |
111 | |
112 for (p=source;*p!='\r' && *p!='\n' && *p!='|'; p++,len++); | |
113 | |
114 *dest= (char *)malloc (len+1); | |
115 if (!dest) {return ERR;} | |
116 | |
117 strncpy(*dest, source, len); | |
118 (*dest)[len]=0; | |
119 | |
120 while (*p=='\r' || *p=='\n' || *p=='|') p++; | |
121 | |
122 if (*p) return p; // not-last text field | |
123 else return NULL; // last text field | |
124 } | |
125 | |
126 subtitle *sub_read_line_microdvd(FILE *fd,subtitle *current) { | |
127 char line[1001]; | |
128 char line2[1001]; | |
129 char *p, *next; | |
130 int i; | |
131 | |
132 bzero (current, sizeof(current)); | |
133 | |
134 do { | |
135 if (!fgets (line, 1000, fd)) return NULL; | |
136 } while (*line=='\n' || *line == '\r' || !*line); | |
137 | |
605 | 138 if (sscanf (line, "{%ld}{%ld}%s", &(current->start), &(current->end),line2) <2) {return ERR;} |
258 | 139 |
140 p=line; | |
141 while (*p++!='}'); | |
142 while (*p++!='}'); | |
143 | |
144 next=p, i=0; | |
145 while ((next =sub_readtext (next, &(current->text[i])))) { | |
270 | 146 if (current->text[i]==ERR) {return ERR;} |
258 | 147 i++; |
678 | 148 if (i>=SUB_MAX_TEXT) { printf ("Too many lines in a subtitle\n");current->lines=i;return;} |
258 | 149 } |
150 current->lines=i+1; | |
151 | |
152 return current; | |
153 } | |
154 | |
155 subtitle *sub_read_line_subrip(FILE *fd, subtitle *current) { | |
156 char line[1001]; | |
157 int a1,a2,a3,a4,b1,b2,b3,b4; | |
158 char *p=NULL, *q=NULL; | |
159 int len; | |
160 | |
161 bzero (current, sizeof(current)); | |
162 | |
163 while (!current->text[0]) { | |
164 if (!fgets (line, 1000, fd)) return NULL; | |
269 | 165 if (sscanf (line, "%d:%d:%d.%d,%d:%d:%d.%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4) < 8) continue; |
258 | 166 current->start = a1*360000+a2*6000+a3*100+a4; |
167 current->end = b1*360000+b2*6000+b3*100+b4; | |
168 | |
169 if (!fgets (line, 1000, fd)) return NULL; | |
170 | |
171 p=q=line; | |
172 for (current->lines=1; current->lines < SUB_MAX_TEXT; current->lines++) { | |
173 for (q=p,len=0; *p && *p!='\r' && *p!='\n' && strncmp(p,"[br]",4); p++,len++); | |
174 current->text[current->lines-1]=(char *)malloc (len+1); | |
175 if (!current->text[current->lines-1]) return ERR; | |
176 strncpy (current->text[current->lines-1], q, len); | |
270 | 177 current->text[current->lines-1][len]='\0'; |
258 | 178 if (!*p || *p=='\r' || *p=='\n') break; |
179 while (*p++!=']'); | |
180 } | |
181 } | |
182 return current; | |
183 } | |
184 | |
185 subtitle *sub_read_line_third(FILE *fd,subtitle *current) { | |
186 char line[1001]; | |
187 int a1,a2,a3,a4,b1,b2,b3,b4; | |
188 char *p=NULL; | |
189 int i,len; | |
190 | |
191 bzero (current, sizeof(current)); | |
192 | |
193 while (!current->text[0]) { | |
194 if (!fgets (line, 1000, fd)) return NULL; | |
269 | 195 if ((len=sscanf (line, "%d:%d:%d,%d --> %d:%d:%d,%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4)) < 8) |
258 | 196 continue; |
197 current->start = a1*360000+a2*6000+a3*100+a4/10; | |
198 current->end = b1*360000+b2*6000+b3*100+b4/10; | |
199 for (i=0; i<SUB_MAX_TEXT;) { | |
269 | 200 if (!fgets (line, 1000, fd)) break; |
258 | 201 len=0; |
202 for (p=line; *p!='\n' && *p!='\r' && *p; p++,len++); | |
203 if (len) { | |
204 current->text[i]=(char *)malloc (len+1); | |
205 if (!current->text[i]) return ERR; | |
270 | 206 strncpy (current->text[i], line, len); current->text[i][len]='\0'; |
258 | 207 i++; |
208 } else { | |
209 break; | |
210 } | |
211 } | |
212 current->lines=i; | |
213 } | |
214 return current; | |
215 } | |
216 | |
217 | |
218 int sub_autodetect (FILE *fd) { | |
219 char line[1001]; | |
220 int i,j=0; | |
221 // char *p; | |
222 | |
624 | 223 while (j < 100) { |
258 | 224 j++; |
225 if (!fgets (line, 1000, fd)) | |
226 return -1; | |
227 | |
624 | 228 if (sscanf (line, "{%d}{%d}", &i, &i)==2) |
258 | 229 {sub_uses_time=0;return 0;} |
269 | 230 if (sscanf (line, "%d:%d:%d.%d,%d:%d:%d.%d", &i, &i, &i, &i, &i, &i, &i, &i)==8) |
258 | 231 {sub_uses_time=1;return 1;} |
269 | 232 if (sscanf (line, "%d:%d:%d,%d --> %d:%d:%d,%d", &i, &i, &i, &i, &i, &i, &i, &i)==8) |
258 | 233 {sub_uses_time=1;return 2;} |
624 | 234 if (strstr (line, "<SAMI>")) |
706
8a7666a78f83
better .smi support and display two-byte characters- patch by Sunjin Yang
arpi_esp
parents:
678
diff
changeset
|
235 {sub_uses_time=1; return 3;} |
258 | 236 } |
624 | 237 |
238 return -1; // too many bad lines | |
258 | 239 } |
240 | |
241 | |
242 subtitle* sub_read_file (char *filename) { | |
243 FILE *fd; | |
244 int n_max; | |
245 subtitle *first; | |
624 | 246 subtitle * (*func[4])(FILE *fd,subtitle *dest)= |
258 | 247 { |
248 sub_read_line_microdvd, | |
249 sub_read_line_subrip, | |
624 | 250 sub_read_line_third, |
251 sub_read_line_sami | |
258 | 252 }; |
253 | |
254 fd=fopen (filename, "r"); if (!fd) return NULL; | |
255 | |
256 sub_format=sub_autodetect (fd); | |
257 if (sub_format==-1) {printf ("SUB: Could not determine file format\n");return NULL;} | |
624 | 258 printf ("SUB: Detected subtitle file format: %d\n",sub_format); |
258 | 259 |
260 rewind (fd); | |
261 | |
262 sub_num=0;n_max=32; | |
263 first=(subtitle *)malloc(n_max*sizeof(subtitle)); | |
264 if(!first) return NULL; | |
265 | |
266 while(1){ | |
267 subtitle *sub; | |
268 if(sub_num>=n_max){ | |
269 n_max+=16; | |
270 first=realloc(first,n_max*sizeof(subtitle)); | |
271 } | |
272 sub=func[sub_format](fd,&first[sub_num]); | |
273 if(!sub) break; // EOF | |
274 if(sub==ERR) ++sub_errs; else ++sub_num; // Error vs. Valid | |
275 } | |
276 | |
277 fclose(fd); | |
278 | |
279 // printf ("SUB: Subtitle format %s time.\n", sub_uses_time?"uses":"doesn't use"); | |
269 | 280 printf ("SUB: Read %i subtitles", sub_num); |
624 | 281 if (sub_errs) printf (", %i bad line(s).\n", sub_errs); |
269 | 282 else printf (".\n"); |
258 | 283 |
284 return first; | |
285 } | |
286 | |
/*
 * Overwrite the first occurrence of what inside in with whereof, in place.
 *
 * The characters of whereof are copied over the match. When what is longer
 * than whereof the string is cut right after the copied characters, so
 * anything that followed the match is lost. When whereof is longer than
 * what, the extra characters overwrite whatever follows the match and no
 * terminator is written, so the caller has to make sure the buffer is large
 * enough.
 *
 * Returns in, or NULL when an argument is NULL or what does not occur in in.
 */
char * strreplace( char * in,char * what,char * whereof )
{
 char * hit;
 size_t n, k;

 if ( in == NULL || what == NULL || whereof == NULL ) return NULL;
 hit=strstr( in,what );
 if ( hit == NULL ) return NULL;

 n=strlen( whereof );
 for ( k=0;k < n;k++ ) hit[k]=whereof[k];
 if ( strlen( what ) > n ) hit[n]=0;
 return in;
}
297 | |
/*
 * Look for a subtitle file that belongs to the file fname.
 *
 * The extension of fname (everything from its last '.') is replaced in turn
 * by each known subtitle extension and the first candidate that can be
 * opened for reading wins.
 *
 * Returns a malloc()ed string holding the name of that file (the caller
 * frees it), or NULL when fname is NULL, has no extension, or no candidate
 * exists.
 */
char * sub_filename( char * fname )
{
 char * sub_name;
 char * dot;
 size_t base_len, ext_len, max_ext = 0;
 int i;
#define SUB_EXTS 6
 static const char * const sub_exts[SUB_EXTS] =
  { ".sub",
    ".SUB",
    ".srt",
    ".SRT",
    ".smi",
    ".SMI"};

 if ( fname == NULL ) return NULL;

 /* a name without a dot, or whose only dot is its first character, has no
    extension to replace */
 dot=strrchr( fname,'.' );
 if ( dot == NULL || dot == fname ) return NULL;
 /* a '/' after the last dot means the dot is part of a directory name */
 if ( strchr( dot,'/' ) != NULL ) return NULL;

 for ( i=0;i<SUB_EXTS;i++ )
  {
   ext_len=strlen( sub_exts[i] );
   if ( ext_len > max_ext ) max_ext=ext_len;
  }

 /* Size the buffer for the longest candidate: the video extension may be
    shorter than the subtitle one (".rm" -> ".sub"), so a copy of fname is
    not necessarily big enough. */
 base_len=(size_t)( dot - fname );
 sub_name=malloc( base_len + max_ext + 1 );
 if ( sub_name == NULL ) return NULL;
 memcpy( sub_name,fname,base_len );

 for ( i=0;i<SUB_EXTS;i++ )
  {
   FILE * f;

   strcpy( sub_name + base_len,sub_exts[i] );
   f=fopen( sub_name,"rt" );
   if ( f != NULL )
    {
     fclose( f );
     printf( "SUB: Detected sub file: %s\n",sub_name );
     return sub_name;
    }
  }

 free( sub_name );      /* nothing found: do not leak the scratch name */
 return NULL;
}
336 | |
#if 0
/*
 * Stand-alone test driver (compiled out): loads the subtitle file named on
 * the command line and dumps every subtitle with its line count, its
 * start/end values and its text lines joined by " <BREAK> ".
 */
int main(int argc, char **argv) {
    int line_no, sub_no;
    subtitle *list;
    subtitle *entry;

    if (argc < 2) {
        printf("\nUsage: subreader filename.sub\n\n");
        exit(1);
    }

    list = sub_read_file(argv[1]);
    if (!list) {
        printf("Couldn't load file... let's write a bugreport :)\n");
        exit(1);
    }

    for (sub_no = 0; sub_no < sub_num; sub_no++) {
        entry = &list[sub_no];
        printf ("%i line%c (%i-%i) ",
                entry->lines,
                (1==entry->lines)?' ':'s',
                entry->start,
                entry->end);
        for (line_no = 0; line_no < entry->lines; line_no++) {
            /* separator between text lines, nothing after the last one */
            printf ("%s%s",entry->text[line_no], line_no==entry->lines-1?"":" <BREAK> ");
        }
        printf ("\n");
    }

    printf ("Subtitle format %s time.\n", sub_uses_time?"uses":"doesn't use");
    printf ("Read %i subtitles, %i errors.\n", sub_num, sub_errs);
    return 0;
}
#endif