Mercurial > mplayer.hg
annotate TOOLS/subedit.pl @ 34697:ac6b38cd0d45
Rename sub window video window.
It was a bad idea to name the video window "sub window" at the time
the GUI was written. The term "sub window" does make sense from the
programmer's point of view, but it doesn't make any sense at all from
the user's point of view, because the sub window simply is the window
where the video will be displayed.
Moreover, since the term "sub" is generally short for "subtitles",
the renaming makes the code much easier to understand.
author | ib |
---|---|
date | Sat, 03 Mar 2012 16:45:15 +0000 |
parents | 92a795af2600 |
children |
rev | line source |
---|---|
12686 | 1 #!/usr/bin/perl -w |
2 | |
3 # A script for pipelined editing of subtitle files. | |
4 # Copyright (C) 2004 Michael Klepikov <mike72@mail.ru> | |
5 # | |
6 # Version 1.0 initial release 28-Mar-04 | |
7 # | |
8 # Comments, suggestions -- send me an e-mail, but the recommended way is | |
9 # to enhance/fix on your own and submit to the distribution;) | |
10 # If you like, I can review the fixes. | |
11 # | |
12 # This script is free software; you can redistribute it and/or | |
13 # modify it under the terms of the GNU Lesser General Public | |
14 # License as published by the Free Software Foundation; either | |
25542
92a795af2600
Fix license header to read Lesser General Public License 2.1,
diego
parents:
17367
diff
changeset
|
15 # version 2.1 of the License, or (at your option) any later version. |
12686 | 16 # Retain original credits when modifying. |
17 # | |
18 # This script is distributed in the hope that it will be useful, | |
19 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
21 # Lesser General Public License for more details. | |
22 # | |
23 # You should have received a copy of the GNU Lesser General Public | |
24 # License along with this library; if not, write to the Free Software | |
17367
401b440a6d76
Update licensing information: The FSF changed postal address.
diego
parents:
12686
diff
changeset
|
25 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
12686 | 26 # |
27 | |
28 use Math::BigInt; | |
29 | |
# Constants: numeric codes identifying subtitle formats
my $FMT_UNKNOWN = 0;
my $FMT_SRT = 1;

# Argument values (left undefined when the switch is not given)
my $DEBUG = 0;
my $inFormat;
my $outFormat;
my $shiftMilli;
my $scaleMilli;
my $splitFromMilli;
my $splitToMilli;

## Process command line
# Each switch is shifted off @ARGV; switches that take a value shift once more.
# Any parse error is reported through usage(), which does not return.
while (defined (my $argVal = shift)) {
    if ($argVal eq "-d" || $argVal eq "--debug") {
        $DEBUG = 1;
    } elsif ($argVal eq "-if" || $argVal eq "--input-format") {
        $inFormat = shift;
        usage ("Must specify input format") if ! $inFormat;
        if ($inFormat =~ /^srt/i) {
            $inFormat = $FMT_SRT;
        } else {
            usage ("Invalid input format");
        }
    } elsif ($argVal eq "-of" || $argVal eq "--output-format") {
        $outFormat = shift;
        # This message used to say "input format" (copy-paste slip)
        usage ("Must specify output format") if ! $outFormat;
        if ($outFormat =~ /^srt/i) {
            $outFormat = $FMT_SRT;
        } else {
            usage ("Invalid output format");
        }
    } elsif ($argVal eq "-s" || $argVal eq "--shift") {
        my $argTime = shift;
        if (! defined $argTime ||
            ! defined ($shiftMilli = getTimeMillis ($argTime))) {
            usage ("Invalid shift time value");
        }
    } elsif ($argVal eq "-c" || $argVal eq "--scale") {
        my $argTime = shift;
        if (! defined $argTime ||
            ! defined ($scaleMilli = getTimeMillis ($argTime))) {
            usage ("Invalid scale time value");
        }
    } elsif ($argVal eq "-f" || $argVal eq "--split-from") {
        my $argTime = shift;
        if (! defined $argTime ||
            ! defined ($splitFromMilli = getTimeMillis ($argTime))) {
            usage ("Invalid split start time value");
        }
    } elsif ($argVal eq "-t" || $argVal eq "--split-to") {
        my $argTime = shift;
        if (! defined $argTime ||
            ! defined ($splitToMilli = getTimeMillis ($argTime))) {
            usage ("Invalid split end time value");
        }
    } elsif ($argVal eq "-r" || $argVal eq "--renumber") {
        # The help text lists this switch. Output is always renumbered,
        # so there is nothing to do; accept it instead of rejecting it.
    } elsif ($argVal eq "-h" || $argVal eq "--help") {
        usage ();
    } else {
        usage ("Unrecognized argument $argVal");
    }
}
93 | |
# Input format defaults to SRT
$inFormat = $FMT_SRT if (! defined $inFormat);
# Output format defaults to the same as input
$outFormat = $inFormat if (! defined $outFormat);

## Read

my $subs;
if ($inFormat == $FMT_SRT) {
    $subs = readSRT (*STDIN);
    printf STDERR ("Read %d SRT subs\n", scalar @{$subs}) if $DEBUG;
    # Sort by start time. Both sides of the comparison must use the same
    # key; comparing a start time against an end time is not a consistent
    # ordering. Ties are broken by end time so the result is deterministic.
    @{$subs} = sort {
        $a -> {srtStartTime} <=> $b -> {srtStartTime}
            || $a -> {srtEndTime} <=> $b -> {srtEndTime}
    } @{$subs};
}
108 | |
## Transform
# The steps run in a fixed order: shift, then split, then scale.

# Shift: skipped when no shift was requested or the shift is zero
my $wantShift = defined $shiftMilli && $shiftMilli != 0;
if ($wantShift) {
    printf STDERR ("Shift: %d milliseconds\n", $shiftMilli) if $DEBUG;
    shiftSRT ($subs, $shiftMilli);
}

# Split: runs when at least one of the two boundaries was given
if (defined $splitFromMilli || defined $splitToMilli) {
    if ($DEBUG) {
        # An open (undefined) boundary is shown as "-"
        my ($printFrom, $printTo) =
            map { defined $_ ? $_ : "-" } ($splitFromMilli, $splitToMilli);
        printf STDERR ("Split: from $printFrom to $printTo\n");
    }
    splitSRT ($subs, $splitFromMilli, $splitToMilli);
}

# Scale: stretch all timings so that the end of the last subtitle moves
# by $scaleMilli; nothing to do for an empty subtitle list
my $wantScale = defined $scaleMilli && $scaleMilli != 0;
if ($wantScale && scalar @{$subs} > 0) {
    my $lastTimeOrig = $subs -> [-1] -> {srtEndTime};
    # The original end time is the divisor of the scale ratio
    die "Cannot scale when last subtitle ends at 00:00:00,000"
        if $lastTimeOrig == 0;
    my $lastTimeScaled = $lastTimeOrig + $scaleMilli;
    printf STDERR ("Scale: %d/%d\n", $lastTimeScaled, $lastTimeOrig) if $DEBUG;
    scaleSRT ($subs, $lastTimeScaled, $lastTimeOrig);
}
137 | |
## Write
writeSRT (*STDOUT, $subs) if $outFormat == $FMT_SRT;

# Closing STDOUT explicitly lets buffered write failures
# (full disc, etc.) be reported instead of being lost silently.
close (STDOUT) or die "Cannot close output stream: $!";

exit 0;
148 | |
149 ## Subroutines | |
150 | |
# Convert string time format to milliseconds.
# Accepted form: "[-][[hh:]mm:]ss[.fraction]", SRT style such as
# "01:20:03.251"; "," is allowed instead of ".".
# Surrounding whitespace is ignored.
#
# Argument: the time string.
# Returns:  an integer number of milliseconds (negative when the string
#           has a leading minus sign), or undef in case of format error,
#           including strings that contain no digit at all.
#
# The fraction is truncated to whole milliseconds with string operations,
# so the result is always an exact integer (no floating point error).
# The global $_ is left untouched; callers keep their input line in it.
sub getTimeMillis
{
    my $text = shift;
    return undef if ! defined $text;

    # Strip surrounding spaces
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;

    # A time value needs at least one digit; "", "." or "-" are errors
    return undef if $text !~ /[0-9]/;

    my $millis = 0;

    # The string is consumed from the right: fraction, seconds, minutes,
    # hours, sign. Whatever is matched is cut off from $text.
    if ($text =~ /^(.*)[\.,]([0-9]*)$/) { # Fraction
        my ($rest, $frac) = ($1, $2);
        # Pad to three digits and cut: "5" -> 500, "03" -> 30, "2519" -> 251
        $millis += substr ($frac . "000", 0, 3);
        $text = $rest;
    }
    if ($text =~ /^(.*?)([0-9]+)$/) { # Seconds
        my ($rest, $secs) = ($1, $2);
        $millis += $secs * 1000;
        $text = $rest;
    }
    if ($text =~ /^(.*?)([0-9]+):$/) { # Minutes
        my ($rest, $mins) = ($1, $2);
        $millis += $mins * 60000;
        $text = $rest;
    }
    if ($text =~ /^(.*?)([0-9]+):$/) { # Hours
        my ($rest, $hours) = ($1, $2);
        $millis += $hours * 3600000;
        $text = $rest;
    }
    if ($text =~ s/-$//) { # Minus sign
        $millis = -$millis;
    }

    # Make sure we ate everything up
    return undef if $text ne "";
    return $millis;
}
193 | |
# Format a millisecond count as an SRT time stamp "hh:mm:ss,mmm".
# Negative values are printed with a leading "-" in front of the
# formatted absolute value. Each field is truncated to an integer
# (by the "%" operator and by the "%d" conversions).
sub getTimeSRT
{
    my $millis = shift;

    # Split off the sign and continue with the absolute value
    my $sign = "";
    if ($millis < 0) {
        $sign = "-";
        $millis = -$millis;
    }

    # Peel off one unit at a time; the divisions are ordinary (floating
    # point) divisions, the remainders work on the integer part
    my $fracPart = $millis % 1000;
    my $totalSec = $millis / 1000;
    my $secPart = $totalSec % 60;
    my $totalMin = $totalSec / 60;
    my $minPart = $totalMin % 60;
    my $hourPart = $totalMin / 60;

    return sprintf ("%s%02d:%02d:%02d,%03d",
                    $sign, $hourPart, $minPart, $secPart, $fracPart);
}
213 | |
# Read SRT subtitles.
#
# Argument: an input file handle (e.g. *STDIN).
# Returns:  a reference to an array with one hash reference per subtitle,
#           in input order, each with the keys
#             srtNumber    - the subtitle number as found in the input
#             srtStartTime - start time in milliseconds
#             srtEndTime   - end time in milliseconds
#             lines        - reference to an array of text lines with
#                            trailing whitespace removed
# Dies on malformed input, reporting the offending line number.
#
# The current line is kept in a lexical variable: getTimeMillis may
# overwrite the global $_, and error messages must show the input line.
# End of input is detected with "defined", so a last line consisting of
# "0" without a newline is not mistaken for EOF.
sub readSRT
{
    my $in = shift;
    my $subs = [];

    my $line = <$in>;
    print STDERR "Undefined first line\n" if ! defined $line && $DEBUG;
    my $lineNo = 1;
    READ_SUBS:
    while (defined $line) {
        # Each loop iteration reads one subtitle from the input
        my $sub = {};

        # Skip empty lines; EOF here simply ends the input
        while ($line =~ /^\s*$/) {
            $line = <$in>;
            last READ_SUBS if ! defined $line;
            ++$lineNo;
        }

        # Subtitle number
        if ($line =~ /^\s*([0-9]+)\s*$/) {
            $sub -> {srtNumber} = $1;
        } else {
            die "Invalid SRT format at line $lineNo";
        }

        # Timing
        $line = <$in>;
        die "Unexpected end of SRT stream at line $lineNo" if ! defined $line;
        ++$lineNo;
        if ($line =~ /^\s*(\S+)\s*--\>\s*(\S+)\s*$/) {
            my ($startText, $endText) = ($1, $2);
            my $startMillis = getTimeMillis ($startText);
            my $endMillis = getTimeMillis ($endText);
            die "Invalid SRT timing format at line $lineNo: $line"
                if ! defined $startMillis || ! defined $endMillis;
            $sub -> {srtStartTime} = $startMillis;
            $sub -> {srtEndTime} = $endMillis;
        } else {
            die "Invalid SRT timing format at line $lineNo: $line";
        }

        # Text lines; EOF or an empty line ends the subtitle
        my $subLines = [];
        while (defined ($line = <$in>)) {
            ++$lineNo;
            last if $line =~ /^\s*$/;
            $line =~ s/\s+$//;      # Strip trailing spaces
            push @{$subLines}, $line;
        }
        die "No text in SRT subtitle at line $lineNo" if 0 == scalar @{$subLines};
        $sub -> {lines} = $subLines;

        # Append subtitle to the list
        push @{$subs}, $sub;
    }
    print STDERR "SRT read ok, $lineNo lines\n" if $DEBUG;

    return $subs;
}
281 | |
# Write SRT subtitles.
#
# Arguments: an output file handle (e.g. *STDOUT) and a reference to the
#            array of subtitles (hashes with srtStartTime, srtEndTime and
#            lines).
# Subtitles are numbered 1, 2, 3, ... in array order; the number stored
# in the subtitle itself is not used. Every subtitle is followed by an
# empty line.
sub writeSRT
{
    use integer; # For integer division
    my $out = shift;
    my $subs = shift;

    my $subNum = 0;
    for my $sub (@{$subs}) {
        ++$subNum;

        # Header: running number and "start --> end" timing line
        my ($sTimeSRT, $eTimeSRT) =
            map { getTimeSRT ($sub -> {$_}) } qw(srtStartTime srtEndTime);
        printf {$out} ("%d\n%s --> %s\n", $subNum, $sTimeSRT, $eTimeSRT);

        # Text lines, then the separating empty line
        for my $text (@{$sub -> {lines}}) {
            printf {$out} ("%s\n", $text);
        }
        printf {$out} "\n";
    }
    printf STDERR ("Wrote %d SRT subs\n", $subNum) if $DEBUG;
}
304 | |
# Shift SRT subtitles by a given number of milliseconds.
# The number may be negative. The addition is done with integer
# arithmetic, so a fractional argument is truncated.
#
# Arguments: reference to the array of subtitles, shift in milliseconds.
# The start and end time of every subtitle are modified in place.
sub shiftSRT
{
    use integer; # $shiftMilli could be passed as float
    my ($subs, $shiftMilli) = @_;

    for my $sub (@{$subs}) {
        for my $key (qw(srtStartTime srtEndTime)) {
            $sub -> {$key} += $shiftMilli;
        }
    }
}
318 | |
# Multiply each subtitle timing by a dividend and divide by a divisor.
# The idea is that the dividend is usually the new total number of
# milliseconds in the subtitle file, and the divisor is the old
# total number of milliseconds in the subtitle file.
# We could simply use a double precision real coefficient instead of
# integer dividend and divisor, and that could be good enough, but
# using integer arithmetic *guarantees* precision up to the last
# digit, so why settle for good enough when we can have a guarantee.
#
# Uses Math::BigInt arithmetic, because it works with numbers
# up to (total number of milliseconds for a subtitle timing)^2,
# which could be on the order of approximately 1e+13, which is
# larger than maximum 32-bit integer.
# There is a performance loss when using BigInt vs. regular floating
# point arithmetic, but the actual performance is quite acceptable
# on files with a few thousand subtitles.
#
# Arguments: reference to the array of subtitles, dividend, divisor.
# Timings, dividend and divisor are truncated to integers first, because
# Math::BigInt turns a non-integer value into NaN.
# The start and end time of every subtitle are replaced in place by plain
# decimal integer strings (e.g. "1500", not the scientific "15e+2").
# The division is Math::BigInt's bdiv, which rounds negative quotients
# toward negative infinity.
# Dies when the divisor is zero.
sub scaleSRT
{
    my ($subs, $scaleDividend, $scaleDivisor) = @_;

    my $dividend = Math::BigInt -> new (int ($scaleDividend));
    my $divisor = Math::BigInt -> new (int ($scaleDivisor));
    die "Cannot scale with a zero divisor" if $divisor -> is_zero ();

    foreach my $sub (@{$subs}) {
        foreach my $key (qw(srtStartTime srtEndTime)) {
            my $t = Math::BigInt -> new (int ($sub -> {$key}));
            $t = $t -> bmul ($dividend);
            # Scalar assignment: in list context bdiv returns (quotient, remainder)
            $t = $t -> bdiv ($divisor);
            $sub -> {$key} = $t -> bstr ();
        }
    }
}
351 | |
# Extract a fragment within a given time interval.
# Either "from" or "to" may be undefined, meaning an open boundary.
#
# Arguments: reference to the array of subtitles, start boundary and end
#            boundary in milliseconds.
# A subtitle is kept when it ends at or after the start boundary and
# begins at or before the end boundary. Timings of kept subtitles that
# overlap a boundary are cut to that boundary. The array is modified in
# place and keeps its order.
#
# The list is rebuilt in a single pass instead of splicing elements out
# one by one, and the global $_ is not touched.
sub splitSRT
{
    use integer; # fromMilli and toMilli could be passed as floats, truncate
    my ($subs, $fromMilli, $toMilli) = @_;

    my @kept;
    foreach my $sub (@{$subs}) {
        # Drop subtitles that end before the start boundary
        next if defined $fromMilli && $sub -> {srtEndTime} < $fromMilli;

        # Fix overlapping start timing,
        # but only if the start boundary is not infinite (undef)
        if (defined $fromMilli && $sub -> {srtStartTime} < $fromMilli) {
            $sub -> {srtStartTime} = $fromMilli;
        }

        # Drop subtitles that begin after the end boundary
        next if defined $toMilli && $sub -> {srtStartTime} > $toMilli;

        # Fix overlapping end timing,
        # but only if the end boundary is not infinite (undef)
        if (defined $toMilli && $sub -> {srtEndTime} > $toMilli) {
            $sub -> {srtEndTime} = $toMilli;
        }

        # All conditions met, all fixes done
        push @kept, $sub;
    }
    @{$subs} = @kept;
    return;
}
393 | |
# Print brief usage help to STDERR and terminate the script.
# Accepts an optional error message, e.g. for errors parsing command line.
# With a message: the message is printed first and the exit status is 2.
# Without a message: the exit status is 0.
sub usage
{
    my $msg = shift;

    # A message marks the call as an error report
    my $exitCode = defined $msg ? 2 : 0;
    print STDERR "$msg\n" if defined $msg;

    print STDERR <<USAGE;
Usage: $0 [switches]
-if,--input-format <fmt> input format; supported: SRT
default is SRT
-of,--output-format <fmt> output format; supported: SRT
default is same as input format
-s,--shift <time> shift all subtitles by <time>
(format: [-]hh:mm:ss,fraction)
-c,--scale <time> scale by adding <time> to overall duration
-f,--split-from <time> Drop subtitles that end before <time>
-t,--split-to <time> Drop subtitles that start after <time>
(will truncate timing if it overlaps a boundary)
-r,--renumber renumber SRT subtitles in output
-d,--debug enable debug output
-h,--help this help message

All times could be negative. Input/output may also contain negative timings,
which is sometimes useful for intermediate results.
SRT subtitles are always renumbered on output.

EXAMPLES

Split subtitle file into two disks at a boundary of one hour 15 minutes:

subedit.pl --split-to 1:15:0 < all.srt > p1.srt
subedit.pl -f 1:15:0 < all.srt | subedit.pl --shift -1:15:0 > p2.srt

Join the previous two disks back into one file:

subedit.pl -s 1:15:00 < p2.srt | cat p1.srt - | subedit.pl > all.srt

Correct a situation where the first subtitle starts in sync with the video,
but the last one starts 3.5 seconds earlier than the speech in the video,
assuming the first subtitle timing is 00:01:05.030:

subedit.pl -s -1:5.03 | subedit.pl -c 3.5 | subedit.pl -s 1:5.03
USAGE

    exit $exitCode;
}