Mercurial > hgbook
comparison en/autoid.py @ 749:7e7c47481e4f
Oops, this is the real merge for my hg's oddity
author | Dongsheng Song <dongsheng.song@gmail.com> |
---|---|
date | Fri, 20 Mar 2009 16:43:35 +0800 |
parents | c838b3975bc6 |
children |
comparison
equal
deleted
inserted
replaced
748:d13c7c706a58 | 749:7e7c47481e4f |
---|---|
1 #!/usr/bin/env python | |
2 # | |
3 # Add unique ID attributes to para tags. This script should only be | |
4 # run by one person, since otherwise it introduces the possibility of | |
5 # chaotic conflicts among tags. | |
6 | |
7 import glob, os, re, sys | |
8 | |
# Matches a <para> opening tag that already carries an id of the form
# "x_<hex digits>"; group 1 captures the hex digits.
tagged = re.compile('<para[^>]* id="x_([0-9a-f]+)"[^>]*>', re.M)
# Matches a bare <para> opening tag with no attributes.
untagged = re.compile('<para>')

# Files to process: every ch*.xml and app*.xml in the current directory.
names = glob.glob('ch*.xml') + glob.glob('app*.xml')

# First pass: find the highest-numbered paragraph ID.

biggest_id = 0   # largest ID value seen so far (0 if none)
seen = set()     # every ID value encountered, used to detect duplicates
errs = 0         # number of duplicate IDs reported

for name in names:
    # Read through a context manager so the handle is closed promptly
    # rather than left for the garbage collector.
    with open(name) as fp:
        text = fp.read()
    for m in tagged.finditer(text):
        i = int(m.group(1), 16)
        if i in seen:
            # Report the ID in the same x_<hex> form it has in the XML so
            # the message can be searched for directly.  sys.stderr.write
            # is valid on both Python 2 and Python 3.
            sys.stderr.write('%s: duplication of ID x_%x\n' % (name, i))
            errs += 1
        seen.add(i)
        if i > biggest_id:
            biggest_id = i
29 | |
def retag(s):
    """Return a ``<para>`` opening tag carrying the next unused ID.

    The argument is ignored; the function has the one-argument shape
    that ``re.sub`` expects of a replacement callback.  Each call
    advances the module-level ``biggest_id`` counter by one and formats
    the new value in lowercase hexadecimal.
    """
    global biggest_id
    next_id = biggest_id + 1
    biggest_id = next_id
    return '<para id="x_%x">' % (next_id,)
34 | |
# Second pass: add IDs to paragraphs that currently lack them.

for name in names:
    # Context managers guarantee both handles are closed, including when
    # a read or write raises.
    with open(name) as fp:
        f = fp.read()
    # Every bare <para> is replaced by a tag with a freshly allocated ID.
    f1 = untagged.sub(retag, f)
    if f1 != f:
        # Write the new text to a temporary file, then rename it over
        # the original file.
        tmpname = name + '.tmp'
        with open(tmpname, 'w') as fp:
            fp.write(f1)
        os.rename(tmpname, name)

# Exit with the number of duplicate IDs found in the first pass.  The
# value is capped at 255 because a process exit status is truncated to
# 8 bits on POSIX, so an uncapped count of 256 would read as success.
sys.exit(min(errs, 255))