Mercurial > hgbook
changeset 260:ec6a3bb10986
HTML: replace Unicode ligatures with plain ASCII.
Thanks to Johannes Hoff for spotting this.
author | Bryan O'Sullivan <bos@serpentine.com> |
---|---|
date | Sun, 17 Jun 2007 11:09:13 -0700 |
parents | b42689a730a2 |
children | a24b370a16ee |
files | en/fixhtml.py |
diffstat | 1 files changed, 7 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/en/fixhtml.py Sun Jun 17 11:42:14 2007 +0200
+++ b/en/fixhtml.py Sun Jun 17 11:09:13 2007 -0700
@@ -22,6 +22,7 @@
 angle_re = re.compile(r'(&#x003[CE];)')
 unicode_re = re.compile(r'&#x00([0-7][0-9A-F]);')
 fancyvrb_re = re.compile(r'id="fancyvrb\d+"', re.I)
+ligature_re = re.compile(r'&#xFB0([0-4]);')
 
 tmpsuffix = '.tmp.' + str(os.getpid())
 
@@ -31,12 +32,18 @@
 def fix_ascii(m):
     return chr(int(m.group(1), 16))
 
+ligatures = ['ff', 'fi', 'fl', 'ffi', 'ffl']
+
+def expand_ligature(m):
+    return ligatures[int(m.group(1))]
+
 for name in sys.argv[1:]:
     tmpname = name + tmpsuffix
     ofp = file(tmpname, 'w')
     for line in file(name):
         line = angle_re.sub(hide_angle, line)
         line = unicode_re.sub(fix_ascii, line)
+        line = ligature_re.sub(expand_ligature, line)
         line = fancyvrb_re.sub('id="fancyvrb"', line)
         ofp.write(line)
     ofp.close()