# HG changeset patch # User Bryan O'Sullivan # Date 1182103753 25200 # Node ID ec6a3bb109863418ff891364c1bdcc4a0a052148 # Parent b42689a730a20f4516324f946a635e40285a0042 HTML: replace Unicode ligatures with plain ASCII. Thanks to Johannes Hoff for spotting this. diff -r b42689a730a2 -r ec6a3bb10986 en/fixhtml.py --- a/en/fixhtml.py Sun Jun 17 11:42:14 2007 +0200 +++ b/en/fixhtml.py Sun Jun 17 11:09:13 2007 -0700 @@ -22,6 +22,7 @@ angle_re = re.compile(r'([CE];)') unicode_re = re.compile(r'�([0-7][0-9A-F]);') fancyvrb_re = re.compile(r'id="fancyvrb\d+"', re.I) +ligature_re = re.compile(r'ྰ([0-4]);') tmpsuffix = '.tmp.' + str(os.getpid()) @@ -31,12 +32,18 @@ def fix_ascii(m): return chr(int(m.group(1), 16)) +ligatures = ['ff', 'fi', 'fl', 'ffi', 'ffl'] + +def expand_ligature(m): + return ligatures[int(m.group(1))] + for name in sys.argv[1:]: tmpname = name + tmpsuffix ofp = file(tmpname, 'w') for line in file(name): line = angle_re.sub(hide_angle, line) line = unicode_re.sub(fix_ascii, line) + line = ligature_re.sub(expand_ligature, line) line = fancyvrb_re.sub('id="fancyvrb"', line) ofp.write(line) ofp.close()