changeset 260:ec6a3bb10986

HTML: replace Unicode ligatures with plain ASCII. Thanks to Johannes Hoff for spotting this.
author Bryan O'Sullivan <bos@serpentine.com>
date Sun, 17 Jun 2007 11:09:13 -0700
parents b42689a730a2
children a24b370a16ee
files en/fixhtml.py
diffstat 1 files changed, 7 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/en/fixhtml.py	Sun Jun 17 11:42:14 2007 +0200
+++ b/en/fixhtml.py	Sun Jun 17 11:09:13 2007 -0700
@@ -22,6 +22,7 @@
 angle_re = re.compile(r'(&#x003[CE];)')
 unicode_re = re.compile(r'&#x00([0-7][0-9A-F]);')
 fancyvrb_re = re.compile(r'id="fancyvrb\d+"', re.I)
+ligature_re = re.compile(r'&#xFB0([0-4]);')
 
 tmpsuffix = '.tmp.' + str(os.getpid())
 
@@ -31,12 +32,18 @@
 def fix_ascii(m):
     return chr(int(m.group(1), 16))
 
+ligatures = ['ff', 'fi', 'fl', 'ffi', 'ffl']
+
+def expand_ligature(m):
+    return ligatures[int(m.group(1))]
+
 for name in sys.argv[1:]:
     tmpname = name + tmpsuffix
     ofp = file(tmpname, 'w')
     for line in file(name):
         line = angle_re.sub(hide_angle, line)
         line = unicode_re.sub(fix_ascii, line)
+        line = ligature_re.sub(expand_ligature, line)
         line = fancyvrb_re.sub('id="fancyvrb"', line)
         ofp.write(line)
     ofp.close()