comparison admin/charsets/mapconv @ 103578:63a1307441f9

For UNICODE-format files, sort in reverse order and do not compact the map, so that the first of any duplicated mappings is preferred (e.g. 0x20->U+0020, 0x20->U+00A0).
author Kenichi Handa <handa@m17n.org>
date Wed, 24 Jun 2009 13:02:50 +0000
parents 24af3ac0cac1
children 1d1d5d9bd884
comparison
equal deleted inserted replaced
103577:520b069d6504 103578:63a1307441f9
28 # YYYY is the corresponding Unicode character code in hexadecimal. 28 # YYYY is the corresponding Unicode character code in hexadecimal.
29 # Arguments are: 29 # Arguments are:
30 # $1: source map file 30 # $1: source map file
31 # $2: address pattern for sed (optionally with substitution command) 31 # $2: address pattern for sed (optionally with substitution command)
32 # $3: format of source map file 32 # $3: format of source map file
33 # GLIBC-1 GLIBC-2 GLIBC-2-7 CZYBORRA IANA UNICODE YASUOKA 33 # GLIBC-1 GLIBC-2 GLIBC-2-7 CZYBORRA IANA UNICODE UNICODE2 YASUOKA
34 # $4: awk script 34 # $4: awk script
35 35
36 FILE="admin/charsets/$1" 36 FILE="admin/charsets/$1"
37 BASE=`basename $1 .gz` 37 BASE=`basename $1 .gz`
38 38
113 | sed -e 's/\(0x[0-9A-Fa-f]*\)[^0]*\(0x[0-9A-Fa-f]*\).*/\1 \2/' \ 113 | sed -e 's/\(0x[0-9A-Fa-f]*\)[^0]*\(0x[0-9A-Fa-f]*\).*/\1 \2/' \
114 | sort | ${AWKPROG} 114 | sort | ${AWKPROG}
115 elif [ "$3" = "UNICODE" ] ; then 115 elif [ "$3" = "UNICODE" ] ; then
116 # Source format is: 116 # Source format is:
117 # YYYY XX 117 # YYYY XX
118 # Sort in reverse order so that, among duplicated mappings, the
119 # first one is preferred (e.g. 0x20->U+0020, 0x20->U+00A0).
118 zcat $1 | sed -n -e "$2 p" \ 120 zcat $1 | sed -n -e "$2 p" \
119 | sed -e 's/\([0-9A-F]*\)[^0-9A-F]*\([0-9A-F]*\).*/0x\2 0x\1/' \ 121 | sed -e 's/\([0-9A-F]*\)[^0-9A-F]*\([0-9A-F]*\).*/0x\2 0x\1/' \
120 | sort | ${AWKPROG} 122 | sort -r
121 elif [ "$3" = "UNICODE2" ] ; then 123 elif [ "$3" = "UNICODE2" ] ; then
122 # Source format is: 124 # Source format is:
123 # 0xXXXX 0xYYYY # ... 125 # 0xXXXX 0xYYYY # ...
124 zcat $1 | sed -n -e "$2 p" \ 126 zcat $1 | sed -n -e "$2 p" \
125 | sed -e 's/\([0-9A-Fx]*\)[^0]*\([0-9A-Fx]*\).*/\1 \2/' \ 127 | sed -e 's/\([0-9A-Fx]*\)[^0]*\([0-9A-Fx]*\).*/\1 \2/' \