Mercurial > emacs
changeset 89431:c1527f26d513
Make it work for the map file included in glibc.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Tue, 20 May 2003 13:07:55 +0000 |
parents | 28939d6dacea |
children | d1fc6a0e4772 |
files | etc/charsets/gb18030-2.awk etc/charsets/gb18030-4.awk |
diffstat | 2 files changed, 84 insertions(+), 101 deletions(-) [+] |
line wrap: on
line diff
--- a/etc/charsets/gb18030-2.awk Tue May 20 13:06:09 2003 +0000 +++ b/etc/charsets/gb18030-2.awk Tue May 20 13:07:55 2003 +0000 @@ -11,6 +11,10 @@ tohex["d"] = 13; tohex["e"] = 14; tohex["f"] = 15; + from_gb = 0; + to_gb = -1; + to_unicode = 0; + from_unicode = 0; } function decode_hex(str) { @@ -30,56 +34,47 @@ function gb_to_index(gb) { b0 = int(gb / 256); b1 = gb % 256; - idx = (((b0 - 129)) * 190 + b1 - 64); - if (b1 >= 128) - idx--; + idx = (((b0 - 129)) * 191 + b1 - 64); +# if (b1 >= 128) +# idx--; return idx } function index_to_gb(idx) { - b0 = int(idx / 190) + 129; - b1 = (idx % 190) + 64; - if (b1 >= 127) - b1++; + b0 = int(idx / 191) + 129; + b1 = (idx % 191) + 64; +# if (b1 >= 127) +# b1++; return (b0 * 256 + b1); } -function decode_gb(str) { - b0 = decode_hex(substr(str, 3, 2)); - b1 = decode_hex(substr(str, 7, 2)); - return (b0 * 256 + b1) + +/^\#/ { + print; + next; } -/^<U[0-9A-F][0-9A-F][0-9A-F][0-9A-F]>/ { - if ($2 ~ /^\\x[0-9A-F][0-9A-F]\\x[0-9A-F][0-9A-F]$/) +{ + gb = gb_to_index(decode_hex(substr($1, 3, 4))); + unicode = decode_hex(substr($2, 3, 4)); + if ((gb == to_gb + 1) && (unicode == to_unicode + 1)) { - unicode = decode_hex(substr($1, 3, 4)); - gb = decode_gb($2); - idx = gb_to_index(gb); - gb_table[idx] = unicode; + to_gb++; + to_unicode++; + } + else + { + if (from_gb == to_gb) + printf "0x%04X 0x%04X\n", index_to_gb(from_gb), from_unicode; + else if (from_gb < to_gb) + printf "0x%04X-0x%04X 0x%04X\n", + index_to_gb(from_gb), index_to_gb(to_gb), from_unicode; + from_gb = to_gb = gb; + from_unicode = to_unicode = unicode; } } END { - last_idx = gb_to_index(decode_hex("FEFE")); - from_idx = 0; - from_unicode = gb_table[0]; - for (i = 1; i <= last_idx; i++) - { - gb = index_to_gb(i); - unicode = gb_table[i]; - if (i - from_idx != unicode - from_unicode) - { - if (i - 1 == from_idx) - printf ("0x%04X 0x%04X\n", - index_to_gb(from_idx), from_unicode); - else - printf ("0x%04X-0x%04X 0x%04X\n", - index_to_gb(from_idx), index_to_gb(i - 1), 
from_unicode); - from_idx = i; - from_unicode=unicode; - } - } - if (i - from_idx != unicode - from_unicode) - printf ("0x%04X-0x%04X 0x%04X\n", - index_to_gb(from_idx), index_to_gb(i - 1), from_unicode); + if (from_gb <= to_gb) + printf "0x%04X-0x%04X 0x%04X\n", + index_to_gb(from_gb), index_to_gb(to_gb), from_unicode; }
--- a/etc/charsets/gb18030-4.awk Tue May 20 13:06:09 2003 +0000 +++ b/etc/charsets/gb18030-4.awk Tue May 20 13:07:55 2003 +0000 @@ -27,88 +27,76 @@ return n; } -function gb_to_index(b0,b1,b2,b3) { - return ((((b0 - 129) * 10 + (b1 - 48)) * 126 + (b2 - 129)) * 10 + b3 - 48); +function gb_to_index(gb) { + b0 = int(gb / 256); + b1 = gb % 256; + idx = (((b0 - 129)) * 191 + b1 - 64); +# if (b1 >= 127) +# idx--; + return idx } function index_to_gb(idx) { b3 = (idx % 10) + 48; - idx /= 10; + idx = int(idx / 10); b2 = (idx % 126) + 129; - idx /= 126; + idx = int(idx / 126); b1 = (idx % 10) + 48; - b0 = (idx / 10) + 129; + b0 = int(idx / 10) + 129; return sprintf("%02X%02X%02X%02X", b0, b1, b2, b3); } -function decode_gb(str) { - b0 = decode_hex(substr(str, 3, 2)); - b1 = decode_hex(substr(str, 7, 2)); - b2 = decode_hex(substr(str, 11, 2)); - b3 = decode_hex(substr(str, 15, 2)); - return gb_to_index(b0, b1, b2, b3); +/^\#/ { + print; + next; } -function printline(from, to) { - fromgb = index_to_gb(from); - fromuni = gbtable[from]; - if (from == to) - printf ("0x%s 0x%04X\n", fromgb, fromuni); - else - printf ("0x%s-0x%s 0x%04X\n", fromgb, index_to_gb(to), fromuni); +/0x....-0x..../ { + gb_from = gb_to_index(decode_hex(substr($1, 3, 4))); + gb_to = gb_to_index(decode_hex(substr($1, 10, 4))); + unicode = decode_hex(substr($2, 3, 4)); + while (gb_from <= gb_to) + { + table[unicode++] = 1; + gb_from++; + } + next; } -/^<U[0-9A-F][0-9A-F][0-9A-F][0-9A-F]>/ { - unicode = decode_hex(substr($1, 3, 4)); - if ($2 ~ /\\x8[1-4]\\x3[0-9]\\x[8-9A-F][0-9A-F]\\x3[0-9]/) - unitable[unicode] = decode_gb($2); - else - unitable[unicode] = -1; +{ + gb = decode_hex(substr($1, 3, 4)); + unicode = decode_hex(substr($2, 3, 4)); + table[unicode] = 1; } END { - lastgb = 0; - surrogate_min = decode_hex("D800"); - surrogate_max = decode_hex("DFFF"); - lastgb = unitable[128]; - gbtable[lastgb] = 128; - for (i = 129; i < 65536; i++) + from_gb = -1; + to_gb = 0; + from_i = 0; + table[65536] = 1; + for (i 
= 128; i <= 65536; i++) { - if (unitable[i] == 0 && (i < surrogate_min || i > surrogate_max)) + if (table[i] == 0) { - lastgb++; - gbtable[lastgb] = i; - unitable[i] = lastgb; + if (i < 55296 || i >= 57344) + { + if (from_gb < 0) + { + from_gb = to_gb; + from_i = i; + } + to_gb++; + } } - else if (unitable[i] > 0) + else if (from_gb >= 0) { - lastgb = unitable[i]; - gbtable[lastgb] = i; + if (from_gb + 1 == to_gb) + printf "0x%s\t\t0x%04X\n", + index_to_gb(from_gb), from_i; + else + printf "0x%s-0x%s\t0x%04X\n", + index_to_gb(from_gb), index_to_gb(to_gb - 1), from_i; + from_gb = -1; } } - - fromgb = lastgb = unitable[128]; - for (i = 129; i < 65536; i++) - { - if (unitable[i] > 0) - { - if (lastgb + 1 == unitable[i]) - { - lastgb++; - } - else - { - if (lastgb >= 0) - printline(fromgb, lastgb); - fromgb = lastgb = unitable[i]; - } - } - else # i.e. (unitable[i] < 0) - { - if (lastgb >= 0) - printline(fromgb, lastgb); - lastgb = -1; - } - } - printline(fromgb, unitable[65535]); }