Mercurial > emacs
diff etc/charsets/gb18030-4.awk @ 89431:c1527f26d513
Make it work for the map file included in glibc.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Tue, 20 May 2003 13:07:55 +0000 |
parents | 264dcdcfe621 |
children |
line wrap: on
line diff
--- a/etc/charsets/gb18030-4.awk Tue May 20 13:06:09 2003 +0000 +++ b/etc/charsets/gb18030-4.awk Tue May 20 13:07:55 2003 +0000 @@ -27,88 +27,76 @@ return n; } -function gb_to_index(b0,b1,b2,b3) { - return ((((b0 - 129) * 10 + (b1 - 48)) * 126 + (b2 - 129)) * 10 + b3 - 48); +function gb_to_index(gb) { + b0 = int(gb / 256); + b1 = gb % 256; + idx = (((b0 - 129)) * 191 + b1 - 64); +# if (b1 >= 127) +# idx--; + return idx } function index_to_gb(idx) { b3 = (idx % 10) + 48; - idx /= 10; + idx = int(idx / 10); b2 = (idx % 126) + 129; - idx /= 126; + idx = int(idx / 126); b1 = (idx % 10) + 48; - b0 = (idx / 10) + 129; + b0 = int(idx / 10) + 129; return sprintf("%02X%02X%02X%02X", b0, b1, b2, b3); } -function decode_gb(str) { - b0 = decode_hex(substr(str, 3, 2)); - b1 = decode_hex(substr(str, 7, 2)); - b2 = decode_hex(substr(str, 11, 2)); - b3 = decode_hex(substr(str, 15, 2)); - return gb_to_index(b0, b1, b2, b3); +/^\#/ { + print; + next; } -function printline(from, to) { - fromgb = index_to_gb(from); - fromuni = gbtable[from]; - if (from == to) - printf ("0x%s 0x%04X\n", fromgb, fromuni); - else - printf ("0x%s-0x%s 0x%04X\n", fromgb, index_to_gb(to), fromuni); +/0x....-0x..../ { + gb_from = gb_to_index(decode_hex(substr($1, 3, 4))); + gb_to = gb_to_index(decode_hex(substr($1, 10, 4))); + unicode = decode_hex(substr($2, 3, 4)); + while (gb_from <= gb_to) + { + table[unicode++] = 1; + gb_from++; + } + next; } -/^<U[0-9A-F][0-9A-F][0-9A-F][0-9A-F]>/ { - unicode = decode_hex(substr($1, 3, 4)); - if ($2 ~ /\\x8[1-4]\\x3[0-9]\\x[8-9A-F][0-9A-F]\\x3[0-9]/) - unitable[unicode] = decode_gb($2); - else - unitable[unicode] = -1; +{ + gb = decode_hex(substr($1, 3, 4)); + unicode = decode_hex(substr($2, 3, 4)); + table[unicode] = 1; } END { - lastgb = 0; - surrogate_min = decode_hex("D800"); - surrogate_max = decode_hex("DFFF"); - lastgb = unitable[128]; - gbtable[lastgb] = 128; - for (i = 129; i < 65536; i++) + from_gb = -1; + to_gb = 0; + from_i = 0; + table[65536] = 1; + for (i 
= 128; i <= 65536; i++) { - if (unitable[i] == 0 && (i < surrogate_min || i > surrogate_max)) + if (table[i] == 0) { - lastgb++; - gbtable[lastgb] = i; - unitable[i] = lastgb; + if (i < 55296 || i >= 57344) + { + if (from_gb < 0) + { + from_gb = to_gb; + from_i = i; + } + to_gb++; + } } - else if (unitable[i] > 0) + else if (from_gb >= 0) { - lastgb = unitable[i]; - gbtable[lastgb] = i; + if (from_gb + 1 == to_gb) + printf "0x%s\t\t0x%04X\n", + index_to_gb(from_gb), from_i; + else + printf "0x%s-0x%s\t0x%04X\n", + index_to_gb(from_gb), index_to_gb(to_gb - 1), from_i; + from_gb = -1; } } - - fromgb = lastgb = unitable[128]; - for (i = 129; i < 65536; i++) - { - if (unitable[i] > 0) - { - if (lastgb + 1 == unitable[i]) - { - lastgb++; - } - else - { - if (lastgb >= 0) - printline(fromgb, lastgb); - fromgb = lastgb = unitable[i]; - } - } - else # i.e. (unitable[i] < 0) - { - if (lastgb >= 0) - printline(fromgb, lastgb); - lastgb = -1; - } - } - printline(fromgb, unitable[65535]); }