Mercurial > emacs
diff etc/charsets/gb18030-4.awk @ 88674:264dcdcfe621
New file.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Thu, 30 May 2002 00:21:40 +0000 |
parents | |
children | c1527f26d513 |
line wrap: on
line diff
# etc/charsets/gb18030-4.awk
#
# Reads mapping lines of the form
#     <UXXXX> \xHH\xHH\xHH\xHH
# (presumably a charmap-style GB18030 table -- confirm against the caller)
# and prints the four-byte GB18030 codes for code points U+0080..U+FFFF as
# runs of consecutive codes:
#     0xGGGGGGGG 0xUUUU               a single code
#     0xGGGGGGGG-0xGGGGGGGG 0xUUUU    a range; 0xUUUU is its first code point
#
# Four-byte codes are handled internally as a linear index (see
# gb_to_index / index_to_gb).
#
# AWK convention used below: parameters listed after the wide gap in a
# function header are local variables, not arguments.  Without them every
# helper variable would be global and could clobber the END rule's state.

BEGIN {
    # Values of the hexadecimal letter digits, upper and lower case.
    tohex["A"] = 10;
    tohex["B"] = 11;
    tohex["C"] = 12;
    tohex["D"] = 13;
    tohex["E"] = 14;
    tohex["F"] = 15;
    tohex["a"] = 10;
    tohex["b"] = 11;
    tohex["c"] = 12;
    tohex["d"] = 13;
    tohex["e"] = 14;
    tohex["f"] = 15;
}

# Return the numeric value of the hexadecimal digit string STR.
# Characters outside 0-9 are looked up in tohex[].
function decode_hex(str,    n, len, i, c) {
    n = 0;
    len = length(str);
    for (i = 1; i <= len; i++) {
        c = substr(str, i, 1);
        if (c >= "0" && c <= "9")
            n = n * 16 + (c - "0");
        else
            n = n * 16 + tohex[c];
    }
    return n;
}

# Map the four bytes B0..B3 of a code to its linear index.  The bytes are
# treated as mixed-radix digits: B0 and B2 count from 129 (B2 has radix
# 126), B1 and B3 count from 48 with radix 10.
function gb_to_index(b0, b1, b2, b3) {
    return ((((b0 - 129) * 10 + (b1 - 48)) * 126 + (b2 - 129)) * 10 + b3 - 48);
}

# Inverse of gb_to_index: return the eight upper-case hex digits of the
# four-byte code whose linear index is IDX.
function index_to_gb(idx,    b0, b1, b2, b3) {
    b3 = (idx % 10) + 48;
    # AWK division is floating point; truncate explicitly so every
    # intermediate value stays an integer.
    idx = int(idx / 10);
    b2 = (idx % 126) + 129;
    idx = int(idx / 126);
    b1 = (idx % 10) + 48;
    b0 = int(idx / 10) + 129;
    return sprintf("%02X%02X%02X%02X", b0, b1, b2, b3);
}

# Convert a string of the form \xHH\xHH\xHH\xHH to its linear index.
# The hex digit pairs sit at character offsets 3, 7, 11 and 15.
function decode_gb(str,    b0, b1, b2, b3) {
    b0 = decode_hex(substr(str, 3, 2));
    b1 = decode_hex(substr(str, 7, 2));
    b2 = decode_hex(substr(str, 11, 2));
    b3 = decode_hex(substr(str, 15, 2));
    return gb_to_index(b0, b1, b2, b3);
}

# Print one output line for the run of linear indexes FROM..TO, followed by
# the code point recorded in gbtable[] for FROM.
function printline(from, to,    first_gb, first_uni) {
    first_gb = index_to_gb(from);
    first_uni = gbtable[from];
    if (from == to)
        printf ("0x%s 0x%04X\n", first_gb, first_uni);
    else
        printf ("0x%s-0x%s 0x%04X\n", first_gb, index_to_gb(to), first_uni);
}

# For each <UXXXX> line, record the linear index of its four-byte code in
# unitable[], or -1 when the second field is not a four-byte code matching
# the pattern below.
/^<U[0-9A-F][0-9A-F][0-9A-F][0-9A-F]>/ {
    unicode = decode_hex(substr($1, 3, 4));
    if ($2 ~ /\\x8[1-4]\\x3[0-9]\\x[8-9A-F][0-9A-F]\\x3[0-9]/)
        unitable[unicode] = decode_gb($2);
    else
        unitable[unicode] = -1;
}

END {
    surrogate_min = decode_hex("D800");
    surrogate_max = decode_hex("DFFF");

    # Pass 1: build gbtable[] (linear index -> code point).  A code point
    # that had no input line (its unitable[] entry is unset and so compares
    # equal to 0) and is not a surrogate is given the index following the
    # previous one.
    lastgb = unitable[128];
    gbtable[lastgb] = 128;
    for (i = 129; i < 65536; i++) {
        if (unitable[i] == 0 && (i < surrogate_min || i > surrogate_max)) {
            lastgb++;
            gbtable[lastgb] = i;
            unitable[i] = lastgb;
        } else if (unitable[i] > 0) {
            lastgb = unitable[i];
            gbtable[lastgb] = i;
        }
    }

    # Pass 2: walk the code points in order and print each maximal run of
    # consecutive linear indexes.  lastgb < 0 means no run is open.
    fromgb = lastgb = unitable[128];
    for (i = 129; i < 65536; i++) {
        if (unitable[i] > 0) {
            if (lastgb + 1 == unitable[i]) {
                lastgb++;
            } else {
                if (lastgb >= 0)
                    printline(fromgb, lastgb);
                fromgb = lastgb = unitable[i];
            }
        } else {
            # unitable[i] <= 0: either marked -1 by the input rule or an
            # unset surrogate entry.  Close the open run, if any.
            if (lastgb >= 0)
                printline(fromgb, lastgb);
            lastgb = -1;
        }
    }

    # Flush the run that is still open after U+FFFF.  Guarded so nothing is
    # printed when the last run has already been closed inside the loop.
    if (lastgb >= 0)
        printline(fromgb, lastgb);
}