Mercurial > emacs
changeset 90651:23c1467f8640
(detect_coding_charset): Fix detection of multi-byte
charset.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Wed, 08 Nov 2006 04:28:29 +0000 |
parents | 02cf29720f31 |
children | ae3365fe9a16 |
files | src/coding.c |
diffstat | 1 files changed, 51 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/src/coding.c Tue Nov 07 23:22:48 2006 +0000
+++ b/src/coding.c Wed Nov 08 04:28:29 2006 +0000
@@ -4731,6 +4731,7 @@
   int consumed_chars = 0;
   Lisp_Object attrs, valids;
   int found = 0;
+  int head_ascii = coding->head_ascii;
 
   detect_info->checked |= CATEGORY_MASK_CHARSET;
 
@@ -4739,21 +4740,68 @@
   valids = AREF (attrs, coding_attr_charset_valids);
 
   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
-    src += coding->head_ascii;
+    src += head_ascii;
 
   while (1)
     {
       int c;
+      Lisp_Object val;
+      struct charset *charset;
+      int dim, idx;
 
       src_base = src;
       ONE_MORE_BYTE (c);
       if (c < 0)
         continue;
-      if (NILP (AREF (valids, c)))
+      val = AREF (valids, c);
+      if (NILP (val))
         break;
       if (c >= 0x80)
         found = CATEGORY_MASK_CHARSET;
-    }
+      if (INTEGERP (val))
+        {
+          charset = CHARSET_FROM_ID (XFASTINT (val));
+          dim = CHARSET_DIMENSION (charset);
+          for (idx = 1; idx < dim; idx++)
+            {
+              if (src == src_end)
+                goto too_short;
+              ONE_MORE_BYTE (c);
+              if (c < charset->code_space[(dim - 1 - idx) * 2]
+                  || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
+                break;
+            }
+          if (idx < dim)
+            break;
+        }
+      else
+        {
+          idx = 1;
+          for (; CONSP (val); val = XCDR (val))
+            {
+              charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
+              dim = CHARSET_DIMENSION (charset);
+              while (idx < dim)
+                {
+                  if (src == src_end)
+                    goto too_short;
+                  ONE_MORE_BYTE (c);
+                  if (c < charset->code_space[(dim - 1 - idx) * 4]
+                      || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
+                    break;
+                  idx++;
+                }
+              if (idx == dim)
+                {
+                  val = Qnil;
+                  break;
+                }
+            }
+          if (CONSP (val))
+            break;
+        }
+    }
+ too_short:
   detect_info->rejected |= CATEGORY_MASK_CHARSET;
   return 0;
 
```