changeset 90651:23c1467f8640

(detect_coding_charset): Fix detection of multi-byte charset.
author Kenichi Handa <handa@m17n.org>
date Wed, 08 Nov 2006 04:28:29 +0000
parents 02cf29720f31
children ae3365fe9a16
files src/coding.c
diffstat 1 files changed, 51 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/src/coding.c	Tue Nov 07 23:22:48 2006 +0000
+++ b/src/coding.c	Wed Nov 08 04:28:29 2006 +0000
@@ -4731,6 +4731,7 @@
   int consumed_chars = 0;
   Lisp_Object attrs, valids;
   int found = 0;
+  int head_ascii = coding->head_ascii;
 
   detect_info->checked |= CATEGORY_MASK_CHARSET;
 
@@ -4739,21 +4740,68 @@
   valids = AREF (attrs, coding_attr_charset_valids);
 
   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
-    src += coding->head_ascii;
+    src += head_ascii;
 
   while (1)
     {
       int c;
+      Lisp_Object val;
+      struct charset *charset;
+      int dim, idx;
 
       src_base = src;
       ONE_MORE_BYTE (c);
       if (c < 0)
 	continue;
-      if (NILP (AREF (valids, c)))
+      val = AREF (valids, c);
+      if (NILP (val))
 	break;
       if (c >= 0x80)
 	found = CATEGORY_MASK_CHARSET;
-    }
+      if (INTEGERP (val))
+	{
+	  charset = CHARSET_FROM_ID (XFASTINT (val));
+	  dim = CHARSET_DIMENSION (charset);
+	  for (idx = 1; idx < dim; idx++)
+	    {
+	      if (src == src_end)
+		goto too_short;
+	      ONE_MORE_BYTE (c);
+	      if (c < charset->code_space[(dim - 1 - idx) * 2] 
+		  || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
+		break;
+	    }
+	  if (idx < dim)
+	    break;
+	}
+      else
+	{
+	  idx = 1;
+	  for (; CONSP (val); val = XCDR (val))
+	    {
+	      charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
+	      dim = CHARSET_DIMENSION (charset);
+	      while (idx < dim)
+		{
+		  if (src == src_end)
+		    goto too_short;
+		  ONE_MORE_BYTE (c);
+		  if (c < charset->code_space[(dim - 1 - idx) * 4]
+		      || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
+		    break;
+		  idx++;
+		}
+	      if (idx == dim)
+		{
+		  val = Qnil;
+		  break;
+		}
+	    }
+	  if (CONSP (val))
+	    break;
+	}
+    }
+ too_short:
   detect_info->rejected |= CATEGORY_MASK_CHARSET;
   return 0;