changeset 89420:c3e67ce6ee0f

(Qsignature, Qendian): Delete these variables. (syms_of_coding): Don't initialize them. (CATEGORY_MASK_UTF_16_AUTO): New macro. (detect_coding_utf_16): Add CATEGORY_MASK_UTF_16_AUTO in detect_info->found. (decode_coding_utf_16): Don't detect BOM here. (encode_coding_utf_16): Produce BOM if CODING_UTF_16_BOM (coding) is NOT utf_16_without_bom. (setup_coding_system): For a coding system of type utf-16, check if the attribute :endian is Qbig or not (not nil or not), and set CODING_REQUIRE_DETECTION_MASK if BOM detection is required. (detect_coding): If coding type is utf-16 and BOM detection is required, detect it. (Fdefine_coding_system_internal): For a coding system of type utf-16, check if the attribute :endian is Qbig or not (not nil or not).
author Kenichi Handa <handa@m17n.org>
date Tue, 06 May 2003 12:28:11 +0000
parents 18e57407a82b
children 8357b304ef57
files src/coding.c
diffstat 1 files changed, 51 insertions(+), 35 deletions(-) [+]
line wrap: on
line diff
--- a/src/coding.c	Tue May 06 08:22:13 2003 +0000
+++ b/src/coding.c	Tue May 06 12:28:11 2003 +0000
@@ -308,7 +308,7 @@
 Lisp_Object Qdefault_char;
 Lisp_Object Qno_conversion, Qundecided;
 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
-Lisp_Object Qsignature, Qendian, Qbig, Qlittle;
+Lisp_Object Qbig, Qlittle;
 Lisp_Object Qcoding_system_history;
 Lisp_Object Qvalid_codes;
 
@@ -626,6 +626,7 @@
 #define CATEGORY_MASK_ISO_7_ELSE	(1 << coding_category_iso_7_else)
 #define CATEGORY_MASK_ISO_8_ELSE	(1 << coding_category_iso_8_else)
 #define CATEGORY_MASK_UTF_8		(1 << coding_category_utf_8)
+#define CATEGORY_MASK_UTF_16_AUTO	(1 << coding_category_utf_16_auto)
 #define CATEGORY_MASK_UTF_16_BE		(1 << coding_category_utf_16_be)
 #define CATEGORY_MASK_UTF_16_LE		(1 << coding_category_utf_16_le)
 #define CATEGORY_MASK_UTF_16_BE_NOSIG	(1 << coding_category_utf_16_be_nosig)
@@ -1357,12 +1358,14 @@
 
   if ((c1 == 0xFF) && (c2 == 0xFE))
     {
-      detect_info->found |= CATEGORY_MASK_UTF_16_LE;
+      detect_info->found |= (CATEGORY_MASK_UTF_16_LE
+			     | CATEGORY_MASK_UTF_16_AUTO);
       detect_info->rejected |= CATEGORY_MASK_UTF_16_BE;
     }
   else if ((c1 == 0xFE) && (c2 == 0xFF))
     {
-      detect_info->found |= CATEGORY_MASK_UTF_16_BE;
+      detect_info->found |= (CATEGORY_MASK_UTF_16_BE
+			     | CATEGORY_MASK_UTF_16_AUTO);
       detect_info->rejected |= CATEGORY_MASK_UTF_16_LE;
     }
  no_more_source:
@@ -1387,7 +1390,7 @@
 
   CODING_GET_INFO (coding, attr, eol_type, charset_list);
 
-  if (bom != utf_16_without_bom)
+  if (bom == utf_16_with_bom)
     {
       int c, c1, c2;
 
@@ -1395,33 +1398,22 @@
       ONE_MORE_BYTE (c1);
       ONE_MORE_BYTE (c2);
       c = (c1 << 8) | c2;
-      if (bom == utf_16_with_bom)
-	{
-	  if (endian == utf_16_big_endian
-	      ? c != 0xFEFF : c != 0xFFFE)
-	    {
-	      /* We are sure that there's enouph room at CHARBUF.  */
-	      *charbuf++ = c1;
-	      *charbuf++ = c2;
-	      coding->errors++;
-	    }
-	}
-      else
+
+      if (endian == utf_16_big_endian
+	  ? c != 0xFEFF : c != 0xFFFE)
 	{
-	  if (c == 0xFEFF)
-	    CODING_UTF_16_ENDIAN (coding)
-	      = endian = utf_16_big_endian;
-	  else if (c == 0xFFFE)
-	    CODING_UTF_16_ENDIAN (coding)
-	      = endian = utf_16_little_endian;
-	  else
-	    {
-	      CODING_UTF_16_ENDIAN (coding)
-		= endian = utf_16_big_endian;
-	      src = src_base;
-	    }
+	  /* The first two bytes are not BOM.  Treat them as bytes
+	     for a normal character.  */
+	  src = src_base;
+	  coding->errors++;
 	}
-      CODING_UTF_16_BOM (coding) = utf_16_with_bom;
+      CODING_UTF_16_BOM (coding) = utf_16_without_bom;
+    }
+  else if (bom == utf_16_detect_bom)
+    {
+      /* We have already tried to detect BOM and failed in
+	 detect_coding.  */
+      CODING_UTF_16_BOM (coding) = utf_16_without_bom;
     }
 
   while (1)
@@ -1494,7 +1486,7 @@
 
   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
 
-  if (bom == utf_16_with_bom)
+  if (bom != utf_16_without_bom)
     {
       ASSURE_DESTINATION (safe_room);
       if (big_endian)
@@ -4859,7 +4851,7 @@
 				    : EQ (val, Qt) ? utf_16_with_bom
 				    : utf_16_without_bom);
       val = AREF (attrs, coding_attr_utf_16_endian);
-      CODING_UTF_16_ENDIAN (coding) = (NILP (val) ? utf_16_big_endian
+      CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
 				       : utf_16_little_endian);
       CODING_UTF_16_SURROGATE (coding) = 0;
       coding->detector = detect_coding_utf_16;
@@ -4867,6 +4859,8 @@
       coding->encoder = encode_coding_utf_16;
       coding->common_flags
 	|= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
+      if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom)
+	coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
     }
   else if (EQ (coding_type, Qccl))
     {
@@ -5285,6 +5279,25 @@
 		}
 	}
     }
+  else if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qutf_16))
+    {
+      Lisp_Object coding_systems;
+      struct coding_detection_info detect_info;
+
+      coding_systems
+	= AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
+      detect_info.found = detect_info.rejected = 0;
+      if (CONSP (coding_systems)
+	  && detect_coding_utf_16 (coding, &detect_info)
+	  && (detect_info.found & (CATEGORY_MASK_UTF_16_LE
+				   | CATEGORY_MASK_UTF_16_BE)))
+	{
+	  if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
+	    setup_coding_system (XCAR (coding_systems), coding);
+	  else
+	    setup_coding_system (XCDR (coding_systems), coding);
+	}
+    }
 
   attrs = CODING_ID_ATTRS (coding->id);
   coding_type = CODING_ATTR_TYPE (attrs);
@@ -7957,15 +7970,20 @@
       ASET (attrs, coding_attr_utf_16_bom, bom);
 
       endian = args[coding_arg_utf16_endian];
+      CHECK_SYMBOL (endian);
+      if (NILP (endian))
+	endian = Qbig;
+      else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
+	error ("Invalid endian: %s", XSYMBOL (endian)->name->data);
       ASET (attrs, coding_attr_utf_16_endian, endian);
 
       category = (CONSP (bom)
 		  ? coding_category_utf_16_auto
 		  : NILP (bom)
-		  ? (NILP (endian)
+		  ? (EQ (endian, Qbig)
 		     ? coding_category_utf_16_be_nosig
 		     : coding_category_utf_16_le_nosig)
-		  : (NILP (endian)
+		  : (EQ (endian, Qbig)
 		     ? coding_category_utf_16_be
 		     : coding_category_utf_16_le));
     }
@@ -8407,8 +8425,6 @@
   DEFSYM (Qutf_8, "utf-8");
 
   DEFSYM (Qutf_16, "utf-16");
-  DEFSYM (Qsignature, "signature");
-  DEFSYM (Qendian, "endian");
   DEFSYM (Qbig, "big");
   DEFSYM (Qlittle, "little");