changeset 19280:e755044718ee

(ENCODE_ISO_CHARACTER_DIMENSION1): Pay attention to CODING_FLAG_ISO_SAFE. (ENCODE_ISO_CHARACTER_DIMENSION2): Likewise. (safe_terminal_coding): New variable. (Fset_safe_terminal_coding_system_internal): New function. (init_coding_once): Initialize safe_terminal_coding. (syms_of_coding): Declare set-safe-terminal-coding-system-internal as a Lisp function. (Vmicrosoft_code_table): New variable. (syms_of_coding): Declare it as a Lisp variable and initialize it. (detect_coding_mask): Pay attention to Vmicrosoft_code_table.
author Kenichi Handa <handa@m17n.org>
date Sun, 10 Aug 1997 04:13:19 +0000
parents 3217a3ba8ef7
children 5834b16ab7fa
files src/coding.c
diffstat 1 files changed, 178 insertions(+), 113 deletions(-) [+]
line wrap: on
line diff
--- a/src/coding.c	Sun Aug 10 04:13:19 1997 +0000
+++ b/src/coding.c	Sun Aug 10 04:13:19 1997 +0000
@@ -278,13 +278,21 @@
 /* Coding-system actually used in the latest I/O.  */
 Lisp_Object Vlast_coding_system_used;
 
+/* A vector of length 256 which contains information about special
+   Microsoft codes.  */
+Lisp_Object Vmicrosoft_code_table;
+
 /* Flag to inhibit code conversion of end-of-line format.  */
 int inhibit_eol_conversion;
 
-/* Coding-system of what terminal accept for displaying.  */
+/* Coding system to be used to encode text for terminal display.  */
 struct coding_system terminal_coding;
 
-/* Coding-system of what is sent from terminal keyboard.  */
+/* Coding system to be used to encode text for terminal display when
+   terminal coding system is nil.  */
+struct coding_system safe_terminal_coding;
+
+/* Coding system of what is sent from terminal keyboard.  */
 struct coding_system keyboard_coding;
 
 Lisp_Object Vfile_coding_system_alist;
@@ -681,7 +689,16 @@
 	  if (c < 0x80)
 	    break;
 	  else if (c < 0xA0)
-	    return 0;
+	    {
+	      if (VECTORP (Vmicrosoft_code_table)
+		  && !NILP (XVECTOR (Vmicrosoft_code_table)->contents[c]))
+		{
+		  mask &= ~(CODING_CATEGORY_MASK_ISO_7
+			    | CODING_CATEGORY_MASK_ISO_7_ELSE);
+		  break;
+		}
+	      return 0;
+	    }
 	  else
 	    {
 	      unsigned char *src_begin = src;
@@ -1165,66 +1182,88 @@
    sequences are also produced in advance if necessary.  */
 
 
-#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)			\
-  do {									\
-    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))			\
-      {									\
-	if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)			\
-	  *dst++ = c1 & 0x7F;						\
-	else								\
-	  *dst++ = c1 | 0x80;						\
-	CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;			\
-	break;								\
-      }									\
-    else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))	\
-      {									\
-	*dst++ = c1 & 0x7F;						\
-	break;								\
-      }									\
-    else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))	\
-      {									\
-	*dst++ = c1 | 0x80;						\
-	break;								\
-      }									\
-    else								\
-      /* Since CHARSET is not yet invoked to any graphic planes, we	\
-	 must invoke it, or, at first, designate it to some graphic	\
-	 register.  Then repeat the loop to actually produce the	\
-	 character.  */							\
-      dst = encode_invocation_designation (charset, coding, dst);	\
+#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)			 \
+  do {									 \
+    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))			 \
+      {									 \
+	if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)			 \
+	  *dst++ = c1 & 0x7F;						 \
+	else								 \
+	  *dst++ = c1 | 0x80;						 \
+	CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;			 \
+	break;								 \
+      }									 \
+    else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))	 \
+      {									 \
+	*dst++ = c1 & 0x7F;						 \
+	break;								 \
+      }									 \
+    else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))	 \
+      {									 \
+	*dst++ = c1 | 0x80;						 \
+	break;								 \
+      }									 \
+    else if (coding->flags & CODING_FLAG_ISO_SAFE			 \
+	     && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) \
+		 == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))		 \
+      {									 \
+	/* We should not encode this character, instead produce one or	 \
+	   two `?'s.  */						 \
+	*dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;			 \
+	if (CHARSET_WIDTH (charset) == 2)				 \
+	  *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;		 \
+	break;								 \
+      }									 \
+    else								 \
+      /* Since CHARSET is not yet invoked to any graphic planes, we	 \
+	 must invoke it, or, at first, designate it to some graphic	 \
+	 register.  Then repeat the loop to actually produce the	 \
+	 character.  */							 \
+      dst = encode_invocation_designation (charset, coding, dst);	 \
   } while (1)
 
 /* Produce codes for a DIMENSION2 character whose character set is
    CHARSET and whose position-codes are C1 and C2.  Designation and
    invocation codes are also produced in advance if necessary.  */
 
-#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)		\
-  do {									\
-    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))			\
-      {									\
-	if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)			\
-	  *dst++ = c1 & 0x7F, *dst++ = c2 & 0x7F;			\
-	else								\
-	  *dst++ = c1 | 0x80, *dst++ = c2 | 0x80;			\
-	CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;			\
-	break;								\
-      }									\
-    else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))	\
-      {									\
-	*dst++ = c1 & 0x7F, *dst++= c2 & 0x7F;				\
-	break;								\
-      }									\
-    else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))	\
-      {									\
-	*dst++ = c1 | 0x80, *dst++= c2 | 0x80;				\
-	break;								\
-      }									\
-    else								\
-      /* Since CHARSET is not yet invoked to any graphic planes, we	\
-	 must invoke it, or, at first, designate it to some graphic	\
-	 register.  Then repeat the loop to actually produce the	\
-	 character.  */							\
-      dst = encode_invocation_designation (charset, coding, dst);	\
+#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)		 \
+  do {									 \
+    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))			 \
+      {									 \
+	if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)			 \
+	  *dst++ = c1 & 0x7F, *dst++ = c2 & 0x7F;			 \
+	else								 \
+	  *dst++ = c1 | 0x80, *dst++ = c2 | 0x80;			 \
+	CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;			 \
+	break;								 \
+      }									 \
+    else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))	 \
+      {									 \
+	*dst++ = c1 & 0x7F, *dst++= c2 & 0x7F;				 \
+	break;								 \
+      }									 \
+    else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))	 \
+      {									 \
+	*dst++ = c1 | 0x80, *dst++= c2 | 0x80;				 \
+	break;								 \
+      }									 \
+    else if (coding->flags & CODING_FLAG_ISO_SAFE			 \
+	     && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) \
+		 == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))		 \
+      {									 \
+	/* We should not encode this character, instead produce one or	 \
+	   two `?'s.  */						 \
+	*dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;			 \
+	if (CHARSET_WIDTH (charset) == 2)				 \
+	  *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;		 \
+	break;								 \
+      }									 \
+    else								 \
+      /* Since CHARSET is not yet invoked to any graphic planes, we	 \
+	 must invoke it, or, at first, designate it to some graphic	 \
+	 register.  Then repeat the loop to actually produce the	 \
+	 character.  */							 \
+      dst = encode_invocation_designation (charset, coding, dst);	 \
   } while (1)
 
 #define ENCODE_ISO_CHARACTER(charset, c1, c2)				  \
@@ -2331,7 +2370,9 @@
 	     | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
 	     | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
 	     | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
-	     | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL));
+	     | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
+	     | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
+	     );
 
 	/* Invoke graphic register 0 to plane 0.  */
 	CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
@@ -2415,34 +2456,35 @@
 	      default_reg_bits &= 3;
 	  }
 
-	for (charset = 0; charset <= MAX_CHARSET; charset++)
-	  if (CHARSET_VALID_P (charset)
-	      && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
-		  == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
-	    {
-	      /* We have not yet decided where to designate CHARSET.  */
-	      int reg_bits = default_reg_bits;
-
-	      if (CHARSET_CHARS (charset) == 96)
-		/* A charset of CHARS96 can't be designated to REG 0.  */
-		reg_bits &= ~1;
-
-	      if (reg_bits)
-		/* There exist some default graphic register.  */
-		CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
-		  = (reg_bits & 1
-		     ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
-	      else
-		/* We anyway have to designate CHARSET to somewhere.  */
-		CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
-		  = (CHARSET_CHARS (charset) == 94
-		     ? 0
-		     : ((coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT
-			 || ! coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
-			? 1
-			: (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT
-			   ? 2 : 0)));
-	    }
+	if (! (coding->flags & CODING_FLAG_ISO_SAFE))
+	  for (charset = 0; charset <= MAX_CHARSET; charset++)
+	    if (CHARSET_VALID_P (charset)
+		&& (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
+		    == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
+	      {
+		/* We have not yet decided where to designate CHARSET.  */
+		int reg_bits = default_reg_bits;
+
+		if (CHARSET_CHARS (charset) == 96)
+		  /* A charset of CHARS96 can't be designated to REG 0.  */
+		  reg_bits &= ~1;
+
+		if (reg_bits)
+		  /* There exist some default graphic register.  */
+		  CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
+		    = (reg_bits & 1
+		       ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
+		else
+		  /* We anyway have to designate CHARSET to somewhere.  */
+		  CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
+		    = (CHARSET_CHARS (charset) == 94
+		       ? 0
+		       : ((coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT
+			   || ! coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
+			  ? 1
+			  : (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT
+			     ? 2 : 0)));
+	      }
       }
       coding->require_flushing = 1;
       break;
@@ -2608,34 +2650,33 @@
 	/* No valid ISO2022 code follows C.  Try again.  */
 	goto label_loop_detect_coding;
     }
-  else if (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
-    /* C is an ISO2022 specific control code of C1,
-       or the first byte of SJIS's 2-byte character code,
-       or a leading code of Emacs.  */
-    mask = (detect_coding_iso2022 (src, src_end)
-	    | detect_coding_sjis (src, src_end)
-	    | detect_coding_emacs_mule (src, src_end)
-	    | CODING_CATEGORY_MASK_BINARY);
-
-  else if (c == ISO_CODE_CSI
-	   && (src < src_end
-	      && (*src == ']'
-		  || (src + 1 < src_end
-		      && src[1] == ']'
-		      && (*src == '0' || *src == '1' || *src == '2')))))
-    /* C is an ISO2022's control-sequence-introducer.  */
-    mask = (detect_coding_iso2022 (src, src_end)
-	    | detect_coding_sjis (src, src_end)
-	    | detect_coding_emacs_mule (src, src_end)
-	    | CODING_CATEGORY_MASK_BINARY);
-    
   else if (c < 0xA0)
-    /* C is the first byte of SJIS character code,
-       or a leading-code of Emacs.  */
-    mask = (detect_coding_sjis (src, src_end)
-	    | detect_coding_emacs_mule (src, src_end)
-	    | CODING_CATEGORY_MASK_BINARY);
-
+    {
+      /* If C is a special Microsoft code,
+	 or is an ISO2022 specific control code of C1 (SS2 or SS3), 
+	 or is an ISO2022 control-sequence-introducer (CSI),
+	 we should also consider the possibility of some ISO2022 codings.  */
+      if ((VECTORP (Vmicrosoft_code_table)
+	   && !NILP (XVECTOR (Vmicrosoft_code_table)->contents[c]))
+	  || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
+	  || (c == ISO_CODE_CSI
+	      && (src < src_end
+		  && (*src == ']'
+		      || (src + 1 < src_end
+			  && src[1] == ']'
+			  && (*src == '0' || *src == '1' || *src == '2'))))))
+	mask = (detect_coding_iso2022 (src, src_end)
+		| detect_coding_sjis (src, src_end)
+		| detect_coding_emacs_mule (src, src_end)
+		| CODING_CATEGORY_MASK_BINARY);
+
+      else
+	/* C is the first byte of SJIS character code, or a
+	   leading-code of Emacs.  */
+	mask = (detect_coding_sjis (src, src_end)
+		| detect_coding_emacs_mule (src, src_end)
+		| CODING_CATEGORY_MASK_BINARY);
+    }
   else
     /* C is a character of ISO2022 in graphic plane right,
        or a SJIS's 1-byte character code (i.e. JISX0201),
@@ -3547,6 +3588,18 @@
   return Qnil;
 }
 
+DEFUN ("set-safe-terminal-coding-system-internal",
+       Fset_safe_terminal_coding_system_internal,
+       Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
+  (coding_system)
+     Lisp_Object coding_system;
+{
+  CHECK_SYMBOL (coding_system, 0);
+  setup_coding_system (Fcheck_coding_system (coding_system),
+		       &safe_terminal_coding);
+  return Qnil;
+}
+
 DEFUN ("terminal-coding-system",
        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
   "Return coding-system of your terminal.")
@@ -3710,6 +3763,7 @@
 
   setup_coding_system (Qnil, &keyboard_coding);
   setup_coding_system (Qnil, &terminal_coding);
+  setup_coding_system (Qnil, &safe_terminal_coding);
 
 #if defined (MSDOS) || defined (WINDOWSNT)
   system_eol_type = CODING_EOL_CRLF;
@@ -3824,6 +3878,7 @@
   defsubr (&Sdecode_big5_char);
   defsubr (&Sencode_big5_char);
   defsubr (&Sset_terminal_coding_system_internal);
+  defsubr (&Sset_safe_terminal_coding_system_internal);
   defsubr (&Sterminal_coding_system);
   defsubr (&Sset_keyboard_coding_system_internal);
   defsubr (&Skeyboard_coding_system);
@@ -3954,6 +4009,16 @@
 The car part is used for decoding a process output,\n\
 the cdr part is used for encoding a text to be sent to a process.");
   Vdefault_process_coding_system = Qnil;
+
+  DEFVAR_LISP ("special-microsoft-code-table", &Vmicrosoft_code_table,
+    "Table of special Microsoft codes in the range 128..159 (inclusive).\n\
+This is a vector of length 256.\n\
+If Nth element is non-nil, the existence of code N in a file\n\
+(or output of subprocess) doesn't prevent it to be detected as\n\
+a coding system of ISO 2022 variant (e.g. iso-latin-1) on reading a file\n\
+or reading output of a subprocess.\n\
+Only 128th through 159th elements has a meaning.");
+  Vmicrosoft_code_table = Fmake_vector (make_number (256), Qnil);
 }
 
 #endif /* emacs */