changeset 43080:a6382f0fcb2a

(read1): Redesign the strategy for force_multibyte and force_singlebyte. A new variable, is_multibyte, now records whether read_buffer currently holds multibyte text. Encountering any multibyte character sets it and converts the text accumulated so far to multibyte.
author Richard M. Stallman <rms@gnu.org>
date Sun, 03 Feb 2002 10:35:20 +0000
parents a202d9fb56eb
children 6307d3a2321b
files src/lread.c
diffstat 1 files changed, 50 insertions(+), 41 deletions(-) [+]
line wrap: on
line diff
--- a/src/lread.c	Sun Feb 03 10:33:01 2002 +0000
+++ b/src/lread.c	Sun Feb 03 10:35:20 2002 +0000
@@ -2145,14 +2145,16 @@
 	char *p = read_buffer;
 	char *end = read_buffer + read_buffer_size;
 	register int c;
-	/* Nonzero if we saw an escape sequence specifying
-	   a multibyte character.  */
+	/* 1 if we saw an escape sequence specifying
+	   a multibyte character, or a multibyte character.  */
 	int force_multibyte = 0;
-	/* Nonzero if we saw an escape sequence specifying
+	/* 1 if we saw an escape sequence specifying
 	   a single-byte character.  */
 	int force_singlebyte = 0;
+	/* 1 if read_buffer contains multibyte text now.  */
+	int is_multibyte = 0;
 	int cancel = 0;
-	int nchars;
+	int nchars = 0;
 
 	while ((c = READCHAR) >= 0
 	       && c != '\"')
@@ -2186,39 +2188,47 @@
 		  force_multibyte = 1;
 	      }
 
-	    if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
+	    /* A character that must be multibyte forces multibyte.  */
+	    if (! SINGLE_BYTE_CHAR_P (c & ~CHAR_MODIFIER_MASK))
+	      force_multibyte = 1;
+
+	    /* If we just discovered the need to be multibyte,
+	       convert the text accumulated thus far.  */
+	    if (force_multibyte && ! is_multibyte)
 	      {
-		/* Any modifiers for a multibyte character are invalid.  */
-		if (c & CHAR_MODIFIER_MASK)
-		  error ("Invalid modifier in string");
-		p += CHAR_STRING (c, p);
-		force_multibyte = 1;
+		is_multibyte = 1;
+		to_multibyte (&p, &end, &nchars);
 	      }
-	    else
+
+	    /* Allow `\C- ' and `\C-?'.  */
+	    if (c == (CHAR_CTL | ' '))
+	      c = 0;
+	    else if (c == (CHAR_CTL | '?'))
+	      c = 127;
+
+	    if (c & CHAR_SHIFT)
 	      {
-		/* Allow `\C- ' and `\C-?'.  */
-		if (c == (CHAR_CTL | ' '))
-		  c = 0;
-		else if (c == (CHAR_CTL | '?'))
-		  c = 127;
-
-		if (c & CHAR_SHIFT)
-		  {
-		    /* Shift modifier is valid only with [A-Za-z].  */
-		    if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
-		      c &= ~CHAR_SHIFT;
-		    else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
-		      c = (c & ~CHAR_SHIFT) - ('a' - 'A');
-		  }
-
-		if (c & CHAR_META)
-		  /* Move the meta bit to the right place for a string.  */
-		  c = (c & ~CHAR_META) | 0x80;
-		if (c & ~0xff)
-		  error ("Invalid modifier in string");
-		*p++ = c;
+		/* Shift modifier is valid only with [A-Za-z].  */
+		if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
+		  c &= ~CHAR_SHIFT;
+		else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
+		  c = (c & ~CHAR_SHIFT) - ('a' - 'A');
 	      }
+
+	    if (c & CHAR_META)
+	      /* Move the meta bit to the right place for a string.  */
+	      c = (c & ~CHAR_META) | 0x80;
+	    if (c & CHAR_MODIFIER_MASK)
+	      error ("Invalid modifier in string");
+
+	    if (is_multibyte)
+	      p += CHAR_STRING (c, p);
+	    else
+	      *p++ = c;
+
+	    nchars++;
 	  }
+
 	if (c < 0)
 	  end_of_file_error ();
 
@@ -2228,10 +2238,8 @@
 	if (!NILP (Vpurify_flag) && NILP (Vdoc_file_name) && cancel)
 	  return make_number (0);
 
-	if (force_multibyte)
-	  to_multibyte (&p, &end, &nchars);
-	else if (force_singlebyte)
-	  nchars = p - read_buffer;
+	if (is_multibyte || force_singlebyte)
+	  ;
 	else if (load_convert_to_unibyte)
 	  {
 	    Lisp_Object string;
@@ -2242,6 +2250,8 @@
 						p - read_buffer);
 		return Fstring_make_unibyte (string);
 	      }
+	    /* We can make a unibyte string directly.  */
+	    is_multibyte = 0;
 	  }
 	else if (EQ (readcharfun, Qget_file_char)
 		 || EQ (readcharfun, Qlambda))
@@ -2252,19 +2262,18 @@
 	       for reading dynamic byte code (compiled with
 	       byte-compile-dynamic = t).  */
 	    to_multibyte (&p, &end, &nchars);
+	    is_multibyte = 1;
 	  }
 	else
 	  /* In all other cases, if we read these bytes as
 	     separate characters, treat them as separate characters now.  */
-	  nchars = p - read_buffer;
+	  ;
 
 	if (read_pure)
 	  return make_pure_string (read_buffer, nchars, p - read_buffer,
-				   (force_multibyte
-				    || (p - read_buffer != nchars)));
+				   is_multibyte);
 	return make_specified_string (read_buffer, nchars, p - read_buffer,
-				      (force_multibyte
-				       || (p - read_buffer != nchars)));
+				      is_multibyte);
       }
 
     case '.':