emacs: src/lread.c comparison

comparison src/lread.c @ 43080:a6382f0fcb2a

(read1): Redesign strategy for force_multibyte and force_singlebyte. Now is_multibyte records whether read_buffer is multibyte. Encountering any multibyte character makes it so.

author	Richard M. Stallman <rms@gnu.org>
date	Sun, 03 Feb 2002 10:35:20 +0000
parents	2fc2abcdc67c
children	4bd6b6b21deb 0b4249d736a0

comparison

equal deleted inserted replaced

-:a202d9fb56eb
+:a6382f0fcb2a
 case '"':
 {
 	char *p = read_buffer;
 	char *end = read_buffer + read_buffer_size;
 	register int c;
-	/* Nonzero if we saw an escape sequence specifying
+	/* 1 if we saw an escape sequence specifying
-	   a multibyte character.  */
+	   a multibyte character, or a literal multibyte character.  */
 	int force_multibyte = 0;
-	/* Nonzero if we saw an escape sequence specifying
+	/* 1 if we saw an escape sequence specifying
 	   a single-byte character.  */
 	int force_singlebyte = 0;
+	/* 1 if read_buffer contains multibyte text now.  */
+	int is_multibyte = 0;
 	int cancel = 0;
-	int nchars;
+	int nchars = 0;
 	while ((c = READCHAR) >= 0
 	       && c != '\"')
 	  {
 	    if (end - p < MAX_MULTIBYTE_LENGTH)
 		  force_singlebyte = 1;
 		else if (byterep == 2)
 		  force_multibyte = 1;
 	      }
-	    if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
+	    /* A character that must be multibyte forces multibyte.  */
+	    if (! SINGLE_BYTE_CHAR_P (c & ~CHAR_MODIFIER_MASK))
+	      force_multibyte = 1;
+	    /* If we just discovered the need to be multibyte,
+	       convert the text accumulated thus far.  */
+	    if (force_multibyte && ! is_multibyte)
 	      {
-		/* Any modifiers for a multibyte character are invalid.  */
+		is_multibyte = 1;
-		if (c & CHAR_MODIFIER_MASK)
+		to_multibyte (&p, &end, &nchars);
-		  error ("Invalid modifier in string");
-		p += CHAR_STRING (c, p);
-		force_multibyte = 1;
 	      }
+	    /* Allow `\C- ' and `\C-?'.  */
+	    if (c == (CHAR_CTL | ' '))
+	      c = 0;
+	    else if (c == (CHAR_CTL | '?'))
+	      c = 127;
+	    if (c & CHAR_SHIFT)
+	      {
+		/* Shift modifier is valid only with [A-Za-z].  */
+		if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
+		  c &= ~CHAR_SHIFT;
+		else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
+		  c = (c & ~CHAR_SHIFT) - ('a' - 'A');
+	      }
+	    if (c & CHAR_META)
+	      /* Move the meta bit to the right place for a string.  */
+	      c = (c & ~CHAR_META) | 0x80;
+	    if (c & CHAR_MODIFIER_MASK)
+	      error ("Invalid modifier in string");
+	    if (is_multibyte)
+	      p += CHAR_STRING (c, p);
 	    else
-	      {
+	      *p++ = c;
-		/* Allow `\C- ' and `\C-?'.  */
-		if (c == (CHAR_CTL | ' '))
+	    nchars++;
-		  c = 0;
-		else if (c == (CHAR_CTL | '?'))
-		  c = 127;
-		if (c & CHAR_SHIFT)
-		  {
-		    /* Shift modifier is valid only with [A-Za-z].  */
-		    if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
-		      c &= ~CHAR_SHIFT;
-		    else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
-		      c = (c & ~CHAR_SHIFT) - ('a' - 'A');
-		  }
-		if (c & CHAR_META)
-		  /* Move the meta bit to the right place for a string.  */
-		  c = (c & ~CHAR_META) | 0x80;
-		if (c & ~0xff)
-		  error ("Invalid modifier in string");
-		*p++ = c;
-	      }
 	  }
 	if (c < 0)
 	  end_of_file_error ();
 	/* If purifying, and string starts with \ newline,
 	   return zero instead.  This is for doc strings
 	   that we are really going to find in etc/DOC.nn.nn  */
 	if (!NILP (Vpurify_flag) && NILP (Vdoc_file_name) && cancel)
 	  return make_number (0);
-	if (force_multibyte)
+	if (is_multibyte || force_singlebyte)
-	  to_multibyte (&p, &end, &nchars);
+	  ;
-	else if (force_singlebyte)
-	  nchars = p - read_buffer;
 	else if (load_convert_to_unibyte)
 	  {
 	    Lisp_Object string;
 	    to_multibyte (&p, &end, &nchars);
 	    if (p - read_buffer != nchars)
 	      {
 		string = make_multibyte_string (read_buffer, nchars,
 						p - read_buffer);
 		return Fstring_make_unibyte (string);
 	      }
+	    /* We can make a unibyte string directly.  */
+	    is_multibyte = 0;
 	  }
 	else if (EQ (readcharfun, Qget_file_char)
 		 || EQ (readcharfun, Qlambda))
 	  {
 	    /* Nowadays, reading directly from a file is used only for
 	       compiled Emacs Lisp files, and those always use the
 	       Emacs internal encoding.  Meanwhile, Qlambda is used
 	       for reading dynamic byte code (compiled with
 	       byte-compile-dynamic = t).  */
 	    to_multibyte (&p, &end, &nchars);
+	    is_multibyte = 1;
 	  }
 	else
 	  /* In all other cases, if we read these bytes as
 	     separate characters, treat them as separate characters now.  */
-	  nchars = p - read_buffer;
+	  ;
 	if (read_pure)
 	  return make_pure_string (read_buffer, nchars, p - read_buffer,
-				   (force_multibyte
+				   is_multibyte);
-				    || (p - read_buffer != nchars)));
 	return make_specified_string (read_buffer, nchars, p - read_buffer,
-				      (force_multibyte
+				      is_multibyte);
-				       || (p - read_buffer != nchars)));
 }
 case '.':
 {
 	int next_char = READCHAR;

Mercurial > emacs

comparison src/lread.c @ 43080:a6382f0fcb2a