emacs: src/lread.c comparison

comparison src/lread.c @ 89483:2f877ed80fa6

*** empty log message ***

author	Kenichi Handa <handa@m17n.org>
date	Mon, 08 Sep 2003 12:53:41 +0000
parents	375f2633d815 85b37317d5ea
children	68c22ea6027c

comparison

equal deleted inserted replaced

-:375f2633d815
+:2f877ed80fa6
 #include <sys/file.h>
 #include <errno.h>
 #include "lisp.h"
 #include "intervals.h"
 #include "buffer.h"
+#include "character.h"
 #include "charset.h"
+#include "coding.h"
 #include <epaths.h>
 #include "commands.h"
 #include "keyboard.h"
 #include "termhooks.h"
 #include "coding.h"
 Lisp_Object Qvariable_documentation, Vvalues, Vstandard_input, Vafter_load_alist;
 Lisp_Object Qascii_character, Qload, Qload_file_name;
 Lisp_Object Qbackquote, Qcomma, Qcomma_at, Qcomma_dot, Qfunction;
 Lisp_Object Qinhibit_file_name_operation;
+/* Used instead of Qget_file_char while loading *.elc files compiled
+by Emacs 21 or older.  */
+static Lisp_Object Qget_emacs_mule_file_char;
+static Lisp_Object Qload_force_doc_strings;
 extern Lisp_Object Qevent_symbol_element_mask;
 extern Lisp_Object Qfile_exists_p;
 /* non-zero if inside `load' */
 int load_in_progress;
 static int load_force_doc_strings;
 /* Nonzero means read should convert strings to unibyte.  */
 static int load_convert_to_unibyte;
+/* Nonzero means READCHAR should read bytes one by one (not characters)
+when READCHARFUN is Qget_file_char or Qget_emacs_mule_file_char.
+This is set to 1 by read1 temporarily while handling #@NUMBER.  */
+static int load_each_byte;
 /* Function to use for loading an Emacs lisp source file (not
 compiled) instead of readevalloop.  */
 Lisp_Object Vload_source_file_function;
 /* List of all DEFVAR_BOOL variables.  Used by the byte optimizer.  */
 /* For use within read-from-string (this reader is non-reentrant!!)  */
 static int read_from_string_index;
 static int read_from_string_index_byte;
 static int read_from_string_limit;
-/* Number of bytes left to read in the buffer character
-that `readchar' has already advanced over.  */
-static int readchar_backlog;
 /* Number of characters read in the current call to Fread or
 Fread_from_string. */
 static int readchar_count;
 /* This contains the last string skipped with #@.  */
 /* A regular expression used to detect files compiled with Emacs.  */
 static Lisp_Object Vbytecomp_version_regexp;
-static void to_multibyte P_ ((char **, char **, int *));
+static int read_emacs_mule_char P_ ((int, int (*) (int, Lisp_Object),
+				     Lisp_Object));
 static void readevalloop P_ ((Lisp_Object, FILE*, Lisp_Object,
 			      Lisp_Object (*) (), int,
 			      Lisp_Object, Lisp_Object));
 static Lisp_Object load_unwind P_ ((Lisp_Object));
 static Lisp_Object load_descriptor_unwind P_ ((Lisp_Object));
+/* Functions that read one byte from the current source READCHARFUN
+or unread one byte.  If the integer argument C is -1, the function
+returns the byte it read, or -1 when there are no more bytes in the
+source.  If C is 0 or positive, the function unreads C, and the
+return value is not interesting.  */
+static int readbyte_for_lambda P_ ((int, Lisp_Object));
+static int readbyte_from_file P_ ((int, Lisp_Object));
+static int readbyte_from_string P_ ((int, Lisp_Object));
 /* Handle unreading and rereading of characters.
 Write READCHAR to read a character,
 UNREAD(c) to unread c to be read again.
-The READCHAR and UNREAD macros are meant for reading/unreading a
+These macros correctly read/unread multibyte characters.  */
-byte code; they do not handle multibyte characters.  The caller
-should manage them if necessary.
-[ Actually that seems to be a lie; READCHAR will definitely read
-multibyte characters from buffer sources, at least.  Is the
-comment just out of date?
--- Colin Walters <walters@gnu.org>, 22 May 2002 16:36:50 -0400 ]
-*/
 #define READCHAR readchar (readcharfun)
 #define UNREAD(c) unreadchar (readcharfun, c)
+/* When READCHARFUN is Qget_file_char, Qget_emacs_mule_file_char,
+Qlambda, or a cons, we use this to keep an unread character because
+a file stream can't handle multibyte-char unreading.  The value -1
+means that there's no unread character. */
+static int unread_char;
 static int
 readchar (readcharfun)
 Lisp_Object readcharfun;
 {
 Lisp_Object tem;
 register int c;
+int (*readbyte) P_ ((int, Lisp_Object));
+unsigned char buf[MAX_MULTIBYTE_LENGTH];
+int i, len;
+int emacs_mule_encoding = 0;
 readchar_count++;
 if (BUFFERP (readcharfun))
 {
 register struct buffer *inbuffer = XBUFFER (readcharfun);
 int pt_byte = BUF_PT_BYTE (inbuffer);
-int orig_pt_byte = pt_byte;
-if (readchar_backlog > 0)
-	/* We get the address of the byte just passed,
-	   which is the last byte of the character.
-	   The other bytes in this character are consecutive with it,
-	   because the gap can't be in the middle of a character.  */
-	return *(BUF_BYTE_ADDRESS (inbuffer, BUF_PT_BYTE (inbuffer) - 1)
-		 - --readchar_backlog);
 if (pt_byte >= BUF_ZV_BYTE (inbuffer))
 	return -1;
-readchar_backlog = -1;
 if (! NILP (inbuffer->enable_multibyte_characters))
 	{
 	  /* Fetch the character code from the buffer.  */
 	  unsigned char *p = BUF_BYTE_ADDRESS (inbuffer, pt_byte);
 	  c = STRING_CHAR (p, pt_byte - orig_pt_byte);
 	}
 else
 	{
 	  c = BUF_FETCH_BYTE (inbuffer, pt_byte);
+	  if (! ASCII_BYTE_P (c))
+	    c = BYTE8_TO_CHAR (c);
 	  pt_byte++;
 	}
 SET_BUF_PT_BOTH (inbuffer, BUF_PT (inbuffer) + 1, pt_byte);
 return c;
 if (MARKERP (readcharfun))
 {
 register struct buffer *inbuffer = XMARKER (readcharfun)->buffer;
 int bytepos = marker_byte_position (readcharfun);
-int orig_bytepos = bytepos;
-if (readchar_backlog > 0)
-	/* We get the address of the byte just passed,
-	   which is the last byte of the character.
-	   The other bytes in this character are consecutive with it,
-	   because the gap can't be in the middle of a character.  */
-	return *(BUF_BYTE_ADDRESS (inbuffer, XMARKER (readcharfun)->bytepos - 1)
-		 - --readchar_backlog);
 if (bytepos >= BUF_ZV_BYTE (inbuffer))
 	return -1;
-readchar_backlog = -1;
 if (! NILP (inbuffer->enable_multibyte_characters))
 	{
 	  /* Fetch the character code from the buffer.  */
 	  unsigned char *p = BUF_BYTE_ADDRESS (inbuffer, bytepos);
 	  c = STRING_CHAR (p, bytepos - orig_bytepos);
 	}
 else
 	{
 	  c = BUF_FETCH_BYTE (inbuffer, bytepos);
+	  if (! ASCII_BYTE_P (c))
+	    c = BYTE8_TO_CHAR (c);
 	  bytepos++;
 	}
 XMARKER (readcharfun)->bytepos = bytepos;
 XMARKER (readcharfun)->charpos++;
 return c;
 }
 if (EQ (readcharfun, Qlambda))
-return read_bytecode_char (0);
+{
+readbyte = readbyte_for_lambda;
+goto read_multibyte;
+}
 if (EQ (readcharfun, Qget_file_char))
 {
-c = getc (instream);
+readbyte = readbyte_from_file;
-#ifdef EINTR
+goto read_multibyte;
-/* Interrupted reads have been observed while reading over the network */
-while (c == EOF && ferror (instream) && errno == EINTR)
-	{
-	  clearerr (instream);
-	  c = getc (instream);
-	}
-#endif
-return c;
 }
 if (STRINGP (readcharfun))
 {
 if (read_from_string_index >= read_from_string_limit)
 				   read_from_string_index_byte);
 return c;
 }
+if (CONSP (readcharfun))
+{
+/* This is the case that read_vector is reading from a unibyte
+	 string that contains a byte sequence previously skipped
+	 because of #@NUMBER.  The car part of readcharfun is that
+	 string, and the cdr part is a value of readcharfun given to
+	 read_vector.  */
+readbyte = readbyte_from_string;
+if (EQ (XCDR (readcharfun), Qget_emacs_mule_file_char))
+	emacs_mule_encoding = 1;
+goto read_multibyte;
+}
+if (EQ (readcharfun, Qget_emacs_mule_file_char))
+{
+readbyte = readbyte_from_file;
+emacs_mule_encoding = 1;
+goto read_multibyte;
+}
 tem = call0 (readcharfun);
 if (NILP (tem))
 return -1;
 return XINT (tem);
+read_multibyte:
+if (unread_char >= 0)
+{
+c = unread_char;
+unread_char = -1;
+return c;
+}
+c = (*readbyte) (-1, readcharfun);
+if (c < 0 || ASCII_BYTE_P (c) || load_each_byte)
+return c;
+if (emacs_mule_encoding)
+return read_emacs_mule_char (c, readbyte, readcharfun);
+i = 0;
+buf[i++] = c;
+len = BYTES_BY_CHAR_HEAD (c);
+while (i < len)
+{
+c = (*readbyte) (-1, readcharfun);
+if (c < 0 || ! TRAILING_CODE_P (c))
+	{
+	  while (--i > 1)
+	    (*readbyte) (buf[i], readcharfun);
+	  return BYTE8_TO_CHAR (buf[0]);
+	}
+buf[i++] = c;
+}
+return STRING_CHAR (buf, i);
 }
 /* Unread the character C in the way appropriate for the stream READCHARFUN.
 If the stream is a user function, call it with the char as argument.  */
 else if (BUFFERP (readcharfun))
 {
 struct buffer *b = XBUFFER (readcharfun);
 int bytepos = BUF_PT_BYTE (b);
-if (readchar_backlog >= 0)
+BUF_PT (b)--;
-	readchar_backlog++;
+if (! NILP (b->enable_multibyte_characters))
+	BUF_DEC_POS (b, bytepos);
 else
-	{
+	bytepos--;
-	  BUF_PT (b)--;
-	  if (! NILP (b->enable_multibyte_characters))
+BUF_PT_BYTE (b) = bytepos;
-	    BUF_DEC_POS (b, bytepos);
-	  else
-	    bytepos--;
-	  BUF_PT_BYTE (b) = bytepos;
-	}
 }
 else if (MARKERP (readcharfun))
 {
 struct buffer *b = XMARKER (readcharfun)->buffer;
 int bytepos = XMARKER (readcharfun)->bytepos;
-if (readchar_backlog >= 0)
+XMARKER (readcharfun)->charpos--;
-	readchar_backlog++;
+if (! NILP (b->enable_multibyte_characters))
+	BUF_DEC_POS (b, bytepos);
 else
-	{
+	bytepos--;
-	  XMARKER (readcharfun)->charpos--;
-	  if (! NILP (b->enable_multibyte_characters))
+XMARKER (readcharfun)->bytepos = bytepos;
-	    BUF_DEC_POS (b, bytepos);
-	  else
-	    bytepos--;
-	  XMARKER (readcharfun)->bytepos = bytepos;
-	}
 }
 else if (STRINGP (readcharfun))
 {
 read_from_string_index--;
 read_from_string_index_byte
 	= string_char_to_byte (readcharfun, read_from_string_index);
 }
+else if (CONSP (readcharfun))
+{
+unread_char = c;
+}
 else if (EQ (readcharfun, Qlambda))
-read_bytecode_char (1);
+{
-else if (EQ (readcharfun, Qget_file_char))
+unread_char = c;
-ungetc (c, instream);
+}
+else if (EQ (readcharfun, Qget_file_char)
+	   || EQ (readcharfun, Qget_emacs_mule_file_char))
+{
+if (load_each_byte)
+	ungetc (c, instream);
+else
+	unread_char = c;
+}
 else
 call1 (readcharfun, make_number (c));
 }
+/* Byte accessor used by readchar when READCHARFUN is Qlambda.
+   C is -1 to request the next byte and non-negative to request that
+   one byte be pushed back.  Either request is forwarded to
+   read_bytecode_char, which receives only a flag (nonzero for an
+   unread request); the value of C itself is not passed on.
+   READCHARFUN is unused here.  */
+static int
+readbyte_for_lambda (c, readcharfun)
+     int c;
+     Lisp_Object readcharfun;
+{
+  int unread_p = (c >= 0);
+
+  return read_bytecode_char (unread_p);
+}
+/* Byte accessor for the file stream INSTREAM.
+   If C is non-negative, push C back onto INSTREAM with ungetc and
+   return 0.  Otherwise read one byte and return it, or return -1 at
+   end of file or on a read error.  READCHARFUN is unused here.  */
+static int
+readbyte_from_file (c, readcharfun)
+     int c;
+     Lisp_Object readcharfun;
+{
+  int byte;
+
+  if (c >= 0)
+    {
+      ungetc (c, instream);
+      return 0;
+    }
+
+  for (;;)
+    {
+      byte = getc (instream);
+#ifdef EINTR
+      /* Interrupted reads have been observed while reading over the
+	 network; clear the error indicator and try again.  */
+      if (byte == EOF && ferror (instream) && errno == EINTR)
+	{
+	  clearerr (instream);
+	  continue;
+	}
+#endif
+      break;
+    }
+
+  if (byte == EOF)
+    return -1;
+  return byte;
+}
+/* Byte accessor used when READCHARFUN is a cons whose car is the
+   string being read.  The read position is kept in the file-level
+   variables read_from_string_index, read_from_string_index_byte and
+   read_from_string_limit.
+
+   If C is non-negative, step the read position back by one element
+   and return 0; the value of C is not stored, so the element that
+   will be read next is whatever the string holds at that position.
+   Otherwise return the next element of the string and advance past
+   it, or return -1 when the position has reached the limit.  */
+static int
+readbyte_from_string (c, readcharfun)
+     int c;
+     Lisp_Object readcharfun;
+{
+  Lisp_Object string = XCAR (readcharfun);
+
+  if (c >= 0)
+    {
+      read_from_string_index--;
+      read_from_string_index_byte
+	= string_char_to_byte (string, read_from_string_index);
+      /* An unread request must stop here.  Falling through to the
+	 fetch below would read the same element again and advance
+	 past it, which cancels the step back.  */
+      return 0;
+    }
+
+  if (read_from_string_index >= read_from_string_limit)
+    c = -1;
+  else
+    FETCH_STRING_CHAR_ADVANCE (c, string,
+			       read_from_string_index,
+			       read_from_string_index_byte);
+  return c;
+}
+/* Read one non-ASCII character encoded in `emacs-mule'.  C is the
+   first byte, which the caller has already read; the remaining
+   bytes are obtained by calling READBYTE with -1 and READCHARFUN.
+
+   Return the decoded character.  If C is not a valid leading code,
+   or if a following byte is missing or is below 0xA0, return C as
+   an eight-bit character (BYTE8_TO_CHAR) after pushing every other
+   byte consumed here back through READBYTE, so that the caller sees
+   them again.  Signal `invalid-read-syntax' if the byte sequence is
+   well formed but DECODE_CHAR yields a negative value.  */
+extern char emacs_mule_bytes[256];
+static int
+read_emacs_mule_char (c, readbyte, readcharfun)
+     int c;
+     int (*readbyte) P_ ((int, Lisp_Object));
+     Lisp_Object readcharfun;
+{
+  /* Emacs-mule coding uses at most 4 bytes for one character.  */
+  unsigned char buf[4];
+  int len = emacs_mule_bytes[c];
+  struct charset *charset;
+  int i;
+  unsigned code;
+
+  if (len == 1)
+    /* C is not a valid leading-code of `emacs-mule'.  */
+    return BYTE8_TO_CHAR (c);
+
+  i = 0;
+  buf[i++] = c;
+  while (i < len)
+    {
+      c = (*readbyte) (-1, readcharfun);
+      if (c < 0xA0)
+	{
+	  /* Not a trailing byte (or end of input).  Push back, in
+	     reverse order of reading, the offending byte (unless it
+	     is the end-of-input marker) and then every buffered byte
+	     except the leading one, which is returned as a raw
+	     byte.  The previous loop `while (--i > 1)' never
+	     restored buf[1] and dropped the offending byte.
+	     NOTE(review): ungetc guarantees only one byte of
+	     pushback, so for file sources this relies on the stdio
+	     implementation accepting more -- confirm.  */
+	  if (c >= 0)
+	    (*readbyte) (c, readcharfun);
+	  while (--i > 0)
+	    (*readbyte) (buf[i], readcharfun);
+	  return BYTE8_TO_CHAR (buf[0]);
+	}
+      buf[i++] = c;
+    }
+
+  if (len == 2)
+    {
+      /* Leading code followed by a one-byte code point.  */
+      charset = emacs_mule_charset[buf[0]];
+      code = buf[1] & 0x7F;
+    }
+  else if (len == 3)
+    {
+      if (buf[0] == EMACS_MULE_LEADING_CODE_PRIVATE_11
+	  || buf[0] == EMACS_MULE_LEADING_CODE_PRIVATE_12)
+	{
+	  /* Private leading code: the charset is selected by the
+	     second byte and the code point is one byte.  */
+	  charset = emacs_mule_charset[buf[1]];
+	  code = buf[2] & 0x7F;
+	}
+      else
+	{
+	  /* Leading code followed by a two-byte code point.  */
+	  charset = emacs_mule_charset[buf[0]];
+	  code = ((buf[1] << 8) | buf[2]) & 0x7F7F;
+	}
+    }
+  else
+    {
+      /* Four bytes: the charset is selected by the second byte and
+	 the code point is two bytes.  */
+      charset = emacs_mule_charset[buf[1]];
+      code = ((buf[2] << 8) | buf[3]) & 0x7F7F;
+    }
+
+  c = DECODE_CHAR (charset, code);
+  if (c < 0)
+    Fsignal (Qinvalid_read_syntax,
+	     Fcons (build_string ("invalid multibyte form"), Qnil));
+  return c;
+}
 static Lisp_Object read_internal_start P_ ((Lisp_Object, Lisp_Object,
 					    Lisp_Object));
 static Lisp_Object read0 P_ ((Lisp_Object));
 static Lisp_Object read1 P_ ((Lisp_Object, int *, int));
 static Lisp_Object read_list P_ ((int, Lisp_Object));
 static Lisp_Object read_vector P_ ((Lisp_Object, int));
-static int read_multibyte P_ ((int, Lisp_Object));
 static Lisp_Object substitute_object_recurse P_ ((Lisp_Object, Lisp_Object,
 						  Lisp_Object));
 static void substitute_object_in_subtree P_ ((Lisp_Object,
 					      Lisp_Object));
 return val;
 }
-/* Value is non-zero if the file asswociated with file descriptor FD
+/* Value is a version number of byte compiled code if the file
-is a compiled Lisp file that's safe to load.  Only files compiled
+associated with file descriptor FD is a compiled Lisp file that's
-with Emacs are safe to load.  Files compiled with XEmacs can lead
+safe to load.  Only files compiled with Emacs are safe to load.
-to a crash in Fbyte_code because of an incompatible change in the
+Files compiled with XEmacs can lead to a crash in Fbyte_code
-byte compiler.  */
+because of an incompatible change in the byte compiler.  */
 static int
 safe_to_load_p (fd)
 int fd;
 {
 char buf[512];
 int nbytes, i;
 int safe_p = 1;
+int version = 1;
 /* Read the first few bytes from the file, and look for a line
 specifying the byte compiler version used.  */
 nbytes = emacs_read (fd, buf, sizeof buf - 1);
 if (nbytes > 0)
 {
 buf[nbytes] = '\0';
 /* Skip to the next newline, skipping over the initial `ELC'
-	 with NUL bytes following it.  */
+	 with NUL bytes following it, but note the version.  */
 for (i = 0; i < nbytes && buf[i] != '\n'; ++i)
-	;
+	if (i == 4)
+	  version = buf[i];
-if (i < nbytes
-	  && fast_c_string_match_ignore_case (Vbytecomp_version_regexp,
+if (i == nbytes
+	  || fast_c_string_match_ignore_case (Vbytecomp_version_regexp,
 					      buf + i) < 0)
 	safe_p = 0;
 }
+if (safe_p)
+safe_p = version;
 lseek (fd, 0, SEEK_SET);
 return safe_p;
 }
 /* 1 means we are loading a compiled file.  */
 int compiled = 0;
 Lisp_Object handler;
 int safe_p = 1;
 char *fmode = "r";
+int version;
 #ifdef DOS_NT
 fmode = "rt";
 #endif /* DOS_NT */
 CHECK_STRING (file);
 			      Fcons (found, Vloads_in_progress)));
 record_unwind_protect (record_load_unwind, Vloads_in_progress);
 Vloads_in_progress = Fcons (found, Vloads_in_progress);
 }
+version = -1;
 if (!bcmp (SDATA (found) + SBYTES (found) - 4,
-	     ".elc", 4))
+	     ".elc", 4)
+|| (version = safe_to_load_p (fd)) > 0)
 /* Load .elc files directly, but not when they are
 remote and have no handler!  */
 {
 if (fd != -2)
 	{
 	  struct stat s1, s2;
 	  int result;
-	  if (!safe_to_load_p (fd))
+	  if (version < 0
+	      && ! (version = safe_to_load_p (fd)))
 	    {
 	      safe_p = 0;
 	      if (!load_dangerous_libraries)
 		{
 		  if (fd >= 0)
 specbind (Qload_file_name, found);
 specbind (Qinhibit_file_name_operation, Qnil);
 load_descriptor_list
 = Fcons (make_number (fileno (stream)), load_descriptor_list);
 load_in_progress++;
-readevalloop (Qget_file_char, stream, file, Feval, 0, Qnil, Qnil);
+if (! version || version >= 22)
+readevalloop (Qget_file_char, stream, file, Feval, 0, Qnil, Qnil);
+else
+{
+/* We can't handle a file which was compiled with
+	 byte-compile-dynamic by an older version of Emacs.  */
+specbind (Qload_force_doc_strings, Qt);
+readevalloop (Qget_emacs_mule_file_char, stream, file, Feval, 0,
+		    Qnil, Qnil);
+}
 unbind_to (count, Qnil);
 /* Run any load-hooks for this file.  */
 temp = Fassoc (file, Vafter_load_alist);
 if (!NILP (temp))
 specbind (Qstandard_input, readcharfun);
 specbind (Qcurrent_load_list, Qnil);
 record_unwind_protect (readevalloop_1, load_convert_to_unibyte ? Qt : Qnil);
 load_convert_to_unibyte = !NILP (unibyte);
-readchar_backlog = -1;
 GCPRO1 (sourcename);
 LOADHIST_ATTACH (sourcename);
 continue_reading_p = 1;
 Lisp_Object start; /* Only used when stream is a string. */
 Lisp_Object end; /* Only used when stream is a string. */
 {
 Lisp_Object retval;
-readchar_backlog = -1;
 readchar_count = 0;
 new_backquote_flag = 0;
 read_objects = Qnil;
 if (EQ (Vread_with_symbol_positions, Qt)
 || EQ (Vread_with_symbol_positions, stream))
 Vread_symbol_positions_list = Qnil;
-if (STRINGP (stream))
+if (STRINGP (stream)
+|| ((CONSP (stream) && STRINGP (XCAR (stream)))))
 {
 int startval, endval;
+Lisp_Object string;
+if (STRINGP (stream))
+	string = stream;
+else
+	string = XCAR (stream);
 if (NILP (end))
-	endval = SCHARS (stream);
+	endval = SCHARS (string);
 else
 	{
 	  CHECK_NUMBER (end);
 	  endval = XINT (end);
-	  if (endval < 0 || endval > SCHARS (stream))
+	  if (endval < 0 || endval > SCHARS (string))
-	    args_out_of_range (stream, end);
+	    args_out_of_range (string, end);
 	}
 if (NILP (start))
 	startval = 0;
 else
 	{
 	  CHECK_NUMBER (start);
 	  startval = XINT (start);
 	  if (startval < 0 || startval > endval)
-	    args_out_of_range (stream, start);
+	    args_out_of_range (string, start);
 	}
 read_from_string_index = startval;
-read_from_string_index_byte = string_char_to_byte (stream, startval);
+read_from_string_index_byte = string_char_to_byte (string, startval);
 read_from_string_limit = endval;
 }
 retval = read0 (stream);
 if (EQ (Vread_with_symbol_positions, Qt)
 }
 static int read_buffer_size;
 static char *read_buffer;
-/* Read multibyte form and return it as a character.  C is a first
+/* Read a \-escape sequence, assuming we already read the `\'.
-byte of multibyte form, and rest of them are read from
+If the escape sequence forces unibyte, return eight-bit char.  */
-READCHARFUN.  */
 static int
-read_multibyte (c, readcharfun)
+read_escape (readcharfun, stringp)
-register int c;
-Lisp_Object readcharfun;
-{
-/* We need the actual character code of this multibyte
-characters.  */
-unsigned char str[MAX_MULTIBYTE_LENGTH];
-int len = 0;
-int bytes;
-if (c < 0)
-return c;
-str[len++] = c;
-while ((c = READCHAR) >= 0xA0
-	 && len < MAX_MULTIBYTE_LENGTH)
-{
-str[len++] = c;
-readchar_count--;
-}
-UNREAD (c);
-if (UNIBYTE_STR_AS_MULTIBYTE_P (str, len, bytes))
-return STRING_CHAR (str, len);
-/* The byte sequence is not valid as multibyte.  Unread all bytes
-but the first one, and return the first byte.  */
-while (--len > 0)
-UNREAD (str[len]);
-return str[0];
-}
-/* Read a \-escape sequence, assuming we already read the `\'.
-If the escape sequence forces unibyte, store 1 into *BYTEREP.
-If the escape sequence forces multibyte, store 2 into *BYTEREP.
-Otherwise store 0 into *BYTEREP.  */
-static int
-read_escape (readcharfun, stringp, byterep)
 Lisp_Object readcharfun;
 int stringp;
-int *byterep;
 {
 register int c = READCHAR;
-*byterep = 0;
 switch (c)
 {
 case -1:
 end_of_file_error ();
 c = READCHAR;
 if (c != '-')
 	error ("Invalid escape character syntax");
 c = READCHAR;
 if (c == '\\')
-	c = read_escape (readcharfun, 0, byterep);
+	c = read_escape (readcharfun, 0);
 return c | meta_modifier;
 case 'S':
 c = READCHAR;
 if (c != '-')
 	error ("Invalid escape character syntax");
 c = READCHAR;
 if (c == '\\')
-	c = read_escape (readcharfun, 0, byterep);
+	c = read_escape (readcharfun, 0);
 return c | shift_modifier;
 case 'H':
 c = READCHAR;
 if (c != '-')
 	error ("Invalid escape character syntax");
 c = READCHAR;
 if (c == '\\')
-	c = read_escape (readcharfun, 0, byterep);
+	c = read_escape (readcharfun, 0);
 return c | hyper_modifier;
 case 'A':
 c = READCHAR;
 if (c != '-')
 	error ("Invalid escape character syntax");
 c = READCHAR;
 if (c == '\\')
-	c = read_escape (readcharfun, 0, byterep);
+	c = read_escape (readcharfun, 0);
 return c | alt_modifier;
 case 's':
 if (stringp)
 	return ' ';
 	UNREAD (c);
 	return ' ';
 }
 c = READCHAR;
 if (c == '\\')
-	c = read_escape (readcharfun, 0, byterep);
+	c = read_escape (readcharfun, 0);
 return c | super_modifier;
 case 'C':
 c = READCHAR;
 if (c != '-')
 	error ("Invalid escape character syntax");
 case '^':
 c = READCHAR;
 if (c == '\\')
-	c = read_escape (readcharfun, 0, byterep);
+	c = read_escape (readcharfun, 0);
 if ((c & ~CHAR_MODIFIER_MASK) == '?')
 	return 0177 | (c & CHAR_MODIFIER_MASK);
 else if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
 	return c | ctrl_modifier;
 /* ASCII control chars are made from letters (both cases),
 		UNREAD (c);
 		break;
 	      }
 	  }
-	*byterep = 1;
+	if (i >= 0x80 && i < 0x100)
+	  i = BYTE8_TO_CHAR (i);
 	return i;
 }
 case 'x':
 /* A hex escape, as in ANSI C.  */
 {
 	int i = 0;
+	int count = 0;
 	while (1)
 	  {
 	    c = READCHAR;
 	    if (c >= '0' && c <= '9')
 	      {
 	    else
 	      {
 		UNREAD (c);
 		break;
 	      }
+	    count++;
 	  }
-	*byterep = 2;
+	if (count < 3 && i >= 0x80)
+	  return BYTE8_TO_CHAR (i);
 	return i;
 }
 default:
-if (BASE_LEADING_CODE_P (c))
-	c = read_multibyte (c, readcharfun);
 return c;
 }
 }
 return make_number (sign * number);
 }
-/* Convert unibyte text in read_buffer to multibyte.
-Initially, *P is a pointer after the end of the unibyte text, and
-the pointer *END points after the end of read_buffer.
-If read_buffer doesn't have enough room to hold the result
-of the conversion, reallocate it and adjust *P and *END.
-At the end, make *P point after the result of the conversion, and
-return in *NCHARS the number of characters in the converted
-text.  */
-static void
-to_multibyte (p, end, nchars)
-char **p, **end;
-int *nchars;
-{
-int nbytes;
-parse_str_as_multibyte (read_buffer, *p - read_buffer, &nbytes, nchars);
-if (read_buffer_size < 2 * nbytes)
-{
-int offset = *p - read_buffer;
-read_buffer_size = 2 * max (read_buffer_size, nbytes);
-read_buffer = (char *) xrealloc (read_buffer, read_buffer_size);
-*p = read_buffer + offset;
-*end = read_buffer + read_buffer_size;
-}
-if (nbytes != *nchars)
-nbytes = str_as_multibyte (read_buffer, read_buffer_size,
-			       *p - read_buffer, nchars);
-*p = read_buffer + nbytes;
-}
 /* If the next token is ')' or ']' or '.', we store that character
 in *PCH and the return value is not interesting.  Else, we store
 zero in *PCH and we read and return one lisp object.
 FIRST_IN_LIST is nonzero if this is the first element of a list.  */
 {
 register int c;
 int uninterned_symbol = 0;
 *pch = 0;
+load_each_byte = 0;
 retry:
 c = READCHAR;
 if (c < 0)
 	  c = READCHAR;
 	  if (c == '[')
 	    {
 	      Lisp_Object tmp;
 	      tmp = read_vector (readcharfun, 0);
-	      if (XVECTOR (tmp)->size < CHAR_TABLE_STANDARD_SLOTS
+	      if (XVECTOR (tmp)->size != VECSIZE (struct Lisp_Char_Table))
-		  || XVECTOR (tmp)->size > CHAR_TABLE_STANDARD_SLOTS + 10)
 		error ("Invalid size char-table");
 	      XSETCHAR_TABLE (tmp, XCHAR_TABLE (tmp));
-	      XCHAR_TABLE (tmp)->top = Qt;
 	      return tmp;
 	    }
 	  else if (c == '^')
 	    {
 	      c = READCHAR;
 	      if (c == '[')
 		{
 		  Lisp_Object tmp;
+		  int depth, size;
 		  tmp = read_vector (readcharfun, 0);
-		  if (XVECTOR (tmp)->size != SUB_CHAR_TABLE_STANDARD_SLOTS)
+		  if (!INTEGERP (AREF (tmp, 0)))
+		    error ("Invalid depth in char-table");
+		  depth = XINT (AREF (tmp, 0));
+		  if (depth < 1 || depth > 3)
+		    error ("Invalid depth in char-table");
+		  size = XVECTOR (tmp)->size + 2;
+		  if (chartab_size [depth] != size)
 		    error ("Invalid size char-table");
-		  XSETCHAR_TABLE (tmp, XCHAR_TABLE (tmp));
+		  XSETSUB_CHAR_TABLE (tmp, XSUB_CHAR_TABLE (tmp));
-		  XCHAR_TABLE (tmp)->top = Qnil;
 		  return tmp;
 		}
 	      Fsignal (Qinvalid_read_syntax,
 		       Fcons (make_string ("#^^", 3), Qnil));
 	    }
 	      int size_in_chars = ((XFASTINT (length) + BITS_PER_CHAR - 1)
 				   / BITS_PER_CHAR);
 	      UNREAD (c);
 	      tmp = read1 (readcharfun, pch, first_in_list);
-	      if (size_in_chars != SCHARS (tmp)
+	      if (STRING_MULTIBYTE (tmp)
-		  /* We used to print 1 char too many
+		  || (size_in_chars != SCHARS (tmp)
-		     when the number of bits was a multiple of 8.
+		      /* We used to print 1 char too many
-		     Accept such input in case it came from an old version.  */
+			 when the number of bits was a multiple of 8.
-		  && ! (XFASTINT (length)
+			 Accept such input in case it came from an old
-			== (SCHARS (tmp) - 1) * BITS_PER_CHAR))
+			 version.  */
+		      && ! (XFASTINT (length)
+			    == (SCHARS (tmp) - 1) * BITS_PER_CHAR)))
 		Fsignal (Qinvalid_read_syntax,
 			 Fcons (make_string ("#&...", 5), Qnil));
 	      val = Fmake_bool_vector (length, Qnil);
 	      bcopy (SDATA (tmp), XBOOL_VECTOR (val)->data,
 	 and function definitions.  */
 if (c == '@')
 	{
 	  int i, nskip = 0;
+	  load_each_byte = 1;
 	  /* Read a decimal integer.  */
 	  while ((c = READCHAR) >= 0
 		 && c >= '0' && c <= '9')
 	    {
 	      nskip *= 10;
 	      nskip += c - '0';
 	    }
 	  if (c >= 0)
 	    UNREAD (c);
-	  if (load_force_doc_strings && EQ (readcharfun, Qget_file_char))
+	  if (load_force_doc_strings
+	      && (EQ (readcharfun, Qget_file_char)
+		  || EQ (readcharfun, Qget_emacs_mule_file_char)))
 	    {
 	      /* If we are supposed to force doc strings into core right now,
 		 record the last string that we skipped,
 		 and record where in the file it comes from.  */
 	      /* Skip that many characters.  */
 	      for (i = 0; i < nskip && c >= 0; i++)
 		c = READCHAR;
 	    }
+	  load_each_byte = 0;
 	  goto retry;
 	}
 if (c == '!')
 	{
 	  /* #! appears at the beginning of an executable file.
 else
 	goto default_label;
 case '?':
 {
-	int discard;
+	int modifiers;
 	int next_char;
 	int ok;
 	c = READCHAR;
 	if (c < 0)
 	   as there are well-established escape sequences for these.  */
 	if (c == ' ' || c == '\t')
 	  return make_number (c);
 	if (c == '\\')
-	  c = read_escape (readcharfun, 0, &discard);
+	  c = read_escape (readcharfun, 0);
-	else if (BASE_LEADING_CODE_P (c))
+	modifiers = c & CHAR_MODIFIER_MASK;
-	  c = read_multibyte (c, readcharfun);
+	c &= ~CHAR_MODIFIER_MASK;
+	if (CHAR_BYTE8_P (c))
+	  c = CHAR_TO_BYTE8 (c);
+	c |= modifiers;
 	next_char = READCHAR;
 	if (next_char == '.')
 	  {
 	    /* Only a dotted-pair dot is valid after a char constant.  */
 case '"':
 {
 	char *p = read_buffer;
 	char *end = read_buffer + read_buffer_size;
 	register int c;
-	/* 1 if we saw an escape sequence specifying
+	/* Nonzero if we saw an escape sequence specifying
-	   a multibyte character, or a multibyte character.  */
+	   a multibyte character.  */
 	int force_multibyte = 0;
-	/* 1 if we saw an escape sequence specifying
+	/* Nonzero if we saw an escape sequence specifying
 	   a single-byte character.  */
 	int force_singlebyte = 0;
-	/* 1 if read_buffer contains multibyte text now.  */
-	int is_multibyte = 0;
 	int cancel = 0;
 	int nchars = 0;
 	while ((c = READCHAR) >= 0
 	       && c != '\"')
 		end = read_buffer + read_buffer_size;
 	      }
 	    if (c == '\\')
 	      {
-		int byterep;
+		int modifiers;
-		c = read_escape (readcharfun, 1, &byterep);
+		c = read_escape (readcharfun, 1);
 		/* C is -1 if \ newline has just been seen */
 		if (c == -1)
 		  {
 		    if (p == read_buffer)
 		      cancel = 1;
 		    continue;
 		  }
-		if (byterep == 1)
+		modifiers = c & CHAR_MODIFIER_MASK;
+		c = c & ~CHAR_MODIFIER_MASK;
+		if (CHAR_BYTE8_P (c))
 		  force_singlebyte = 1;
-		else if (byterep == 2)
+		else if (! ASCII_CHAR_P (c))
+		  force_multibyte = 1;
+		else		/* i.e. ASCII_CHAR_P (c) */
+		  {
+		    /* Allow `\C- ' and `\C-?'.  */
+		    if (modifiers == CHAR_CTL)
+		      {
+			if (c == ' ')
+			  c = 0, modifiers = 0;
+			else if (c == '?')
+			  c = 127, modifiers = 0;
+		      }
+		    if (modifiers & CHAR_SHIFT)
+		      {
+			/* Shift modifier is valid only with [A-Za-z].  */
+			if (c >= 'A' && c <= 'Z')
+			  modifiers &= ~CHAR_SHIFT;
+			else if (c >= 'a' && c <= 'z')
+			  c -= ('a' - 'A'), modifiers &= ~CHAR_SHIFT;
+		      }
+		    if (modifiers & CHAR_META)
+		      {
+			/* Move the meta bit to the right place for a
+			   string.  */
+			modifiers &= ~CHAR_META;
+			c = BYTE8_TO_CHAR (c | 0x80);
+			force_singlebyte = 1;
+		      }
+		  }
+		/* Any modifiers remaining are invalid.  */
+		if (modifiers)
+		  error ("Invalid modifier in string");
+		p += CHAR_STRING (c, (unsigned char *) p);
+	      }
+	    else
+	      {
+		p += CHAR_STRING (c, (unsigned char *) p);
+		if (CHAR_BYTE8_P (c))
+		  force_singlebyte = 1;
+		else if (! ASCII_CHAR_P (c))
 		  force_multibyte = 1;
 	      }
-	    /* A character that must be multibyte forces multibyte.  */
-	    if (! SINGLE_BYTE_CHAR_P (c & ~CHAR_MODIFIER_MASK))
-	      force_multibyte = 1;
-	    /* If we just discovered the need to be multibyte,
-	       convert the text accumulated thus far.  */
-	    if (force_multibyte && ! is_multibyte)
-	      {
-		is_multibyte = 1;
-		to_multibyte (&p, &end, &nchars);
-	      }
-	    /* Allow `\C- ' and `\C-?'.  */
-	    if (c == (CHAR_CTL | ' '))
-	      c = 0;
-	    else if (c == (CHAR_CTL | '?'))
-	      c = 127;
-	    if (c & CHAR_SHIFT)
-	      {
-		/* Shift modifier is valid only with [A-Za-z].  */
-		if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
-		  c &= ~CHAR_SHIFT;
-		else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
-		  c = (c & ~CHAR_SHIFT) - ('a' - 'A');
-	      }
-	    if (c & CHAR_META)
-	      /* Move the meta bit to the right place for a string.  */
-	      c = (c & ~CHAR_META) | 0x80;
-	    if (c & CHAR_MODIFIER_MASK)
-	      error ("Invalid modifier in string");
-	    if (is_multibyte)
-	      p += CHAR_STRING (c, p);
-	    else
-	      *p++ = c;
 	    nchars++;
 	  }
 	if (c < 0)
 	  end_of_file_error ();
 	   return zero instead.  This is for doc strings
 	   that we are really going to find in etc/DOC.nn.nn  */
 	if (!NILP (Vpurify_flag) && NILP (Vdoc_file_name) && cancel)
 	  return make_number (0);
-	if (is_multibyte || force_singlebyte)
+	if (force_multibyte)
+	  /* READ_BUFFER already contains valid multibyte forms.  */
 	  ;
-	else if (load_convert_to_unibyte)
+	else if (force_singlebyte)
 	  {
-	    Lisp_Object string;
+	    nchars = str_as_unibyte (read_buffer, p - read_buffer);
-	    to_multibyte (&p, &end, &nchars);
+	    p = read_buffer + nchars;
-	    if (p - read_buffer != nchars)
-	      {
-		string = make_multibyte_string (read_buffer, nchars,
-						p - read_buffer);
-		return Fstring_make_unibyte (string);
-	      }
-	    /* We can make a unibyte string directly.  */
-	    is_multibyte = 0;
-	  }
-	else if (EQ (readcharfun, Qget_file_char)
-		 || EQ (readcharfun, Qlambda))
-	  {
-	    /* Nowadays, reading directly from a file is used only for
-	       compiled Emacs Lisp files, and those always use the
-	       Emacs internal encoding.  Meanwhile, Qlambda is used
-	       for reading dynamic byte code (compiled with
-	       byte-compile-dynamic = t).  So make the string multibyte
-	       if the string contains any multibyte sequences.
-	       (to_multibyte is a no-op if not.)  */
-	    to_multibyte (&p, &end, &nchars);
-	    is_multibyte = (p - read_buffer) != nchars;
 	  }
 	else
-	  /* In all other cases, if we read these bytes as
+	  /* Otherwise, READ_BUFFER contains only ASCII.  */
-	     separate characters, treat them as separate characters now.  */
 	  ;
 	/* We want readchar_count to be the number of characters, not
 	   bytes.  Hence we adjust for multibyte characters in the
 	   string.  ... But it doesn't seem to be necessary, because
 	   READCHAR *does* read multibyte characters from buffers. */
 	/* readchar_count -= (p - read_buffer) - nchars; */
 	if (read_pure)
 	  return make_pure_string (read_buffer, nchars, p - read_buffer,
-				   is_multibyte);
+				   (force_multibyte
+				    || (p - read_buffer != nchars)));
 	return make_specified_string (read_buffer, nchars, p - read_buffer,
-				      is_multibyte);
+				      (force_multibyte
+				       || (p - read_buffer != nchars)));
 }
 case '.':
 {
 	int next_char = READCHAR;
 		  if (c == -1)
 		    end_of_file_error ();
 		  quoted = 1;
 		}
-	      if (! SINGLE_BYTE_CHAR_P (c))
+	      p += CHAR_STRING (c, p);
-		p += CHAR_STRING (c, p);
-	      else
-		*p++ = c;
 	      c = READCHAR;
 	    }
 	  if (p == end)
 	    {
 		if (p1 == p)
 		  /* It is an integer. */
 		  {
 		    if (p1[-1] == '.')
 		      p1[-1] = '\0';
+		    /* Fixme: if we have strtol, use that, and check
+		       for overflow; atoi and atol cannot report it.  */
 		    if (sizeof (int) == sizeof (EMACS_INT))
 		      XSETINT (val, atoi (read_buffer));
 		    else if (sizeof (long) == sizeof (EMACS_INT))
 		      XSETINT (val, atol (read_buffer));
 		    else
 		     but without generating extra garbage and
 		     guaranteeing no change in the contents).  */
 		  STRING_SET_CHARS (bytestr, SBYTES (bytestr));
 		  STRING_SET_UNIBYTE (bytestr);
-		  item = Fread (bytestr);
+		  item = Fread (Fcons (bytestr, readcharfun));
 		  if (!CONSP (item))
 		    error ("invalid byte code");
 		  otem = XCONS (item);
 		  bytestr = XCAR (item);
 		  free_cons (otem);
 		}
 	      /* Now handle the bytecode slot.  */
 	      ptr[COMPILED_BYTECODE] = read_pure ? Fpurecopy (bytestr) : bytestr;
+	    }
+	  else if (i == COMPILED_DOC_STRING
+		   && STRINGP (item)
+		   && ! STRING_MULTIBYTE (item))
+	    {
+	      if (EQ (readcharfun, Qget_emacs_mule_file_char))
+		item = Fdecode_coding_string (item, Qemacs_mule, Qnil, Qnil);
+	      else
+		item = Fstring_as_multibyte (item);
 	    }
 	}
 ptr[i] = read_pure ? Fpurecopy (item) : item;
 otem = XCONS (tem);
 tem = Fcdr (tem);
 		  if (doc_reference == 1)
 		    return make_number (0);
 		  if (doc_reference == 2)
 		    {
 		      /* Get a doc string from the file we are loading.
-			 If it's in saved_doc_string, get it from there.  */
+			 If it's in saved_doc_string, get it from there.
+			 Here, we don't know whether the string is a
+			 bytecode string or a doc string.  Since a
+			 bytecode string must be unibyte, we always
+			 return a unibyte string.  If it is actually a
+			 doc string, the caller must make it
+			 multibyte.  */
 		      int pos = XINT (XCDR (val));
 		      /* Position is negative for user variables.  */
 		      if (pos < 0) pos = -pos;
 		      if (pos >= saved_doc_string_position
 			  && pos < (saved_doc_string_position
 				}
 			      else
 				saved_doc_string[to++] = c;
 			    }
-			  return make_string (saved_doc_string + start,
+			  return make_unibyte_string (saved_doc_string + start,
-					      to - start);
+						      to - start);
 			}
 		      /* Look in prev_saved_doc_string the same way.  */
 		      else if (pos >= prev_saved_doc_string_position
 			       && pos < (prev_saved_doc_string_position
 					 + prev_saved_doc_string_length))
 				}
 			      else
 				prev_saved_doc_string[to++] = c;
 			    }
-			  return make_string (prev_saved_doc_string + start,
+			  return make_unibyte_string (prev_saved_doc_string
-					      to - start);
+						      + start,
+						      to - start);
 			}
 		      else
-			return get_doc_string (val, 0, 0);
+			return get_doc_string (val, 1, 0);
 		    }
 		  return val;
 		}
 	      return Fsignal (Qinvalid_read_syntax, Fcons (make_string (". in wrong context", 18), Qnil));
 staticpro (&Qread_char);
 Qget_file_char = intern ("get-file-char");
 staticpro (&Qget_file_char);
+Qget_emacs_mule_file_char = intern ("get-emacs-mule-file-char");
+staticpro (&Qget_emacs_mule_file_char);
+Qload_force_doc_strings = intern ("load-force-doc-strings");
+staticpro (&Qload_force_doc_strings);
 Qbackquote = intern ("`");
 staticpro (&Qbackquote);
 Qcomma = intern (",");
 staticpro (&Qcomma);
 Qcomma_at = intern (",@");

Mercurial > emacs

comparison src/lread.c @ 89483:2f877ed80fa6