Mercurial > emacs
changeset 88886:300f83fb46e1
Include "coding.h".
(Qget_emacs_mule_file_char, Qload_force_doc_strings,
load_each_byte, unread_char): New variables.
(readchar_backlog): This variable deleted.
(readchar): Return a character unless load_each_byte is nonzero.
Handle the case that readcharfun is Qget_emacs_mule_file_char or a
cons. If unread_char is not -1, simply return it.
(unreadchar): Handle the case that readcharfun is
Qget_emacs_mule_file_char or a cons. Set unread_char if
necessary.
(read_multibyte): This function deleted.
(readbyte_for_lambda, readbyte_from_file, readbyte_from_string)
(read_emacs_mule_char): New functions.
(Fload): Even if the file doesn't have the extention ".elc", if
safe_to_load_p returns a positive version number, assume that the
file contains bytecompiled code. If the version is less than 22,
load the file while decoding multibyte sequences by emacs-mule.
(readevalloop): Don't use readchar_backlog.
(Fread): Likewise. Pay attention to the case that STREAM is a
cons.
(Fread_from_string): Pay attention to the case that STREAM is a
cons.
(read_escape): The arg BYTEREP deleted.
(read1): Set load_each_byte to 1 temporarily while handling
#@NUMBER. Don't call read_multibyte.
(read_vector): Call Fread with a cons. If readcharfun is
Qget_emacs_mule_file_char, decode the read string by emacs-mule.
(read_list): If doc_reference is 2, make the cdr part string as
unibyte.
(syms_of_lread): Intern and staticpro Qget_emacs_mule_file_char
and Qload_force_doc_strings.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Wed, 24 Jul 2002 10:51:15 +0000 |
parents | c636c5e9d6ce |
children | 3c4bb9fc6a5f |
files | src/lread.c |
diffstat | 1 files changed, 355 insertions(+), 258 deletions(-) [+] |
line wrap: on
line diff
--- a/src/lread.c Wed Jul 24 10:50:53 2002 +0000 +++ b/src/lread.c Wed Jul 24 10:51:15 2002 +0000 @@ -31,6 +31,7 @@ #include "buffer.h" #include "character.h" #include "charset.h" +#include "coding.h" #include <epaths.h> #include "commands.h" #include "keyboard.h" @@ -83,6 +84,12 @@ Lisp_Object Qbackquote, Qcomma, Qcomma_at, Qcomma_dot, Qfunction; Lisp_Object Qinhibit_file_name_operation; +/* Used instead of Qget_file_char while loading *.elc files compiled + by Emacs 21 or older. */ +static Lisp_Object Qget_emacs_mule_file_char; + +static Lisp_Object Qload_force_doc_strings; + extern Lisp_Object Qevent_symbol_element_mask; extern Lisp_Object Qfile_exists_p; @@ -126,6 +133,11 @@ /* Nonzero means read should convert strings to unibyte. */ static int load_convert_to_unibyte; +/* Nonzero means READCHAR should read bytes one by one (not character) + when READCHARFUN is Qget_file_char or Qget_emacs_mule_file_char. + This is set to 1 by read1 temporarily while handling #@NUMBER. */ +static int load_each_byte; + /* Function to use for loading an Emacs lisp source file (not compiled) instead of readevalloop. */ Lisp_Object Vload_source_file_function; @@ -147,10 +159,6 @@ static int read_from_string_index_byte; static int read_from_string_limit; -/* Number of bytes left to read in the buffer character - that `readchar' has already advanced over. */ -static int readchar_backlog; - /* This contains the last string skipped with #@. */ static char *saved_doc_string; /* Length of buffer allocated in saved_doc_string. */ @@ -190,6 +198,9 @@ static Lisp_Object Vbytecomp_version_regexp; +static int read_emacs_mule_char P_ ((int, int (*) (int, Lisp_Object), + Lisp_Object)); + static void readevalloop P_ ((Lisp_Object, FILE*, Lisp_Object, Lisp_Object (*) (), int, Lisp_Object, Lisp_Object)); @@ -197,23 +208,42 @@ static Lisp_Object load_descriptor_unwind P_ ((Lisp_Object)); +/* Functions that read one byte from the current source READCHARFUN + or unreads one byte. If the integer argument C is -1, it returns + one read byte, or -1 when there's no more byte in the source. If C + is 0 or positive, it unreads C, and the return value is not + interesting. */ + +static int readbyte_for_lambda P_ ((int, Lisp_Object)); +static int readbyte_from_file P_ ((int, Lisp_Object)); +static int readbyte_from_string P_ ((int, Lisp_Object)); + /* Handle unreading and rereading of characters. Write READCHAR to read a character, UNREAD(c) to unread c to be read again. - These macros actually read/unread a byte code, multibyte characters - are not handled here. The caller should manage them if necessary. - */ + These macros correctly read/unread multibyte characters. */ #define READCHAR readchar (readcharfun) #define UNREAD(c) unreadchar (readcharfun, c) +/* When READCHARFUN is Qget_file_char, Qget_emacs_mule_file_char, + Qlambda, or a cons, we use this to keep unread character because a + file stream can't handle multibyte-char unreading. The value -1 + means that there's no unread character. */ +static int unread_char; + + static int readchar (readcharfun) Lisp_Object readcharfun; { Lisp_Object tem; register int c; + int (*readbyte) P_ ((int, Lisp_Object)); + unsigned char buf[MAX_MULTIBYTE_LENGTH]; + int i, len; + int emacs_mule_encoding = 0; if (BUFFERP (readcharfun)) { @@ -221,19 +251,9 @@ int pt_byte = BUF_PT_BYTE (inbuffer); - if (readchar_backlog > 0) - /* We get the address of the byte just passed, - which is the last byte of the character. - The other bytes in this character are consecutive with it, - because the gap can't be in the middle of a character. */ - return *(BUF_BYTE_ADDRESS (inbuffer, BUF_PT_BYTE (inbuffer) - 1) - - --readchar_backlog); - if (pt_byte >= BUF_ZV_BYTE (inbuffer)) return -1; - readchar_backlog = -1; - if (! NILP (inbuffer->enable_multibyte_characters)) { /* Fetch the character code from the buffer. */ @@ -244,6 +264,8 @@ else { c = BUF_FETCH_BYTE (inbuffer, pt_byte); + if (! ASCII_BYTE_P (c)) + c = BYTE8_TO_CHAR (c); pt_byte++; } SET_BUF_PT_BOTH (inbuffer, BUF_PT (inbuffer) + 1, pt_byte); @@ -256,19 +278,9 @@ int bytepos = marker_byte_position (readcharfun); - if (readchar_backlog > 0) - /* We get the address of the byte just passed, - which is the last byte of the character. - The other bytes in this character are consecutive with it, - because the gap can't be in the middle of a character. */ - return *(BUF_BYTE_ADDRESS (inbuffer, XMARKER (readcharfun)->bytepos - 1) - - --readchar_backlog); - if (bytepos >= BUF_ZV_BYTE (inbuffer)) return -1; - readchar_backlog = -1; - if (! NILP (inbuffer->enable_multibyte_characters)) { /* Fetch the character code from the buffer. */ @@ -279,6 +291,8 @@ else { c = BUF_FETCH_BYTE (inbuffer, bytepos); + if (! ASCII_BYTE_P (c)) + c = BYTE8_TO_CHAR (c); bytepos++; } @@ -289,22 +303,15 @@ } if (EQ (readcharfun, Qlambda)) - return read_bytecode_char (0); - + { + readbyte = readbyte_for_lambda; + goto read_multibyte; + } if (EQ (readcharfun, Qget_file_char)) { - c = getc (instream); -#ifdef EINTR - /* Interrupted reads have been observed while reading over the network */ - while (c == EOF && ferror (instream) && errno == EINTR) - { - clearerr (instream); - c = getc (instream); - } -#endif - return c; + readbyte = readbyte_from_file; + goto read_multibyte; } - if (STRINGP (readcharfun)) { if (read_from_string_index >= read_from_string_limit) @@ -316,14 +323,61 @@ return c; } + if (CONSP (readcharfun)) + { + /* This is the case that read_vector is reading from a unibyte + string that contains a byte sequence previously skipped + because of #@NUMBER. The car part of readcharfun is that + string, and the cdr part is a value of readcharfun given to + read_vector. */ + readbyte = readbyte_from_string; + if (EQ (XCDR (readcharfun), Qget_emacs_mule_file_char)) + emacs_mule_encoding = 1; + goto read_multibyte; + } + if (EQ (readcharfun, Qget_emacs_mule_file_char)) + { + readbyte = readbyte_from_file; + emacs_mule_encoding = 1; + goto read_multibyte; + } tem = call0 (readcharfun); if (NILP (tem)) return -1; return XINT (tem); + + read_multibyte: + if (unread_char >= 0) + { + c = unread_char; + unread_char = -1; + return c; + } + c = (*readbyte) (-1, readcharfun); + if (c < 0 || ASCII_BYTE_P (c) || load_each_byte) + return c; + if (emacs_mule_encoding) + return read_emacs_mule_char (c, readbyte, readcharfun); + i = 0; + buf[i++] = c; + len = BYTES_BY_CHAR_HEAD (c); + while (i < len) + { + c = (*readbyte) (-1, readcharfun); + if (c < 0 || ! TRAILING_CODE_P (c)) + { + while (--i > 1) + (*readbyte) (buf[i], readcharfun); + return BYTE8_TO_CHAR (buf[0]); + } + buf[i++] = c; + } + return STRING_CHAR (buf, i); } + /* Unread the character C in the way appropriate for the stream READCHARFUN. If the stream is a user function, call it with the char as argument. */ @@ -341,36 +395,26 @@ struct buffer *b = XBUFFER (readcharfun); int bytepos = BUF_PT_BYTE (b); - if (readchar_backlog >= 0) - readchar_backlog++; + BUF_PT (b)--; + if (! NILP (b->enable_multibyte_characters)) + BUF_DEC_POS (b, bytepos); else - { - BUF_PT (b)--; - if (! NILP (b->enable_multibyte_characters)) - BUF_DEC_POS (b, bytepos); - else - bytepos--; - - BUF_PT_BYTE (b) = bytepos; - } + bytepos--; + + BUF_PT_BYTE (b) = bytepos; } else if (MARKERP (readcharfun)) { struct buffer *b = XMARKER (readcharfun)->buffer; int bytepos = XMARKER (readcharfun)->bytepos; - if (readchar_backlog >= 0) - readchar_backlog++; + XMARKER (readcharfun)->charpos--; + if (! NILP (b->enable_multibyte_characters)) + BUF_DEC_POS (b, bytepos); else - { - XMARKER (readcharfun)->charpos--; - if (! NILP (b->enable_multibyte_characters)) - BUF_DEC_POS (b, bytepos); - else - bytepos--; - - XMARKER (readcharfun)->bytepos = bytepos; - } + bytepos--; + + XMARKER (readcharfun)->bytepos = bytepos; } else if (STRINGP (readcharfun)) { @@ -378,16 +422,152 @@ read_from_string_index_byte = string_char_to_byte (readcharfun, read_from_string_index); } + else if (CONSP (readcharfun)) + { + unread_char = c; + } else if (EQ (readcharfun, Qlambda)) - read_bytecode_char (1); - else if (EQ (readcharfun, Qget_file_char)) - ungetc (c, instream); + { + unread_char = c; + } + else if (EQ (readcharfun, Qget_file_char) + || EQ (readcharfun, Qget_emacs_mule_file_char)) + { + if (load_each_byte) + ungetc (c, instream); + else + unread_char = c; + } else call1 (readcharfun, make_number (c)); } +static int +readbyte_for_lambda (c, readcharfun) + int c; + Lisp_Object readcharfun; +{ + return read_bytecode_char (c >= 0); +} + + +static int +readbyte_from_file (c, readcharfun) + int c; + Lisp_Object readcharfun; +{ + if (c >= 0) + { + ungetc (c, instream); + return 0; + } + + c = getc (instream); +#ifdef EINTR + /* Interrupted reads have been observed while reading over the network */ + while (c == EOF && ferror (instream) && errno == EINTR) + { + clearerr (instream); + c = getc (instream); + } +#endif + return (c == EOF ? -1 : c); +} + +static int +readbyte_from_string (c, readcharfun) + int c; + Lisp_Object readcharfun; +{ + Lisp_Object string = XCAR (readcharfun); + + if (c >= 0) + { + read_from_string_index--; + read_from_string_index_byte + = string_char_to_byte (string, read_from_string_index); + } + + if (read_from_string_index >= read_from_string_limit) + c = -1; + else + FETCH_STRING_CHAR_ADVANCE (c, string, + read_from_string_index, + read_from_string_index_byte); + return c; +} + + +/* Read one non-ASCII character from INSTREAM. The character is + encoded in `emacs-mule' and the first byte is already read in + C. */ + +extern char emacs_mule_bytes[256]; + +static int +read_emacs_mule_char (c, readbyte, readcharfun) + int c; + int (*readbyte) P_ ((int, Lisp_Object)); + Lisp_Object readcharfun; +{ + /* Emacs-mule coding uses at most 4-byte for one character. */ + unsigned char buf[4]; + int len = emacs_mule_bytes[c]; + struct charset *charset; + int i; + unsigned code; + + if (len == 1) + /* C is not a valid leading-code of `emacs-mule'. */ + return BYTE8_TO_CHAR (c); + + i = 0; + buf[i++] = c; + while (i < len) + { + c = (*readbyte) (-1, readcharfun); + if (c < 0xA0) + { + while (--i > 1) + (*readbyte) (buf[i], readcharfun); + return BYTE8_TO_CHAR (buf[0]); + } + buf[i++] = c; + } + + if (len == 2) + { + charset = emacs_mule_charset[c]; + code = buf[1] & 0x7F; + } + else if (len == 3) + { + if (buf[0] == EMACS_MULE_LEADING_CODE_PRIVATE_11 + || buf[0] == EMACS_MULE_LEADING_CODE_PRIVATE_12) + { + charset = emacs_mule_charset[buf[1]]; + code = buf[2] & 0x7F; + } + else + { + charset = emacs_mule_charset[buf[0]]; + code = ((buf[1] << 8) | buf[2]) & 0x7F7F; + } + } + else + { + charset = emacs_mule_charset[buf[1]]; + code = ((buf[2] << 8) | buf[3]) & 0x7F7F; + } + c = DECODE_CHAR (charset, code); + if (c < 0) + Fsignal (Qinvalid_read_syntax, + Fcons (build_string ("invalid multibyte form"), Qnil)); + return c; +} + + static Lisp_Object read0 (), read1 (), read_list (), read_vector (); -static int read_multibyte (); static Lisp_Object substitute_object_recurse (); static void substitute_object_in_subtree (), substitute_in_interval (); @@ -553,11 +733,11 @@ -/* Value is non-zero if the file asswociated with file descriptor FD - is a compiled Lisp file that's safe to load. Only files compiled - with Emacs are safe to load. Files compiled with XEmacs can lead - to a crash in Fbyte_code because of an incompatible change in the - byte compiler. */ +/* Value is a version number of byte compiled code if the file + associated with file descriptor FD is a compiled Lisp file that's + safe to load. Only files compiled with Emacs are safe to load. + Files compiled with XEmacs can lead to a crash in Fbyte_code + because of an incompatible change in the byte compiler. */ static int safe_to_load_p (fd) @@ -578,7 +758,7 @@ with NUL bytes following it, but note the version. */ for (i = 0; i < nbytes && buf[i] != '\n'; ++i) if (i == 4) - version = buf[i]; + version = buf[i]; if (i < nbytes && fast_c_string_match_ignore_case (Vbytecomp_version_regexp, @@ -637,6 +817,8 @@ Lisp_Object handler; int safe_p = 1; char *fmode = "r"; + int version; + #ifdef DOS_NT fmode = "rt"; #endif /* DOS_NT */ @@ -744,17 +926,20 @@ Vloads_in_progress = Fcons (found, Vloads_in_progress); } + version = -1; if (!bcmp (&(XSTRING (found)->data[STRING_BYTES (XSTRING (found)) - 4]), - ".elc", 4)) + ".elc", 4) + || (version = safe_to_load_p (fd)) > 0) /* Load .elc files directly, but not when they are remote and have no handler! */ { if (fd != -2) { struct stat s1, s2; - int result, version; - - if (!(version = safe_to_load_p (fd))) + int result; + + if (version < 0 + && ! (version = safe_to_load_p (fd))) { safe_p = 0; if (!load_dangerous_libraries) @@ -769,30 +954,6 @@ compiled = 1; - if (version == 20) /* 21 isn't used */ - /* We're loading something compiled with Mule 3, 4 or 5, - and thus potentially emacs-mule-encoded; load it with - code conversion. (Perhaps the test should actually be - <22?) We could check further on whether the comment - mentions multibyte and only code-convert if it does. I - doubt it's worth the effort. -- fx */ - { - Lisp_Object val; - - if (fd >= 0) - emacs_close (fd); - /* load-with-code-conversion currently fails with - emacs-mule non-ASCII doc strings. */ - error ("Can't currently load Emacs 20/1-compiled files: %s", - XSTRING (found)->data); -#if 0 - val = call4 (intern ("load-with-code-conversion"), found, file, - NILP (noerror) ? Qnil : Qt, - NILP (nomessage) ? Qnil : Qt); -#endif - return unbind_to (count, val); - } - #ifdef DOS_NT fmode = "rb"; #endif /* DOS_NT */ @@ -868,7 +1029,16 @@ load_descriptor_list = Fcons (make_number (fileno (stream)), load_descriptor_list); load_in_progress++; - readevalloop (Qget_file_char, stream, file, Feval, 0, Qnil, Qnil); + if (version >= 22) + readevalloop (Qget_file_char, stream, file, Feval, 0, Qnil, Qnil); + else + { + /* We can't handle a file which was compiled with + byte-compile-dynamic by older version of Emacs. */ + specbind (Qload_force_doc_strings, Qt); + readevalloop (Qget_emacs_mule_file_char, stream, file, Feval, 0, + Qnil, Qnil); + } unbind_to (count, Qnil); /* Run any load-hooks for this file. */ @@ -1242,8 +1412,6 @@ record_unwind_protect (readevalloop_1, load_convert_to_unibyte ? Qt : Qnil); load_convert_to_unibyte = !NILP (unibyte); - readchar_backlog = -1; - GCPRO1 (sourcename); LOADHIST_ATTACH (sourcename); @@ -1423,14 +1591,13 @@ if (EQ (stream, Qt)) stream = Qread_char; - readchar_backlog = -1; new_backquote_flag = 0; read_objects = Qnil; if (EQ (stream, Qread_char)) return Fread_minibuffer (build_string ("Lisp expression: "), Qnil); - if (STRINGP (stream)) + if (STRINGP (stream) || ((CONSP (stream) && STRINGP (XCAR (stream))))) return Fcar (Fread_from_string (stream, Qnil, Qnil)); return read0 (stream); @@ -1445,18 +1612,23 @@ Lisp_Object string, start, end; { int startval, endval; + Lisp_Object str; Lisp_Object tem; - CHECK_STRING (string); + if (CONSP (string)) + str = XCAR (string); + else + str = string; + CHECK_STRING (str); if (NILP (end)) - endval = XSTRING (string)->size; + endval = XSTRING (str)->size; else { CHECK_NUMBER (end); endval = XINT (end); - if (endval < 0 || endval > XSTRING (string)->size) - args_out_of_range (string, end); + if (endval < 0 || endval > XSTRING (str)->size) + args_out_of_range (str, end); } if (NILP (start)) @@ -1466,11 +1638,11 @@ CHECK_NUMBER (start); startval = XINT (start); if (startval < 0 || startval > endval) - args_out_of_range (string, start); + args_out_of_range (str, start); } read_from_string_index = startval; - read_from_string_index_byte = string_char_to_byte (string, startval); + read_from_string_index_byte = string_char_to_byte (str, startval); read_from_string_limit = endval; new_backquote_flag = 0; @@ -1502,65 +1674,16 @@ static int read_buffer_size; static char *read_buffer; -/* Read multibyte form and return it as a character. C is a first - byte of multibyte form, and rest of them are read from - READCHARFUN. Store the byte length of the form into *NBYTES. */ +/* Read a \-escape sequence, assuming we already read the `\'. + If the escape sequence forces unibyte, return eight-bit-char. */ static int -read_multibyte (c, readcharfun, nbytes) - register int c; - Lisp_Object readcharfun; - int *nbytes; -{ - /* We need the actual character code of this multibyte - characters. */ - unsigned char str[MAX_MULTIBYTE_LENGTH]; - int len = 0; - int bytes = BYTES_BY_CHAR_HEAD (c); - - str[len++] = c; - while (len < bytes) - { - c = READCHAR; - if (CHAR_HEAD_P (c)) - { - UNREAD (c); - break; - } - str[len++] = c; - } - - if (len == bytes && MULTIBYTE_LENGTH_NO_CHECK (str) > 0) - { - *nbytes = len; - return STRING_CHAR (str, len); - } - /* The byte sequence is not valid as multibyte. Unread all bytes - but the first one, and return the first byte. */ - while (--len > 0) - UNREAD (str[len]); - *nbytes = 1; - return str[0]; -} - -/* Read a \-escape sequence, assuming we already read the `\'. - If the escape sequence forces unibyte, store 1 into *BYTEREP. - If the escape sequence forces multibyte and the returned character - is raw 8-bit char, store 2 into *BYTEREP. - If the escape sequence forces multibyte and the returned character - is not raw 8-bit char, store 3 into *BYTEREP. - Otherwise store 0 into *BYTEREP. */ - -static int -read_escape (readcharfun, stringp, byterep) +read_escape (readcharfun, stringp) Lisp_Object readcharfun; int stringp; - int *byterep; { register int c = READCHAR; - *byterep = 0; - switch (c) { case -1: @@ -1597,7 +1720,7 @@ error ("Invalid escape character syntax"); c = READCHAR; if (c == '\\') - c = read_escape (readcharfun, 0, byterep); + c = read_escape (readcharfun, 0); return c | meta_modifier; case 'S': @@ -1606,7 +1729,7 @@ error ("Invalid escape character syntax"); c = READCHAR; if (c == '\\') - c = read_escape (readcharfun, 0, byterep); + c = read_escape (readcharfun, 0); return c | shift_modifier; case 'H': @@ -1615,7 +1738,7 @@ error ("Invalid escape character syntax"); c = READCHAR; if (c == '\\') - c = read_escape (readcharfun, 0, byterep); + c = read_escape (readcharfun, 0); return c | hyper_modifier; case 'A': @@ -1624,7 +1747,7 @@ error ("Invalid escape character syntax"); c = READCHAR; if (c == '\\') - c = read_escape (readcharfun, 0, byterep); + c = read_escape (readcharfun, 0); return c | alt_modifier; case 's': @@ -1633,7 +1756,7 @@ error ("Invalid escape character syntax"); c = READCHAR; if (c == '\\') - c = read_escape (readcharfun, 0, byterep); + c = read_escape (readcharfun, 0); return c | super_modifier; case 'C': @@ -1643,7 +1766,7 @@ case '^': c = READCHAR; if (c == '\\') - c = read_escape (readcharfun, 0, byterep); + c = read_escape (readcharfun, 0); if ((c & ~CHAR_MODIFIER_MASK) == '?') return 0177 | (c & CHAR_MODIFIER_MASK); else if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK))) @@ -1683,10 +1806,8 @@ } } - if (c < 0x100) - *byterep = 1; - else - *byterep = 3; + if (! ASCII_BYTE_P (i)) + i = BYTE8_TO_CHAR (i); return i; } @@ -1721,22 +1842,11 @@ } if (count < 3 && i >= 0x80) - *byterep = 2; - else - *byterep = 3; + return BYTE8_TO_CHAR (i); return i; } default: - if (EQ (readcharfun, Qget_file_char) - && BASE_LEADING_CODE_P (c)) - { - int nbytes; - - c = read_multibyte (c, readcharfun, &nbytes); - if (nbytes > 1) - *byterep = 3; - } return c; } } @@ -1824,6 +1934,7 @@ int uninterned_symbol = 0; *pch = 0; + load_each_byte = 0; retry: @@ -1898,12 +2009,14 @@ UNREAD (c); tmp = read1 (readcharfun, pch, first_in_list); - if (size_in_chars != XSTRING (tmp)->size - /* We used to print 1 char too many - when the number of bits was a multiple of 8. - Accept such input in case it came from an old version. */ - && ! (XFASTINT (length) - == (XSTRING (tmp)->size - 1) * BITS_PER_CHAR)) + if (STRING_MULTIBYTE (tmp) + || (size_in_chars != XSTRING (tmp)->size + /* We used to print 1 char too many + when the number of bits was a multiple of 8. + Accept such input in case it came from an old + version. */ + && ! (XFASTINT (length) + == (XSTRING (tmp)->size - 1) * BITS_PER_CHAR))) Fsignal (Qinvalid_read_syntax, Fcons (make_string ("#&...", 5), Qnil)); @@ -1969,6 +2082,7 @@ { int i, nskip = 0; + load_each_byte = 1; /* Read a decimal integer. */ while ((c = READCHAR) >= 0 && c >= '0' && c <= '9') @@ -1979,7 +2093,9 @@ if (c >= 0) UNREAD (c); - if (load_force_doc_strings && EQ (readcharfun, Qget_file_char)) + if (load_force_doc_strings + && (EQ (readcharfun, Qget_file_char) + || EQ (readcharfun, Qget_emacs_mule_file_char))) { /* If we are supposed to force doc strings into core right now, record the last string that we skipped, @@ -2031,6 +2147,7 @@ c = READCHAR; } + load_each_byte = 0; goto retry; } if (c == '$') @@ -2152,17 +2269,18 @@ case '?': { - int discard; + int modifiers; c = READCHAR; if (c < 0) end_of_file_error (); - if (c == '\\') - c = read_escape (readcharfun, 0, &discard); - else if (EQ (readcharfun, Qget_file_char) - && BASE_LEADING_CODE_P (c)) - c = read_multibyte (c, readcharfun, &discard); + c = read_escape (readcharfun, 0); + modifiers = c & CHAR_MODIFIER_MASK; + c &= ~CHAR_MODIFIER_MASK; + if (CHAR_BYTE8_P (c)) + c = CHAR_TO_BYTE8 (c); + c |= modifiers; return make_number (c); } @@ -2196,9 +2314,8 @@ if (c == '\\') { int modifiers; - int byterep; - - c = read_escape (readcharfun, 1, &byterep); + + c = read_escape (readcharfun, 1); /* C is -1 if \ newline has just been seen */ if (c == -1) @@ -2211,26 +2328,11 @@ modifiers = c & CHAR_MODIFIER_MASK; c = c & ~CHAR_MODIFIER_MASK; - if (byterep == 1) - { - force_singlebyte = 1; - if (c >= 0x80) - /* Raw 8-bit code */ - c = BYTE8_TO_CHAR (c); - } - else if (byterep > 1) - { - force_multibyte = 1; - if (byterep == 2) - c = BYTE8_TO_CHAR (c); - } - else if (c >= 0x80) - { - force_singlebyte = 1; - c = BYTE8_TO_CHAR (c); - } - - if (ASCII_CHAR_P (c)) + if (CHAR_BYTE8_P (c)) + force_singlebyte = 1; + else if (! ASCII_CHAR_P (c)) + force_multibyte = 1; + else /* i.e. ASCII_CHAR_P (c) */ { /* Allow `\C- ' and `\C-?'. */ if (modifiers == CHAR_CTL) @@ -2264,34 +2366,10 @@ error ("Invalid modifier in string"); p += CHAR_STRING (c, (unsigned char *) p); } - else if (c >= 0x80) + else { - if (EQ (readcharfun, Qget_file_char)) - { - if (BASE_LEADING_CODE_P (c)) - { - int nbytes; - c = read_multibyte (c, readcharfun, &nbytes); - if (nbytes > 1) - force_multibyte = 1; - else - { - force_singlebyte = 1; - c = BYTE8_TO_CHAR (c); - } - } - else - { - force_singlebyte = 1; - c = BYTE8_TO_CHAR (c); - } - } - else - force_multibyte = 1; p += CHAR_STRING (c, (unsigned char *) p); } - else - *p++ = c; nchars++; } if (c < 0) @@ -2371,11 +2449,7 @@ quoted = 1; } - if (! SINGLE_BYTE_CHAR_P (c)) - p += CHAR_STRING (c, p); - else - *p++ = c; - + p += CHAR_STRING (c, p); c = READCHAR; } @@ -2702,7 +2776,7 @@ XSTRING (bytestr)->size = STRING_BYTES (XSTRING (bytestr)); SET_STRING_BYTES (XSTRING (bytestr), -1); - item = Fread (bytestr); + item = Fread (Fcons (bytestr, readcharfun)); if (!CONSP (item)) error ("invalid byte code"); @@ -2715,6 +2789,15 @@ /* Now handle the bytecode slot. */ ptr[COMPILED_BYTECODE] = read_pure ? Fpurecopy (bytestr) : bytestr; } + else if (i == COMPILED_DOC_STRING + && STRINGP (item) + && ! STRING_MULTIBYTE (item)) + { + if (EQ (readcharfun, Qget_emacs_mule_file_char)) + item = Fdecode_coding_string (item, Qemacs_mule, Qnil, Qnil); + else + item = Fstring_as_multibyte (item); + } } ptr[i] = read_pure ? Fpurecopy (item) : item; otem = XCONS (tem); @@ -2812,7 +2895,14 @@ if (doc_reference == 2) { /* Get a doc string from the file we are loading. - If it's in saved_doc_string, get it from there. */ + If it's in saved_doc_string, get it from there. + + Here, we don't know if the string is a + bytecode string or a doc string. As a + bytecode string must be unibyte, we always + return a unibyte string. If it is actually a + doc string, caller must make it + multibyte. */ int pos = XINT (XCDR (val)); /* Position is negative for user variables. */ if (pos < 0) pos = -pos; @@ -2844,8 +2934,8 @@ saved_doc_string[to++] = c; } - return make_string (saved_doc_string + start, - to - start); + return make_unibyte_string (saved_doc_string + start, + to - start); } /* Look in prev_saved_doc_string the same way. */ else if (pos >= prev_saved_doc_string_position @@ -2876,11 +2966,12 @@ prev_saved_doc_string[to++] = c; } - return make_string (prev_saved_doc_string + start, - to - start); + return make_unibyte_string (prev_saved_doc_string + + start, + to - start); } else - return get_doc_string (val, 0, 0); + return get_doc_string (val, 1, 0); } return val; @@ -3764,6 +3855,12 @@ Qget_file_char = intern ("get-file-char"); staticpro (&Qget_file_char); + Qget_emacs_mule_file_char = intern ("get-emacs-mule-file-char"); + staticpro (&Qget_emacs_mule_file_char); + + Qload_force_doc_strings = intern ("load-force-doc-strings"); + staticpro (&Qload_force_doc_strings); + Qbackquote = intern ("`"); staticpro (&Qbackquote); Qcomma = intern (",");