comparison src/lread.c @ 89483:2f877ed80fa6

*** empty log message ***
author Kenichi Handa <handa@m17n.org>
date Mon, 08 Sep 2003 12:53:41 +0000
parents 375f2633d815 85b37317d5ea
children 68c22ea6027c
comparison
equal deleted inserted replaced
88123:375f2633d815 89483:2f877ed80fa6
27 #include <sys/file.h> 27 #include <sys/file.h>
28 #include <errno.h> 28 #include <errno.h>
29 #include "lisp.h" 29 #include "lisp.h"
30 #include "intervals.h" 30 #include "intervals.h"
31 #include "buffer.h" 31 #include "buffer.h"
32 #include "character.h"
32 #include "charset.h" 33 #include "charset.h"
34 #include "coding.h"
33 #include <epaths.h> 35 #include <epaths.h>
34 #include "commands.h" 36 #include "commands.h"
35 #include "keyboard.h" 37 #include "keyboard.h"
36 #include "termhooks.h" 38 #include "termhooks.h"
37 #include "coding.h" 39 #include "coding.h"
84 Lisp_Object Qvariable_documentation, Vvalues, Vstandard_input, Vafter_load_alist; 86 Lisp_Object Qvariable_documentation, Vvalues, Vstandard_input, Vafter_load_alist;
85 Lisp_Object Qascii_character, Qload, Qload_file_name; 87 Lisp_Object Qascii_character, Qload, Qload_file_name;
86 Lisp_Object Qbackquote, Qcomma, Qcomma_at, Qcomma_dot, Qfunction; 88 Lisp_Object Qbackquote, Qcomma, Qcomma_at, Qcomma_dot, Qfunction;
87 Lisp_Object Qinhibit_file_name_operation; 89 Lisp_Object Qinhibit_file_name_operation;
88 90
91 /* Used instead of Qget_file_char while loading *.elc files compiled
92 by Emacs 21 or older. */
93 static Lisp_Object Qget_emacs_mule_file_char;
94
95 static Lisp_Object Qload_force_doc_strings;
96
89 extern Lisp_Object Qevent_symbol_element_mask; 97 extern Lisp_Object Qevent_symbol_element_mask;
90 extern Lisp_Object Qfile_exists_p; 98 extern Lisp_Object Qfile_exists_p;
91 99
92 /* non-zero if inside `load' */ 100 /* non-zero if inside `load' */
93 int load_in_progress; 101 int load_in_progress;
127 static int load_force_doc_strings; 135 static int load_force_doc_strings;
128 136
129 /* Nonzero means read should convert strings to unibyte. */ 137 /* Nonzero means read should convert strings to unibyte. */
130 static int load_convert_to_unibyte; 138 static int load_convert_to_unibyte;
131 139
140 /* Nonzero means READCHAR should read bytes one by one (not character)
141 when READCHARFUN is Qget_file_char or Qget_emacs_mule_file_char.
142 This is set to 1 by read1 temporarily while handling #@NUMBER. */
143 static int load_each_byte;
144
132 /* Function to use for loading an Emacs lisp source file (not 145 /* Function to use for loading an Emacs lisp source file (not
133 compiled) instead of readevalloop. */ 146 compiled) instead of readevalloop. */
134 Lisp_Object Vload_source_file_function; 147 Lisp_Object Vload_source_file_function;
135 148
136 /* List of all DEFVAR_BOOL variables. Used by the byte optimizer. */ 149 /* List of all DEFVAR_BOOL variables. Used by the byte optimizer. */
155 /* For use within read-from-string (this reader is non-reentrant!!) */ 168 /* For use within read-from-string (this reader is non-reentrant!!) */
156 static int read_from_string_index; 169 static int read_from_string_index;
157 static int read_from_string_index_byte; 170 static int read_from_string_index_byte;
158 static int read_from_string_limit; 171 static int read_from_string_limit;
159 172
160 /* Number of bytes left to read in the buffer character
161 that `readchar' has already advanced over. */
162 static int readchar_backlog;
163 /* Number of characters read in the current call to Fread or 173 /* Number of characters read in the current call to Fread or
164 Fread_from_string. */ 174 Fread_from_string. */
165 static int readchar_count; 175 static int readchar_count;
166 176
167 /* This contains the last string skipped with #@. */ 177 /* This contains the last string skipped with #@. */
201 211
202 /* A regular expression used to detect files compiled with Emacs. */ 212 /* A regular expression used to detect files compiled with Emacs. */
203 213
204 static Lisp_Object Vbytecomp_version_regexp; 214 static Lisp_Object Vbytecomp_version_regexp;
205 215
206 static void to_multibyte P_ ((char **, char **, int *)); 216 static int read_emacs_mule_char P_ ((int, int (*) (int, Lisp_Object),
217 Lisp_Object));
218
207 static void readevalloop P_ ((Lisp_Object, FILE*, Lisp_Object, 219 static void readevalloop P_ ((Lisp_Object, FILE*, Lisp_Object,
208 Lisp_Object (*) (), int, 220 Lisp_Object (*) (), int,
209 Lisp_Object, Lisp_Object)); 221 Lisp_Object, Lisp_Object));
210 static Lisp_Object load_unwind P_ ((Lisp_Object)); 222 static Lisp_Object load_unwind P_ ((Lisp_Object));
211 static Lisp_Object load_descriptor_unwind P_ ((Lisp_Object)); 223 static Lisp_Object load_descriptor_unwind P_ ((Lisp_Object));
212 224
213 225
226 /* Functions that read one byte from the current source READCHARFUN
227 or unreads one byte. If the integer argument C is -1, it returns
228 one read byte, or -1 when there's no more byte in the source. If C
229 is 0 or positive, it unreads C, and the return value is not
230 interesting. */
231
232 static int readbyte_for_lambda P_ ((int, Lisp_Object));
233 static int readbyte_from_file P_ ((int, Lisp_Object));
234 static int readbyte_from_string P_ ((int, Lisp_Object));
235
214 /* Handle unreading and rereading of characters. 236 /* Handle unreading and rereading of characters.
215 Write READCHAR to read a character, 237 Write READCHAR to read a character,
216 UNREAD(c) to unread c to be read again. 238 UNREAD(c) to unread c to be read again.
217 239
218 The READCHAR and UNREAD macros are meant for reading/unreading a 240 These macros correctly read/unread multibyte characters. */
219 byte code; they do not handle multibyte characters. The caller
220 should manage them if necessary.
221
222 [ Actually that seems to be a lie; READCHAR will definitely read
223 multibyte characters from buffer sources, at least. Is the
224 comment just out of date?
225 -- Colin Walters <walters@gnu.org>, 22 May 2002 16:36:50 -0400 ]
226 */
227 241
228 #define READCHAR readchar (readcharfun) 242 #define READCHAR readchar (readcharfun)
229 #define UNREAD(c) unreadchar (readcharfun, c) 243 #define UNREAD(c) unreadchar (readcharfun, c)
244
245 /* When READCHARFUN is Qget_file_char, Qget_emacs_mule_file_char,
246 Qlambda, or a cons, we use this to keep an unread character because
247 a file stream can't handle multibyte-char unreading. The value -1
248 means that there's no unread character. */
249 static int unread_char;
230 250
231 static int 251 static int
232 readchar (readcharfun) 252 readchar (readcharfun)
233 Lisp_Object readcharfun; 253 Lisp_Object readcharfun;
234 { 254 {
235 Lisp_Object tem; 255 Lisp_Object tem;
236 register int c; 256 register int c;
257 int (*readbyte) P_ ((int, Lisp_Object));
258 unsigned char buf[MAX_MULTIBYTE_LENGTH];
259 int i, len;
260 int emacs_mule_encoding = 0;
237 261
238 readchar_count++; 262 readchar_count++;
239 263
240 if (BUFFERP (readcharfun)) 264 if (BUFFERP (readcharfun))
241 { 265 {
242 register struct buffer *inbuffer = XBUFFER (readcharfun); 266 register struct buffer *inbuffer = XBUFFER (readcharfun);
243 267
244 int pt_byte = BUF_PT_BYTE (inbuffer); 268 int pt_byte = BUF_PT_BYTE (inbuffer);
245 int orig_pt_byte = pt_byte;
246
247 if (readchar_backlog > 0)
248 /* We get the address of the byte just passed,
249 which is the last byte of the character.
250 The other bytes in this character are consecutive with it,
251 because the gap can't be in the middle of a character. */
252 return *(BUF_BYTE_ADDRESS (inbuffer, BUF_PT_BYTE (inbuffer) - 1)
253 - --readchar_backlog);
254 269
255 if (pt_byte >= BUF_ZV_BYTE (inbuffer)) 270 if (pt_byte >= BUF_ZV_BYTE (inbuffer))
256 return -1; 271 return -1;
257
258 readchar_backlog = -1;
259 272
260 if (! NILP (inbuffer->enable_multibyte_characters)) 273 if (! NILP (inbuffer->enable_multibyte_characters))
261 { 274 {
262 /* Fetch the character code from the buffer. */ 275 /* Fetch the character code from the buffer. */
263 unsigned char *p = BUF_BYTE_ADDRESS (inbuffer, pt_byte); 276 unsigned char *p = BUF_BYTE_ADDRESS (inbuffer, pt_byte);
265 c = STRING_CHAR (p, pt_byte - orig_pt_byte); 278 c = STRING_CHAR (p, pt_byte - orig_pt_byte);
266 } 279 }
267 else 280 else
268 { 281 {
269 c = BUF_FETCH_BYTE (inbuffer, pt_byte); 282 c = BUF_FETCH_BYTE (inbuffer, pt_byte);
283 if (! ASCII_BYTE_P (c))
284 c = BYTE8_TO_CHAR (c);
270 pt_byte++; 285 pt_byte++;
271 } 286 }
272 SET_BUF_PT_BOTH (inbuffer, BUF_PT (inbuffer) + 1, pt_byte); 287 SET_BUF_PT_BOTH (inbuffer, BUF_PT (inbuffer) + 1, pt_byte);
273 288
274 return c; 289 return c;
276 if (MARKERP (readcharfun)) 291 if (MARKERP (readcharfun))
277 { 292 {
278 register struct buffer *inbuffer = XMARKER (readcharfun)->buffer; 293 register struct buffer *inbuffer = XMARKER (readcharfun)->buffer;
279 294
280 int bytepos = marker_byte_position (readcharfun); 295 int bytepos = marker_byte_position (readcharfun);
281 int orig_bytepos = bytepos;
282
283 if (readchar_backlog > 0)
284 /* We get the address of the byte just passed,
285 which is the last byte of the character.
286 The other bytes in this character are consecutive with it,
287 because the gap can't be in the middle of a character. */
288 return *(BUF_BYTE_ADDRESS (inbuffer, XMARKER (readcharfun)->bytepos - 1)
289 - --readchar_backlog);
290 296
291 if (bytepos >= BUF_ZV_BYTE (inbuffer)) 297 if (bytepos >= BUF_ZV_BYTE (inbuffer))
292 return -1; 298 return -1;
293
294 readchar_backlog = -1;
295 299
296 if (! NILP (inbuffer->enable_multibyte_characters)) 300 if (! NILP (inbuffer->enable_multibyte_characters))
297 { 301 {
298 /* Fetch the character code from the buffer. */ 302 /* Fetch the character code from the buffer. */
299 unsigned char *p = BUF_BYTE_ADDRESS (inbuffer, bytepos); 303 unsigned char *p = BUF_BYTE_ADDRESS (inbuffer, bytepos);
301 c = STRING_CHAR (p, bytepos - orig_bytepos); 305 c = STRING_CHAR (p, bytepos - orig_bytepos);
302 } 306 }
303 else 307 else
304 { 308 {
305 c = BUF_FETCH_BYTE (inbuffer, bytepos); 309 c = BUF_FETCH_BYTE (inbuffer, bytepos);
310 if (! ASCII_BYTE_P (c))
311 c = BYTE8_TO_CHAR (c);
306 bytepos++; 312 bytepos++;
307 } 313 }
308 314
309 XMARKER (readcharfun)->bytepos = bytepos; 315 XMARKER (readcharfun)->bytepos = bytepos;
310 XMARKER (readcharfun)->charpos++; 316 XMARKER (readcharfun)->charpos++;
311 317
312 return c; 318 return c;
313 } 319 }
314 320
315 if (EQ (readcharfun, Qlambda)) 321 if (EQ (readcharfun, Qlambda))
316 return read_bytecode_char (0); 322 {
323 readbyte = readbyte_for_lambda;
324 goto read_multibyte;
325 }
317 326
318 if (EQ (readcharfun, Qget_file_char)) 327 if (EQ (readcharfun, Qget_file_char))
319 { 328 {
320 c = getc (instream); 329 readbyte = readbyte_from_file;
321 #ifdef EINTR 330 goto read_multibyte;
322 /* Interrupted reads have been observed while reading over the network */
323 while (c == EOF && ferror (instream) && errno == EINTR)
324 {
325 clearerr (instream);
326 c = getc (instream);
327 }
328 #endif
329 return c;
330 } 331 }
331 332
332 if (STRINGP (readcharfun)) 333 if (STRINGP (readcharfun))
333 { 334 {
334 if (read_from_string_index >= read_from_string_limit) 335 if (read_from_string_index >= read_from_string_limit)
339 read_from_string_index_byte); 340 read_from_string_index_byte);
340 341
341 return c; 342 return c;
342 } 343 }
343 344
345 if (CONSP (readcharfun))
346 {
347 /* This is the case that read_vector is reading from a unibyte
348 string that contains a byte sequence previously skipped
349 because of #@NUMBER. The car part of readcharfun is that
350 string, and the cdr part is a value of readcharfun given to
351 read_vector. */
352 readbyte = readbyte_from_string;
353 if (EQ (XCDR (readcharfun), Qget_emacs_mule_file_char))
354 emacs_mule_encoding = 1;
355 goto read_multibyte;
356 }
357
358 if (EQ (readcharfun, Qget_emacs_mule_file_char))
359 {
360 readbyte = readbyte_from_file;
361 emacs_mule_encoding = 1;
362 goto read_multibyte;
363 }
364
344 tem = call0 (readcharfun); 365 tem = call0 (readcharfun);
345 366
346 if (NILP (tem)) 367 if (NILP (tem))
347 return -1; 368 return -1;
348 return XINT (tem); 369 return XINT (tem);
370
371 read_multibyte:
372 if (unread_char >= 0)
373 {
374 c = unread_char;
375 unread_char = -1;
376 return c;
377 }
378 c = (*readbyte) (-1, readcharfun);
379 if (c < 0 || ASCII_BYTE_P (c) || load_each_byte)
380 return c;
381 if (emacs_mule_encoding)
382 return read_emacs_mule_char (c, readbyte, readcharfun);
383 i = 0;
384 buf[i++] = c;
385 len = BYTES_BY_CHAR_HEAD (c);
386 while (i < len)
387 {
388 c = (*readbyte) (-1, readcharfun);
389 if (c < 0 || ! TRAILING_CODE_P (c))
390 {
391 while (--i > 1)
392 (*readbyte) (buf[i], readcharfun);
393 return BYTE8_TO_CHAR (buf[0]);
394 }
395 buf[i++] = c;
396 }
397 return STRING_CHAR (buf, i);
349 } 398 }
350 399
351 /* Unread the character C in the way appropriate for the stream READCHARFUN. 400 /* Unread the character C in the way appropriate for the stream READCHARFUN.
352 If the stream is a user function, call it with the char as argument. */ 401 If the stream is a user function, call it with the char as argument. */
353 402
364 else if (BUFFERP (readcharfun)) 413 else if (BUFFERP (readcharfun))
365 { 414 {
366 struct buffer *b = XBUFFER (readcharfun); 415 struct buffer *b = XBUFFER (readcharfun);
367 int bytepos = BUF_PT_BYTE (b); 416 int bytepos = BUF_PT_BYTE (b);
368 417
369 if (readchar_backlog >= 0) 418 BUF_PT (b)--;
370 readchar_backlog++; 419 if (! NILP (b->enable_multibyte_characters))
420 BUF_DEC_POS (b, bytepos);
371 else 421 else
372 { 422 bytepos--;
373 BUF_PT (b)--; 423
374 if (! NILP (b->enable_multibyte_characters)) 424 BUF_PT_BYTE (b) = bytepos;
375 BUF_DEC_POS (b, bytepos);
376 else
377 bytepos--;
378
379 BUF_PT_BYTE (b) = bytepos;
380 }
381 } 425 }
382 else if (MARKERP (readcharfun)) 426 else if (MARKERP (readcharfun))
383 { 427 {
384 struct buffer *b = XMARKER (readcharfun)->buffer; 428 struct buffer *b = XMARKER (readcharfun)->buffer;
385 int bytepos = XMARKER (readcharfun)->bytepos; 429 int bytepos = XMARKER (readcharfun)->bytepos;
386 430
387 if (readchar_backlog >= 0) 431 XMARKER (readcharfun)->charpos--;
388 readchar_backlog++; 432 if (! NILP (b->enable_multibyte_characters))
433 BUF_DEC_POS (b, bytepos);
389 else 434 else
390 { 435 bytepos--;
391 XMARKER (readcharfun)->charpos--; 436
392 if (! NILP (b->enable_multibyte_characters)) 437 XMARKER (readcharfun)->bytepos = bytepos;
393 BUF_DEC_POS (b, bytepos);
394 else
395 bytepos--;
396
397 XMARKER (readcharfun)->bytepos = bytepos;
398 }
399 } 438 }
400 else if (STRINGP (readcharfun)) 439 else if (STRINGP (readcharfun))
401 { 440 {
402 read_from_string_index--; 441 read_from_string_index--;
403 read_from_string_index_byte 442 read_from_string_index_byte
404 = string_char_to_byte (readcharfun, read_from_string_index); 443 = string_char_to_byte (readcharfun, read_from_string_index);
405 } 444 }
445 else if (CONSP (readcharfun))
446 {
447 unread_char = c;
448 }
406 else if (EQ (readcharfun, Qlambda)) 449 else if (EQ (readcharfun, Qlambda))
407 read_bytecode_char (1); 450 {
408 else if (EQ (readcharfun, Qget_file_char)) 451 unread_char = c;
409 ungetc (c, instream); 452 }
453 else if (EQ (readcharfun, Qget_file_char)
454 || EQ (readcharfun, Qget_emacs_mule_file_char))
455 {
456 if (load_each_byte)
457 ungetc (c, instream);
458 else
459 unread_char = c;
460 }
410 else 461 else
411 call1 (readcharfun, make_number (c)); 462 call1 (readcharfun, make_number (c));
412 } 463 }
464
465 static int
466 readbyte_for_lambda (c, readcharfun)
467 int c;
468 Lisp_Object readcharfun;
469 {
470 return read_bytecode_char (c >= 0);
471 }
472
473
474 static int
475 readbyte_from_file (c, readcharfun)
476 int c;
477 Lisp_Object readcharfun;
478 {
479 if (c >= 0)
480 {
481 ungetc (c, instream);
482 return 0;
483 }
484
485 c = getc (instream);
486 #ifdef EINTR
487 /* Interrupted reads have been observed while reading over the network */
488 while (c == EOF && ferror (instream) && errno == EINTR)
489 {
490 clearerr (instream);
491 c = getc (instream);
492 }
493 #endif
494 return (c == EOF ? -1 : c);
495 }
496
497 static int
498 readbyte_from_string (c, readcharfun)
499 int c;
500 Lisp_Object readcharfun;
501 {
502 Lisp_Object string = XCAR (readcharfun);
503
504 if (c >= 0)
505 {
506 read_from_string_index--;
507 read_from_string_index_byte
508 = string_char_to_byte (string, read_from_string_index);
509 }
510
511 if (read_from_string_index >= read_from_string_limit)
512 c = -1;
513 else
514 FETCH_STRING_CHAR_ADVANCE (c, string,
515 read_from_string_index,
516 read_from_string_index_byte);
517 return c;
518 }
519
520
521 /* Read one non-ASCII character from INSTREAM. The character is
522 encoded in `emacs-mule' and the first byte is already read in
523 C. */
524
525 extern char emacs_mule_bytes[256];
526
527 static int
528 read_emacs_mule_char (c, readbyte, readcharfun)
529 int c;
530 int (*readbyte) P_ ((int, Lisp_Object));
531 Lisp_Object readcharfun;
532 {
533 /* Emacs-mule coding uses at most 4-byte for one character. */
534 unsigned char buf[4];
535 int len = emacs_mule_bytes[c];
536 struct charset *charset;
537 int i;
538 unsigned code;
539
540 if (len == 1)
541 /* C is not a valid leading-code of `emacs-mule'. */
542 return BYTE8_TO_CHAR (c);
543
544 i = 0;
545 buf[i++] = c;
546 while (i < len)
547 {
548 c = (*readbyte) (-1, readcharfun);
549 if (c < 0xA0)
550 {
551 while (--i > 1)
552 (*readbyte) (buf[i], readcharfun);
553 return BYTE8_TO_CHAR (buf[0]);
554 }
555 buf[i++] = c;
556 }
557
558 if (len == 2)
559 {
560 charset = emacs_mule_charset[buf[0]];
561 code = buf[1] & 0x7F;
562 }
563 else if (len == 3)
564 {
565 if (buf[0] == EMACS_MULE_LEADING_CODE_PRIVATE_11
566 || buf[0] == EMACS_MULE_LEADING_CODE_PRIVATE_12)
567 {
568 charset = emacs_mule_charset[buf[1]];
569 code = buf[2] & 0x7F;
570 }
571 else
572 {
573 charset = emacs_mule_charset[buf[0]];
574 code = ((buf[1] << 8) | buf[2]) & 0x7F7F;
575 }
576 }
577 else
578 {
579 charset = emacs_mule_charset[buf[1]];
580 code = ((buf[2] << 8) | buf[3]) & 0x7F7F;
581 }
582 c = DECODE_CHAR (charset, code);
583 if (c < 0)
584 Fsignal (Qinvalid_read_syntax,
585 Fcons (build_string ("invalid multibyte form"), Qnil));
586 return c;
587 }
588
413 589
414 static Lisp_Object read_internal_start P_ ((Lisp_Object, Lisp_Object, 590 static Lisp_Object read_internal_start P_ ((Lisp_Object, Lisp_Object,
415 Lisp_Object)); 591 Lisp_Object));
416 static Lisp_Object read0 P_ ((Lisp_Object)); 592 static Lisp_Object read0 P_ ((Lisp_Object));
417 static Lisp_Object read1 P_ ((Lisp_Object, int *, int)); 593 static Lisp_Object read1 P_ ((Lisp_Object, int *, int));
418 594
419 static Lisp_Object read_list P_ ((int, Lisp_Object)); 595 static Lisp_Object read_list P_ ((int, Lisp_Object));
420 static Lisp_Object read_vector P_ ((Lisp_Object, int)); 596 static Lisp_Object read_vector P_ ((Lisp_Object, int));
421 static int read_multibyte P_ ((int, Lisp_Object));
422 597
423 static Lisp_Object substitute_object_recurse P_ ((Lisp_Object, Lisp_Object, 598 static Lisp_Object substitute_object_recurse P_ ((Lisp_Object, Lisp_Object,
424 Lisp_Object)); 599 Lisp_Object));
425 static void substitute_object_in_subtree P_ ((Lisp_Object, 600 static void substitute_object_in_subtree P_ ((Lisp_Object,
426 Lisp_Object)); 601 Lisp_Object));
591 return val; 766 return val;
592 } 767 }
593 768
594 769
595 770
596 /* Value is non-zero if the file asswociated with file descriptor FD 771 /* Value is a version number of byte compiled code if the file
597 is a compiled Lisp file that's safe to load. Only files compiled 772 asswociated with file descriptor FD is a compiled Lisp file that's
598 with Emacs are safe to load. Files compiled with XEmacs can lead 773 safe to load. Only files compiled with Emacs are safe to load.
599 to a crash in Fbyte_code because of an incompatible change in the 774 Files compiled with XEmacs can lead to a crash in Fbyte_code
600 byte compiler. */ 775 because of an incompatible change in the byte compiler. */
601 776
602 static int 777 static int
603 safe_to_load_p (fd) 778 safe_to_load_p (fd)
604 int fd; 779 int fd;
605 { 780 {
606 char buf[512]; 781 char buf[512];
607 int nbytes, i; 782 int nbytes, i;
608 int safe_p = 1; 783 int safe_p = 1;
784 int version = 1;
609 785
610 /* Read the first few bytes from the file, and look for a line 786 /* Read the first few bytes from the file, and look for a line
611 specifying the byte compiler version used. */ 787 specifying the byte compiler version used. */
612 nbytes = emacs_read (fd, buf, sizeof buf - 1); 788 nbytes = emacs_read (fd, buf, sizeof buf - 1);
613 if (nbytes > 0) 789 if (nbytes > 0)
614 { 790 {
615 buf[nbytes] = '\0'; 791 buf[nbytes] = '\0';
616 792
617 /* Skip to the next newline, skipping over the initial `ELC' 793 /* Skip to the next newline, skipping over the initial `ELC'
618 with NUL bytes following it. */ 794 with NUL bytes following it, but note the version. */
619 for (i = 0; i < nbytes && buf[i] != '\n'; ++i) 795 for (i = 0; i < nbytes && buf[i] != '\n'; ++i)
620 ; 796 if (i == 4)
621 797 version = buf[i];
622 if (i < nbytes 798
623 && fast_c_string_match_ignore_case (Vbytecomp_version_regexp, 799 if (i == nbytes
800 || fast_c_string_match_ignore_case (Vbytecomp_version_regexp,
624 buf + i) < 0) 801 buf + i) < 0)
625 safe_p = 0; 802 safe_p = 0;
626 } 803 }
804 if (safe_p)
805 safe_p = version;
627 806
628 lseek (fd, 0, SEEK_SET); 807 lseek (fd, 0, SEEK_SET);
629 return safe_p; 808 return safe_p;
630 } 809 }
631 810
681 /* 1 means we are loading a compiled file. */ 860 /* 1 means we are loading a compiled file. */
682 int compiled = 0; 861 int compiled = 0;
683 Lisp_Object handler; 862 Lisp_Object handler;
684 int safe_p = 1; 863 int safe_p = 1;
685 char *fmode = "r"; 864 char *fmode = "r";
865 int version;
866
686 #ifdef DOS_NT 867 #ifdef DOS_NT
687 fmode = "rt"; 868 fmode = "rt";
688 #endif /* DOS_NT */ 869 #endif /* DOS_NT */
689 870
690 CHECK_STRING (file); 871 CHECK_STRING (file);
796 Fcons (found, Vloads_in_progress))); 977 Fcons (found, Vloads_in_progress)));
797 record_unwind_protect (record_load_unwind, Vloads_in_progress); 978 record_unwind_protect (record_load_unwind, Vloads_in_progress);
798 Vloads_in_progress = Fcons (found, Vloads_in_progress); 979 Vloads_in_progress = Fcons (found, Vloads_in_progress);
799 } 980 }
800 981
982 version = -1;
801 if (!bcmp (SDATA (found) + SBYTES (found) - 4, 983 if (!bcmp (SDATA (found) + SBYTES (found) - 4,
802 ".elc", 4)) 984 ".elc", 4)
985 || (version = safe_to_load_p (fd)) > 0)
803 /* Load .elc files directly, but not when they are 986 /* Load .elc files directly, but not when they are
804 remote and have no handler! */ 987 remote and have no handler! */
805 { 988 {
806 if (fd != -2) 989 if (fd != -2)
807 { 990 {
808 struct stat s1, s2; 991 struct stat s1, s2;
809 int result; 992 int result;
810 993
811 if (!safe_to_load_p (fd)) 994 if (version < 0
995 && ! (version = safe_to_load_p (fd)))
812 { 996 {
813 safe_p = 0; 997 safe_p = 0;
814 if (!load_dangerous_libraries) 998 if (!load_dangerous_libraries)
815 { 999 {
816 if (fd >= 0) 1000 if (fd >= 0)
909 specbind (Qload_file_name, found); 1093 specbind (Qload_file_name, found);
910 specbind (Qinhibit_file_name_operation, Qnil); 1094 specbind (Qinhibit_file_name_operation, Qnil);
911 load_descriptor_list 1095 load_descriptor_list
912 = Fcons (make_number (fileno (stream)), load_descriptor_list); 1096 = Fcons (make_number (fileno (stream)), load_descriptor_list);
913 load_in_progress++; 1097 load_in_progress++;
914 readevalloop (Qget_file_char, stream, file, Feval, 0, Qnil, Qnil); 1098 if (! version || version >= 22)
1099 readevalloop (Qget_file_char, stream, file, Feval, 0, Qnil, Qnil);
1100 else
1101 {
1102 /* We can't handle a file which was compiled with
1103 byte-compile-dynamic by older version of Emacs. */
1104 specbind (Qload_force_doc_strings, Qt);
1105 readevalloop (Qget_emacs_mule_file_char, stream, file, Feval, 0,
1106 Qnil, Qnil);
1107 }
915 unbind_to (count, Qnil); 1108 unbind_to (count, Qnil);
916 1109
917 /* Run any load-hooks for this file. */ 1110 /* Run any load-hooks for this file. */
918 temp = Fassoc (file, Vafter_load_alist); 1111 temp = Fassoc (file, Vafter_load_alist);
919 if (!NILP (temp)) 1112 if (!NILP (temp))
1315 specbind (Qstandard_input, readcharfun); 1508 specbind (Qstandard_input, readcharfun);
1316 specbind (Qcurrent_load_list, Qnil); 1509 specbind (Qcurrent_load_list, Qnil);
1317 record_unwind_protect (readevalloop_1, load_convert_to_unibyte ? Qt : Qnil); 1510 record_unwind_protect (readevalloop_1, load_convert_to_unibyte ? Qt : Qnil);
1318 load_convert_to_unibyte = !NILP (unibyte); 1511 load_convert_to_unibyte = !NILP (unibyte);
1319 1512
1320 readchar_backlog = -1;
1321
1322 GCPRO1 (sourcename); 1513 GCPRO1 (sourcename);
1323 1514
1324 LOADHIST_ATTACH (sourcename); 1515 LOADHIST_ATTACH (sourcename);
1325 1516
1326 continue_reading_p = 1; 1517 continue_reading_p = 1;
1524 Lisp_Object start; /* Only used when stream is a string. */ 1715 Lisp_Object start; /* Only used when stream is a string. */
1525 Lisp_Object end; /* Only used when stream is a string. */ 1716 Lisp_Object end; /* Only used when stream is a string. */
1526 { 1717 {
1527 Lisp_Object retval; 1718 Lisp_Object retval;
1528 1719
1529 readchar_backlog = -1;
1530 readchar_count = 0; 1720 readchar_count = 0;
1531 new_backquote_flag = 0; 1721 new_backquote_flag = 0;
1532 read_objects = Qnil; 1722 read_objects = Qnil;
1533 if (EQ (Vread_with_symbol_positions, Qt) 1723 if (EQ (Vread_with_symbol_positions, Qt)
1534 || EQ (Vread_with_symbol_positions, stream)) 1724 || EQ (Vread_with_symbol_positions, stream))
1535 Vread_symbol_positions_list = Qnil; 1725 Vread_symbol_positions_list = Qnil;
1536 1726
1537 if (STRINGP (stream)) 1727 if (STRINGP (stream)
1728 || ((CONSP (stream) && STRINGP (XCAR (stream)))))
1538 { 1729 {
1539 int startval, endval; 1730 int startval, endval;
1731 Lisp_Object string;
1732
1733 if (STRINGP (stream))
1734 string = stream;
1735 else
1736 string = XCAR (stream);
1737
1540 if (NILP (end)) 1738 if (NILP (end))
1541 endval = SCHARS (stream); 1739 endval = SCHARS (string);
1542 else 1740 else
1543 { 1741 {
1544 CHECK_NUMBER (end); 1742 CHECK_NUMBER (end);
1545 endval = XINT (end); 1743 endval = XINT (end);
1546 if (endval < 0 || endval > SCHARS (stream)) 1744 if (endval < 0 || endval > SCHARS (string))
1547 args_out_of_range (stream, end); 1745 args_out_of_range (string, end);
1548 } 1746 }
1549 1747
1550 if (NILP (start)) 1748 if (NILP (start))
1551 startval = 0; 1749 startval = 0;
1552 else 1750 else
1553 { 1751 {
1554 CHECK_NUMBER (start); 1752 CHECK_NUMBER (start);
1555 startval = XINT (start); 1753 startval = XINT (start);
1556 if (startval < 0 || startval > endval) 1754 if (startval < 0 || startval > endval)
1557 args_out_of_range (stream, start); 1755 args_out_of_range (string, start);
1558 } 1756 }
1559 read_from_string_index = startval; 1757 read_from_string_index = startval;
1560 read_from_string_index_byte = string_char_to_byte (stream, startval); 1758 read_from_string_index_byte = string_char_to_byte (string, startval);
1561 read_from_string_limit = endval; 1759 read_from_string_limit = endval;
1562 } 1760 }
1563 1761
1564 retval = read0 (stream); 1762 retval = read0 (stream);
1565 if (EQ (Vread_with_symbol_positions, Qt) 1763 if (EQ (Vread_with_symbol_positions, Qt)
1588 } 1786 }
1589 1787
1590 static int read_buffer_size; 1788 static int read_buffer_size;
1591 static char *read_buffer; 1789 static char *read_buffer;
1592 1790
1593 /* Read multibyte form and return it as a character. C is a first 1791 /* Read a \-escape sequence, assuming we already read the `\'.
1594 byte of multibyte form, and rest of them are read from 1792 If the escape sequence forces unibyte, return eight-bit char. */
1595 READCHARFUN. */
1596 1793
1597 static int 1794 static int
1598 read_multibyte (c, readcharfun) 1795 read_escape (readcharfun, stringp)
1599 register int c;
1600 Lisp_Object readcharfun;
1601 {
1602 /* We need the actual character code of this multibyte
1603 characters. */
1604 unsigned char str[MAX_MULTIBYTE_LENGTH];
1605 int len = 0;
1606 int bytes;
1607
1608 if (c < 0)
1609 return c;
1610
1611 str[len++] = c;
1612 while ((c = READCHAR) >= 0xA0
1613 && len < MAX_MULTIBYTE_LENGTH)
1614 {
1615 str[len++] = c;
1616 readchar_count--;
1617 }
1618 UNREAD (c);
1619 if (UNIBYTE_STR_AS_MULTIBYTE_P (str, len, bytes))
1620 return STRING_CHAR (str, len);
1621 /* The byte sequence is not valid as multibyte. Unread all bytes
1622 but the first one, and return the first byte. */
1623 while (--len > 0)
1624 UNREAD (str[len]);
1625 return str[0];
1626 }
1627
1628 /* Read a \-escape sequence, assuming we already read the `\'.
1629 If the escape sequence forces unibyte, store 1 into *BYTEREP.
1630 If the escape sequence forces multibyte, store 2 into *BYTEREP.
1631 Otherwise store 0 into *BYTEREP. */
1632
1633 static int
1634 read_escape (readcharfun, stringp, byterep)
1635 Lisp_Object readcharfun; 1796 Lisp_Object readcharfun;
1636 int stringp; 1797 int stringp;
1637 int *byterep;
1638 { 1798 {
1639 register int c = READCHAR; 1799 register int c = READCHAR;
1640
1641 *byterep = 0;
1642 1800
1643 switch (c) 1801 switch (c)
1644 { 1802 {
1645 case -1: 1803 case -1:
1646 end_of_file_error (); 1804 end_of_file_error ();
1674 c = READCHAR; 1832 c = READCHAR;
1675 if (c != '-') 1833 if (c != '-')
1676 error ("Invalid escape character syntax"); 1834 error ("Invalid escape character syntax");
1677 c = READCHAR; 1835 c = READCHAR;
1678 if (c == '\\') 1836 if (c == '\\')
1679 c = read_escape (readcharfun, 0, byterep); 1837 c = read_escape (readcharfun, 0);
1680 return c | meta_modifier; 1838 return c | meta_modifier;
1681 1839
1682 case 'S': 1840 case 'S':
1683 c = READCHAR; 1841 c = READCHAR;
1684 if (c != '-') 1842 if (c != '-')
1685 error ("Invalid escape character syntax"); 1843 error ("Invalid escape character syntax");
1686 c = READCHAR; 1844 c = READCHAR;
1687 if (c == '\\') 1845 if (c == '\\')
1688 c = read_escape (readcharfun, 0, byterep); 1846 c = read_escape (readcharfun, 0);
1689 return c | shift_modifier; 1847 return c | shift_modifier;
1690 1848
1691 case 'H': 1849 case 'H':
1692 c = READCHAR; 1850 c = READCHAR;
1693 if (c != '-') 1851 if (c != '-')
1694 error ("Invalid escape character syntax"); 1852 error ("Invalid escape character syntax");
1695 c = READCHAR; 1853 c = READCHAR;
1696 if (c == '\\') 1854 if (c == '\\')
1697 c = read_escape (readcharfun, 0, byterep); 1855 c = read_escape (readcharfun, 0);
1698 return c | hyper_modifier; 1856 return c | hyper_modifier;
1699 1857
1700 case 'A': 1858 case 'A':
1701 c = READCHAR; 1859 c = READCHAR;
1702 if (c != '-') 1860 if (c != '-')
1703 error ("Invalid escape character syntax"); 1861 error ("Invalid escape character syntax");
1704 c = READCHAR; 1862 c = READCHAR;
1705 if (c == '\\') 1863 if (c == '\\')
1706 c = read_escape (readcharfun, 0, byterep); 1864 c = read_escape (readcharfun, 0);
1707 return c | alt_modifier; 1865 return c | alt_modifier;
1708 1866
1709 case 's': 1867 case 's':
1710 if (stringp) 1868 if (stringp)
1711 return ' '; 1869 return ' ';
1714 UNREAD (c); 1872 UNREAD (c);
1715 return ' '; 1873 return ' ';
1716 } 1874 }
1717 c = READCHAR; 1875 c = READCHAR;
1718 if (c == '\\') 1876 if (c == '\\')
1719 c = read_escape (readcharfun, 0, byterep); 1877 c = read_escape (readcharfun, 0);
1720 return c | super_modifier; 1878 return c | super_modifier;
1721 1879
1722 case 'C': 1880 case 'C':
1723 c = READCHAR; 1881 c = READCHAR;
1724 if (c != '-') 1882 if (c != '-')
1725 error ("Invalid escape character syntax"); 1883 error ("Invalid escape character syntax");
1726 case '^': 1884 case '^':
1727 c = READCHAR; 1885 c = READCHAR;
1728 if (c == '\\') 1886 if (c == '\\')
1729 c = read_escape (readcharfun, 0, byterep); 1887 c = read_escape (readcharfun, 0);
1730 if ((c & ~CHAR_MODIFIER_MASK) == '?') 1888 if ((c & ~CHAR_MODIFIER_MASK) == '?')
1731 return 0177 | (c & CHAR_MODIFIER_MASK); 1889 return 0177 | (c & CHAR_MODIFIER_MASK);
1732 else if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK))) 1890 else if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
1733 return c | ctrl_modifier; 1891 return c | ctrl_modifier;
1734 /* ASCII control chars are made from letters (both cases), 1892 /* ASCII control chars are made from letters (both cases),
1764 UNREAD (c); 1922 UNREAD (c);
1765 break; 1923 break;
1766 } 1924 }
1767 } 1925 }
1768 1926
1769 *byterep = 1; 1927 if (i >= 0x80 && i < 0x100)
1928 i = BYTE8_TO_CHAR (i);
1770 return i; 1929 return i;
1771 } 1930 }
1772 1931
1773 case 'x': 1932 case 'x':
1774 /* A hex escape, as in ANSI C. */ 1933 /* A hex escape, as in ANSI C. */
1775 { 1934 {
1776 int i = 0; 1935 int i = 0;
1936 int count = 0;
1777 while (1) 1937 while (1)
1778 { 1938 {
1779 c = READCHAR; 1939 c = READCHAR;
1780 if (c >= '0' && c <= '9') 1940 if (c >= '0' && c <= '9')
1781 { 1941 {
1794 else 1954 else
1795 { 1955 {
1796 UNREAD (c); 1956 UNREAD (c);
1797 break; 1957 break;
1798 } 1958 }
1959 count++;
1799 } 1960 }
1800 1961
1801 *byterep = 2; 1962 if (count < 3 && i >= 0x80)
1963 return BYTE8_TO_CHAR (i);
1802 return i; 1964 return i;
1803 } 1965 }
1804 1966
1805 default: 1967 default:
1806 if (BASE_LEADING_CODE_P (c))
1807 c = read_multibyte (c, readcharfun);
1808 return c; 1968 return c;
1809 } 1969 }
1810 } 1970 }
1811 1971
1812 1972
1874 2034
1875 return make_number (sign * number); 2035 return make_number (sign * number);
1876 } 2036 }
1877 2037
1878 2038
1879 /* Convert unibyte text in read_buffer to multibyte.
1880
1881 Initially, *P is a pointer after the end of the unibyte text, and
1882 the pointer *END points after the end of read_buffer.
1883
1884 If read_buffer doesn't have enough room to hold the result
1885 of the conversion, reallocate it and adjust *P and *END.
1886
1887 At the end, make *P point after the result of the conversion, and
1888 return in *NCHARS the number of characters in the converted
1889 text. */
1890
1891 static void
1892 to_multibyte (p, end, nchars)
1893 char **p, **end;
1894 int *nchars;
1895 {
1896 int nbytes;
1897
1898 parse_str_as_multibyte (read_buffer, *p - read_buffer, &nbytes, nchars);
1899 if (read_buffer_size < 2 * nbytes)
1900 {
1901 int offset = *p - read_buffer;
1902 read_buffer_size = 2 * max (read_buffer_size, nbytes);
1903 read_buffer = (char *) xrealloc (read_buffer, read_buffer_size);
1904 *p = read_buffer + offset;
1905 *end = read_buffer + read_buffer_size;
1906 }
1907
1908 if (nbytes != *nchars)
1909 nbytes = str_as_multibyte (read_buffer, read_buffer_size,
1910 *p - read_buffer, nchars);
1911
1912 *p = read_buffer + nbytes;
1913 }
1914
1915
1916 /* If the next token is ')' or ']' or '.', we store that character 2039 /* If the next token is ')' or ']' or '.', we store that character
1917 in *PCH and the return value is not interesting. Else, we store 2040 in *PCH and the return value is not interesting. Else, we store
1918 zero in *PCH and we read and return one lisp object. 2041 zero in *PCH and we read and return one lisp object.
1919 2042
1920 FIRST_IN_LIST is nonzero if this is the first element of a list. */ 2043 FIRST_IN_LIST is nonzero if this is the first element of a list. */
1927 { 2050 {
1928 register int c; 2051 register int c;
1929 int uninterned_symbol = 0; 2052 int uninterned_symbol = 0;
1930 2053
1931 *pch = 0; 2054 *pch = 0;
2055 load_each_byte = 0;
1932 2056
1933 retry: 2057 retry:
1934 2058
1935 c = READCHAR; 2059 c = READCHAR;
1936 if (c < 0) 2060 if (c < 0)
1958 c = READCHAR; 2082 c = READCHAR;
1959 if (c == '[') 2083 if (c == '[')
1960 { 2084 {
1961 Lisp_Object tmp; 2085 Lisp_Object tmp;
1962 tmp = read_vector (readcharfun, 0); 2086 tmp = read_vector (readcharfun, 0);
1963 if (XVECTOR (tmp)->size < CHAR_TABLE_STANDARD_SLOTS 2087 if (XVECTOR (tmp)->size != VECSIZE (struct Lisp_Char_Table))
1964 || XVECTOR (tmp)->size > CHAR_TABLE_STANDARD_SLOTS + 10)
1965 error ("Invalid size char-table"); 2088 error ("Invalid size char-table");
1966 XSETCHAR_TABLE (tmp, XCHAR_TABLE (tmp)); 2089 XSETCHAR_TABLE (tmp, XCHAR_TABLE (tmp));
1967 XCHAR_TABLE (tmp)->top = Qt;
1968 return tmp; 2090 return tmp;
1969 } 2091 }
1970 else if (c == '^') 2092 else if (c == '^')
1971 { 2093 {
1972 c = READCHAR; 2094 c = READCHAR;
1973 if (c == '[') 2095 if (c == '[')
1974 { 2096 {
1975 Lisp_Object tmp; 2097 Lisp_Object tmp;
2098 int depth, size;
2099
1976 tmp = read_vector (readcharfun, 0); 2100 tmp = read_vector (readcharfun, 0);
1977 if (XVECTOR (tmp)->size != SUB_CHAR_TABLE_STANDARD_SLOTS) 2101 if (!INTEGERP (AREF (tmp, 0)))
2102 error ("Invalid depth in char-table");
2103 depth = XINT (AREF (tmp, 0));
2104 if (depth < 1 || depth > 3)
2105 error ("Invalid depth in char-table");
2106 size = XVECTOR (tmp)->size + 2;
2107 if (chartab_size [depth] != size)
1978 error ("Invalid size char-table"); 2108 error ("Invalid size char-table");
1979 XSETCHAR_TABLE (tmp, XCHAR_TABLE (tmp)); 2109 XSETSUB_CHAR_TABLE (tmp, XSUB_CHAR_TABLE (tmp));
1980 XCHAR_TABLE (tmp)->top = Qnil;
1981 return tmp; 2110 return tmp;
1982 } 2111 }
1983 Fsignal (Qinvalid_read_syntax, 2112 Fsignal (Qinvalid_read_syntax,
1984 Fcons (make_string ("#^^", 3), Qnil)); 2113 Fcons (make_string ("#^^", 3), Qnil));
1985 } 2114 }
1996 int size_in_chars = ((XFASTINT (length) + BITS_PER_CHAR - 1) 2125 int size_in_chars = ((XFASTINT (length) + BITS_PER_CHAR - 1)
1997 / BITS_PER_CHAR); 2126 / BITS_PER_CHAR);
1998 2127
1999 UNREAD (c); 2128 UNREAD (c);
2000 tmp = read1 (readcharfun, pch, first_in_list); 2129 tmp = read1 (readcharfun, pch, first_in_list);
2001 if (size_in_chars != SCHARS (tmp) 2130 if (STRING_MULTIBYTE (tmp)
2002 /* We used to print 1 char too many 2131 || (size_in_chars != SCHARS (tmp)
2003 when the number of bits was a multiple of 8. 2132 /* We used to print 1 char too many
2004 Accept such input in case it came from an old version. */ 2133 when the number of bits was a multiple of 8.
2005 && ! (XFASTINT (length) 2134 Accept such input in case it came from an old
2006 == (SCHARS (tmp) - 1) * BITS_PER_CHAR)) 2135 version. */
2136 && ! (XFASTINT (length)
2137 == (SCHARS (tmp) - 1) * BITS_PER_CHAR)))
2007 Fsignal (Qinvalid_read_syntax, 2138 Fsignal (Qinvalid_read_syntax,
2008 Fcons (make_string ("#&...", 5), Qnil)); 2139 Fcons (make_string ("#&...", 5), Qnil));
2009 2140
2010 val = Fmake_bool_vector (length, Qnil); 2141 val = Fmake_bool_vector (length, Qnil);
2011 bcopy (SDATA (tmp), XBOOL_VECTOR (val)->data, 2142 bcopy (SDATA (tmp), XBOOL_VECTOR (val)->data,
2067 and function definitions. */ 2198 and function definitions. */
2068 if (c == '@') 2199 if (c == '@')
2069 { 2200 {
2070 int i, nskip = 0; 2201 int i, nskip = 0;
2071 2202
2203 load_each_byte = 1;
2072 /* Read a decimal integer. */ 2204 /* Read a decimal integer. */
2073 while ((c = READCHAR) >= 0 2205 while ((c = READCHAR) >= 0
2074 && c >= '0' && c <= '9') 2206 && c >= '0' && c <= '9')
2075 { 2207 {
2076 nskip *= 10; 2208 nskip *= 10;
2077 nskip += c - '0'; 2209 nskip += c - '0';
2078 } 2210 }
2079 if (c >= 0) 2211 if (c >= 0)
2080 UNREAD (c); 2212 UNREAD (c);
2081 2213
2082 if (load_force_doc_strings && EQ (readcharfun, Qget_file_char)) 2214 if (load_force_doc_strings
2215 && (EQ (readcharfun, Qget_file_char)
2216 || EQ (readcharfun, Qget_emacs_mule_file_char)))
2083 { 2217 {
2084 /* If we are supposed to force doc strings into core right now, 2218 /* If we are supposed to force doc strings into core right now,
2085 record the last string that we skipped, 2219 record the last string that we skipped,
2086 and record where in the file it comes from. */ 2220 and record where in the file it comes from. */
2087 2221
2129 /* Skip that many characters. */ 2263 /* Skip that many characters. */
2130 for (i = 0; i < nskip && c >= 0; i++) 2264 for (i = 0; i < nskip && c >= 0; i++)
2131 c = READCHAR; 2265 c = READCHAR;
2132 } 2266 }
2133 2267
2268 load_each_byte = 0;
2134 goto retry; 2269 goto retry;
2135 } 2270 }
2136 if (c == '!') 2271 if (c == '!')
2137 { 2272 {
2138 /* #! appears at the beginning of an executable file. 2273 /* #! appears at the beginning of an executable file.
2258 else 2393 else
2259 goto default_label; 2394 goto default_label;
2260 2395
2261 case '?': 2396 case '?':
2262 { 2397 {
2263 int discard; 2398 int modifiers;
2264 int next_char; 2399 int next_char;
2265 int ok; 2400 int ok;
2266 2401
2267 c = READCHAR; 2402 c = READCHAR;
2268 if (c < 0) 2403 if (c < 0)
2274 as there are well-established escape sequences for these. */ 2409 as there are well-established escape sequences for these. */
2275 if (c == ' ' || c == '\t') 2410 if (c == ' ' || c == '\t')
2276 return make_number (c); 2411 return make_number (c);
2277 2412
2278 if (c == '\\') 2413 if (c == '\\')
2279 c = read_escape (readcharfun, 0, &discard); 2414 c = read_escape (readcharfun, 0);
2280 else if (BASE_LEADING_CODE_P (c)) 2415 modifiers = c & CHAR_MODIFIER_MASK;
2281 c = read_multibyte (c, readcharfun); 2416 c &= ~CHAR_MODIFIER_MASK;
2417 if (CHAR_BYTE8_P (c))
2418 c = CHAR_TO_BYTE8 (c);
2419 c |= modifiers;
2282 2420
2283 next_char = READCHAR; 2421 next_char = READCHAR;
2284 if (next_char == '.') 2422 if (next_char == '.')
2285 { 2423 {
2286 /* Only a dotted-pair dot is valid after a char constant. */ 2424 /* Only a dotted-pair dot is valid after a char constant. */
2311 case '"': 2449 case '"':
2312 { 2450 {
2313 char *p = read_buffer; 2451 char *p = read_buffer;
2314 char *end = read_buffer + read_buffer_size; 2452 char *end = read_buffer + read_buffer_size;
2315 register int c; 2453 register int c;
2316 /* 1 if we saw an escape sequence specifying 2454 /* Nonzero if we saw an escape sequence specifying
2317 a multibyte character, or a multibyte character. */ 2455 a multibyte character. */
2318 int force_multibyte = 0; 2456 int force_multibyte = 0;
2319 /* 1 if we saw an escape sequence specifying 2457 /* Nonzero if we saw an escape sequence specifying
2320 a single-byte character. */ 2458 a single-byte character. */
2321 int force_singlebyte = 0; 2459 int force_singlebyte = 0;
2322 /* 1 if read_buffer contains multibyte text now. */
2323 int is_multibyte = 0;
2324 int cancel = 0; 2460 int cancel = 0;
2325 int nchars = 0; 2461 int nchars = 0;
2326 2462
2327 while ((c = READCHAR) >= 0 2463 while ((c = READCHAR) >= 0
2328 && c != '\"') 2464 && c != '\"')
2336 end = read_buffer + read_buffer_size; 2472 end = read_buffer + read_buffer_size;
2337 } 2473 }
2338 2474
2339 if (c == '\\') 2475 if (c == '\\')
2340 { 2476 {
2341 int byterep; 2477 int modifiers;
2342 2478
2343 c = read_escape (readcharfun, 1, &byterep); 2479 c = read_escape (readcharfun, 1);
2344 2480
2345 /* C is -1 if \ newline has just been seen */ 2481 /* C is -1 if \ newline has just been seen */
2346 if (c == -1) 2482 if (c == -1)
2347 { 2483 {
2348 if (p == read_buffer) 2484 if (p == read_buffer)
2349 cancel = 1; 2485 cancel = 1;
2350 continue; 2486 continue;
2351 } 2487 }
2352 2488
2353 if (byterep == 1) 2489 modifiers = c & CHAR_MODIFIER_MASK;
2490 c = c & ~CHAR_MODIFIER_MASK;
2491
2492 if (CHAR_BYTE8_P (c))
2354 force_singlebyte = 1; 2493 force_singlebyte = 1;
2355 else if (byterep == 2) 2494 else if (! ASCII_CHAR_P (c))
2495 force_multibyte = 1;
2496 else /* i.e. ASCII_CHAR_P (c) */
2497 {
2498 /* Allow `\C- ' and `\C-?'. */
2499 if (modifiers == CHAR_CTL)
2500 {
2501 if (c == ' ')
2502 c = 0, modifiers = 0;
2503 else if (c == '?')
2504 c = 127, modifiers = 0;
2505 }
2506 if (modifiers & CHAR_SHIFT)
2507 {
2508 /* Shift modifier is valid only with [A-Za-z]. */
2509 if (c >= 'A' && c <= 'Z')
2510 modifiers &= ~CHAR_SHIFT;
2511 else if (c >= 'a' && c <= 'z')
2512 c -= ('a' - 'A'), modifiers &= ~CHAR_SHIFT;
2513 }
2514
2515 if (modifiers & CHAR_META)
2516 {
2517 /* Move the meta bit to the right place for a
2518 string. */
2519 modifiers &= ~CHAR_META;
2520 c = BYTE8_TO_CHAR (c | 0x80);
2521 force_singlebyte = 1;
2522 }
2523 }
2524
2525 /* Any modifiers remaining are invalid. */
2526 if (modifiers)
2527 error ("Invalid modifier in string");
2528 p += CHAR_STRING (c, (unsigned char *) p);
2529 }
2530 else
2531 {
2532 p += CHAR_STRING (c, (unsigned char *) p);
2533 if (CHAR_BYTE8_P (c))
2534 force_singlebyte = 1;
2535 else if (! ASCII_CHAR_P (c))
2356 force_multibyte = 1; 2536 force_multibyte = 1;
2357 } 2537 }
2358
2359 /* A character that must be multibyte forces multibyte. */
2360 if (! SINGLE_BYTE_CHAR_P (c & ~CHAR_MODIFIER_MASK))
2361 force_multibyte = 1;
2362
2363 /* If we just discovered the need to be multibyte,
2364 convert the text accumulated thus far. */
2365 if (force_multibyte && ! is_multibyte)
2366 {
2367 is_multibyte = 1;
2368 to_multibyte (&p, &end, &nchars);
2369 }
2370
2371 /* Allow `\C- ' and `\C-?'. */
2372 if (c == (CHAR_CTL | ' '))
2373 c = 0;
2374 else if (c == (CHAR_CTL | '?'))
2375 c = 127;
2376
2377 if (c & CHAR_SHIFT)
2378 {
2379 /* Shift modifier is valid only with [A-Za-z]. */
2380 if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
2381 c &= ~CHAR_SHIFT;
2382 else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
2383 c = (c & ~CHAR_SHIFT) - ('a' - 'A');
2384 }
2385
2386 if (c & CHAR_META)
2387 /* Move the meta bit to the right place for a string. */
2388 c = (c & ~CHAR_META) | 0x80;
2389 if (c & CHAR_MODIFIER_MASK)
2390 error ("Invalid modifier in string");
2391
2392 if (is_multibyte)
2393 p += CHAR_STRING (c, p);
2394 else
2395 *p++ = c;
2396
2397 nchars++; 2538 nchars++;
2398 } 2539 }
2399 2540
2400 if (c < 0) 2541 if (c < 0)
2401 end_of_file_error (); 2542 end_of_file_error ();
2404 return zero instead. This is for doc strings 2545 return zero instead. This is for doc strings
2405 that we are really going to find in etc/DOC.nn.nn */ 2546 that we are really going to find in etc/DOC.nn.nn */
2406 if (!NILP (Vpurify_flag) && NILP (Vdoc_file_name) && cancel) 2547 if (!NILP (Vpurify_flag) && NILP (Vdoc_file_name) && cancel)
2407 return make_number (0); 2548 return make_number (0);
2408 2549
2409 if (is_multibyte || force_singlebyte) 2550 if (force_multibyte)
2551 /* READ_BUFFER already contains valid multibyte forms. */
2410 ; 2552 ;
2411 else if (load_convert_to_unibyte) 2553 else if (force_singlebyte)
2412 { 2554 {
2413 Lisp_Object string; 2555 nchars = str_as_unibyte (read_buffer, p - read_buffer);
2414 to_multibyte (&p, &end, &nchars); 2556 p = read_buffer + nchars;
2415 if (p - read_buffer != nchars)
2416 {
2417 string = make_multibyte_string (read_buffer, nchars,
2418 p - read_buffer);
2419 return Fstring_make_unibyte (string);
2420 }
2421 /* We can make a unibyte string directly. */
2422 is_multibyte = 0;
2423 }
2424 else if (EQ (readcharfun, Qget_file_char)
2425 || EQ (readcharfun, Qlambda))
2426 {
2427 /* Nowadays, reading directly from a file is used only for
2428 compiled Emacs Lisp files, and those always use the
2429 Emacs internal encoding. Meanwhile, Qlambda is used
2430 for reading dynamic byte code (compiled with
2431 byte-compile-dynamic = t). So make the string multibyte
2432 if the string contains any multibyte sequences.
2433 (to_multibyte is a no-op if not.) */
2434 to_multibyte (&p, &end, &nchars);
2435 is_multibyte = (p - read_buffer) != nchars;
2436 } 2557 }
2437 else 2558 else
2438 /* In all other cases, if we read these bytes as 2559 /* Otherwise, READ_BUFFER contains only ASCII. */
2439 separate characters, treat them as separate characters now. */
2440 ; 2560 ;
2441 2561
2442 /* We want readchar_count to be the number of characters, not 2562 /* We want readchar_count to be the number of characters, not
2443 bytes. Hence we adjust for multibyte characters in the 2563 bytes. Hence we adjust for multibyte characters in the
2444 string. ... But it doesn't seem to be necessary, because 2564 string. ... But it doesn't seem to be necessary, because
2445 READCHAR *does* read multibyte characters from buffers. */ 2565 READCHAR *does* read multibyte characters from buffers. */
2446 /* readchar_count -= (p - read_buffer) - nchars; */ 2566 /* readchar_count -= (p - read_buffer) - nchars; */
2447 if (read_pure) 2567 if (read_pure)
2448 return make_pure_string (read_buffer, nchars, p - read_buffer, 2568 return make_pure_string (read_buffer, nchars, p - read_buffer,
2449 is_multibyte); 2569 (force_multibyte
2570 || (p - read_buffer != nchars)));
2450 return make_specified_string (read_buffer, nchars, p - read_buffer, 2571 return make_specified_string (read_buffer, nchars, p - read_buffer,
2451 is_multibyte); 2572 (force_multibyte
2573 || (p - read_buffer != nchars)));
2452 } 2574 }
2453 2575
2454 case '.': 2576 case '.':
2455 { 2577 {
2456 int next_char = READCHAR; 2578 int next_char = READCHAR;
2501 if (c == -1) 2623 if (c == -1)
2502 end_of_file_error (); 2624 end_of_file_error ();
2503 quoted = 1; 2625 quoted = 1;
2504 } 2626 }
2505 2627
2506 if (! SINGLE_BYTE_CHAR_P (c)) 2628 p += CHAR_STRING (c, p);
2507 p += CHAR_STRING (c, p);
2508 else
2509 *p++ = c;
2510
2511 c = READCHAR; 2629 c = READCHAR;
2512 } 2630 }
2513 2631
2514 if (p == end) 2632 if (p == end)
2515 { 2633 {
2539 if (p1 == p) 2657 if (p1 == p)
2540 /* It is an integer. */ 2658 /* It is an integer. */
2541 { 2659 {
2542 if (p1[-1] == '.') 2660 if (p1[-1] == '.')
2543 p1[-1] = '\0'; 2661 p1[-1] = '\0';
2662 /* Fixme: if we have strtol, use that, and check
2663 for overflow. */
2544 if (sizeof (int) == sizeof (EMACS_INT)) 2664 if (sizeof (int) == sizeof (EMACS_INT))
2545 XSETINT (val, atoi (read_buffer)); 2665 XSETINT (val, atoi (read_buffer));
2546 else if (sizeof (long) == sizeof (EMACS_INT)) 2666 else if (sizeof (long) == sizeof (EMACS_INT))
2547 XSETINT (val, atol (read_buffer)); 2667 XSETINT (val, atol (read_buffer));
2548 else 2668 else
2842 but without generating extra garbage and 2962 but without generating extra garbage and
2843 guaranteeing no change in the contents). */ 2963 guaranteeing no change in the contents). */
2844 STRING_SET_CHARS (bytestr, SBYTES (bytestr)); 2964 STRING_SET_CHARS (bytestr, SBYTES (bytestr));
2845 STRING_SET_UNIBYTE (bytestr); 2965 STRING_SET_UNIBYTE (bytestr);
2846 2966
2847 item = Fread (bytestr); 2967 item = Fread (Fcons (bytestr, readcharfun));
2848 if (!CONSP (item)) 2968 if (!CONSP (item))
2849 error ("invalid byte code"); 2969 error ("invalid byte code");
2850 2970
2851 otem = XCONS (item); 2971 otem = XCONS (item);
2852 bytestr = XCAR (item); 2972 bytestr = XCAR (item);
2854 free_cons (otem); 2974 free_cons (otem);
2855 } 2975 }
2856 2976
2857 /* Now handle the bytecode slot. */ 2977 /* Now handle the bytecode slot. */
2858 ptr[COMPILED_BYTECODE] = read_pure ? Fpurecopy (bytestr) : bytestr; 2978 ptr[COMPILED_BYTECODE] = read_pure ? Fpurecopy (bytestr) : bytestr;
2979 }
2980 else if (i == COMPILED_DOC_STRING
2981 && STRINGP (item)
2982 && ! STRING_MULTIBYTE (item))
2983 {
2984 if (EQ (readcharfun, Qget_emacs_mule_file_char))
2985 item = Fdecode_coding_string (item, Qemacs_mule, Qnil, Qnil);
2986 else
2987 item = Fstring_as_multibyte (item);
2859 } 2988 }
2860 } 2989 }
2861 ptr[i] = read_pure ? Fpurecopy (item) : item; 2990 ptr[i] = read_pure ? Fpurecopy (item) : item;
2862 otem = XCONS (tem); 2991 otem = XCONS (tem);
2863 tem = Fcdr (tem); 2992 tem = Fcdr (tem);
2952 if (doc_reference == 1) 3081 if (doc_reference == 1)
2953 return make_number (0); 3082 return make_number (0);
2954 if (doc_reference == 2) 3083 if (doc_reference == 2)
2955 { 3084 {
2956 /* Get a doc string from the file we are loading. 3085 /* Get a doc string from the file we are loading.
2957 If it's in saved_doc_string, get it from there. */ 3086 If it's in saved_doc_string, get it from there.
3087
3088 Here, we don't know if the string is a
3089 bytecode string or a doc string. As a
3090 bytecode string must be unibyte, we always
3091 return a unibyte string. If it is actually a
3092 doc string, caller must make it
3093 multibyte. */
3094
2958 int pos = XINT (XCDR (val)); 3095 int pos = XINT (XCDR (val));
2959 /* Position is negative for user variables. */ 3096 /* Position is negative for user variables. */
2960 if (pos < 0) pos = -pos; 3097 if (pos < 0) pos = -pos;
2961 if (pos >= saved_doc_string_position 3098 if (pos >= saved_doc_string_position
2962 && pos < (saved_doc_string_position 3099 && pos < (saved_doc_string_position
2984 } 3121 }
2985 else 3122 else
2986 saved_doc_string[to++] = c; 3123 saved_doc_string[to++] = c;
2987 } 3124 }
2988 3125
2989 return make_string (saved_doc_string + start, 3126 return make_unibyte_string (saved_doc_string + start,
2990 to - start); 3127 to - start);
2991 } 3128 }
2992 /* Look in prev_saved_doc_string the same way. */ 3129 /* Look in prev_saved_doc_string the same way. */
2993 else if (pos >= prev_saved_doc_string_position 3130 else if (pos >= prev_saved_doc_string_position
2994 && pos < (prev_saved_doc_string_position 3131 && pos < (prev_saved_doc_string_position
2995 + prev_saved_doc_string_length)) 3132 + prev_saved_doc_string_length))
3016 } 3153 }
3017 else 3154 else
3018 prev_saved_doc_string[to++] = c; 3155 prev_saved_doc_string[to++] = c;
3019 } 3156 }
3020 3157
3021 return make_string (prev_saved_doc_string + start, 3158 return make_unibyte_string (prev_saved_doc_string
3022 to - start); 3159 + start,
3160 to - start);
3023 } 3161 }
3024 else 3162 else
3025 return get_doc_string (val, 0, 0); 3163 return get_doc_string (val, 1, 0);
3026 } 3164 }
3027 3165
3028 return val; 3166 return val;
3029 } 3167 }
3030 return Fsignal (Qinvalid_read_syntax, Fcons (make_string (". in wrong context", 18), Qnil)); 3168 return Fsignal (Qinvalid_read_syntax, Fcons (make_string (". in wrong context", 18), Qnil));
3935 staticpro (&Qread_char); 4073 staticpro (&Qread_char);
3936 4074
3937 Qget_file_char = intern ("get-file-char"); 4075 Qget_file_char = intern ("get-file-char");
3938 staticpro (&Qget_file_char); 4076 staticpro (&Qget_file_char);
3939 4077
4078 Qget_emacs_mule_file_char = intern ("get-emacs-mule-file-char");
4079 staticpro (&Qget_emacs_mule_file_char);
4080
4081 Qload_force_doc_strings = intern ("load-force-doc-strings");
4082 staticpro (&Qload_force_doc_strings);
4083
3940 Qbackquote = intern ("`"); 4084 Qbackquote = intern ("`");
3941 staticpro (&Qbackquote); 4085 staticpro (&Qbackquote);
3942 Qcomma = intern (","); 4086 Qcomma = intern (",");
3943 staticpro (&Qcomma); 4087 staticpro (&Qcomma);
3944 Qcomma_at = intern (",@"); 4088 Qcomma_at = intern (",@");