Mercurial > emacs
comparison src/lread.c @ 21724:12a826a37249
(read_from_string_index_byte): New variable.
(read_from_string_index): Now counts characters.
(readchar, unreadchar, Fread_from_string): Changed accordingly.
(readchar): Read a multibyte char all at once
from a buffer, marker or string.
(unreadchar): Unread a multibyte char all at once.
(read1): Properly handle non-escaped multibyte chars.
They force a string to be multibyte.
When reading direct from a file, any multibyte sequence means
a multibyte string.
Insist on MAX_LENGTH_OF_MULTI_BYTE_FORM bytes when checking
for read_buffer full; this way need not check specially for multibyte.
author | Richard M. Stallman <rms@gnu.org> |
---|---|
date | Thu, 23 Apr 1998 21:22:51 +0000 |
parents | f42b78e033db |
children | f654c3b16214 |
comparison
equal
deleted
inserted
replaced
21723:edc95c28d970 | 21724:12a826a37249 |
---|---|
129 /* When nonzero, read conses in pure space */ | 129 /* When nonzero, read conses in pure space */ |
130 static int read_pure; | 130 static int read_pure; |
131 | 131 |
132 /* For use within read-from-string (this reader is non-reentrant!!) */ | 132 /* For use within read-from-string (this reader is non-reentrant!!) */ |
133 static int read_from_string_index; | 133 static int read_from_string_index; |
134 static int read_from_string_index_byte; | |
134 static int read_from_string_limit; | 135 static int read_from_string_limit; |
135 | 136 |
136 /* Number of bytes left to read in the buffer character | 137 /* Number of bytes left to read in the buffer character |
137 that `readchar' has already advanced over. */ | 138 that `readchar' has already advanced over. */ |
138 static int readchar_backlog; | 139 static int readchar_backlog; |
167 static int | 168 static int |
168 readchar (readcharfun) | 169 readchar (readcharfun) |
169 Lisp_Object readcharfun; | 170 Lisp_Object readcharfun; |
170 { | 171 { |
171 Lisp_Object tem; | 172 Lisp_Object tem; |
172 register struct buffer *inbuffer; | |
173 register int c, mpos; | 173 register int c, mpos; |
174 | 174 |
175 if (BUFFERP (readcharfun)) | 175 if (BUFFERP (readcharfun)) |
176 { | 176 { |
177 inbuffer = XBUFFER (readcharfun); | 177 register struct buffer *inbuffer = XBUFFER (readcharfun); |
178 | 178 |
179 if (readchar_backlog == 0) | 179 int pt_byte = BUF_PT_BYTE (inbuffer); |
180 { | 180 int orig_pt_byte = pt_byte; |
181 int pt_byte = BUF_PT_BYTE (inbuffer); | 181 |
182 int orig_pt_byte = pt_byte; | 182 if (pt_byte >= BUF_ZV_BYTE (inbuffer)) |
183 | 183 return -1; |
184 if (pt_byte >= BUF_ZV_BYTE (inbuffer)) | 184 |
185 return -1; | 185 if (! NILP (inbuffer->enable_multibyte_characters)) |
186 | 186 { |
187 if (! NILP (inbuffer->enable_multibyte_characters)) | 187 unsigned char *p = BUF_BYTE_ADDRESS (inbuffer, pt_byte); |
188 BUF_INC_POS (inbuffer, pt_byte); | 188 BUF_INC_POS (inbuffer, pt_byte); |
189 else | 189 c = STRING_CHAR (p, pt_byte - orig_pt_byte); |
190 pt_byte++; | 190 } |
191 SET_BUF_PT_BOTH (inbuffer, BUF_PT (inbuffer) + 1, pt_byte); | 191 else |
192 readchar_backlog = pt_byte - orig_pt_byte; | 192 { |
193 } | 193 c = BUF_FETCH_BYTE (inbuffer, pt_byte); |
194 | 194 pt_byte++; |
195 /* We get the address of the byte just passed, | 195 } |
196 which is the last byte of the character. | 196 SET_BUF_PT_BOTH (inbuffer, BUF_PT (inbuffer) + 1, pt_byte); |
197 The other bytes in this character are consecutive with it, | 197 |
198 because the gap can't be in the middle of a character. */ | 198 return c; |
199 return *(BUF_BYTE_ADDRESS (inbuffer, BUF_PT_BYTE (inbuffer) - 1) | |
200 - --readchar_backlog); | |
201 } | 199 } |
202 if (MARKERP (readcharfun)) | 200 if (MARKERP (readcharfun)) |
203 { | 201 { |
204 inbuffer = XMARKER (readcharfun)->buffer; | 202 register struct buffer *inbuffer = XMARKER (readcharfun)->buffer; |
205 | 203 |
206 if (readchar_backlog == 0) | 204 int bytepos = marker_byte_position (readcharfun); |
207 { | 205 int orig_bytepos = bytepos; |
208 int bytepos = marker_byte_position (readcharfun); | 206 |
209 int orig_bytepos = bytepos; | 207 if (bytepos >= BUF_ZV_BYTE (inbuffer)) |
210 | 208 return -1; |
211 if (bytepos >= BUF_ZV_BYTE (inbuffer)) | 209 |
212 return -1; | 210 if (! NILP (inbuffer->enable_multibyte_characters)) |
213 | 211 { |
214 if (! NILP (inbuffer->enable_multibyte_characters)) | 212 unsigned char *p = BUF_BYTE_ADDRESS (inbuffer, bytepos); |
215 INC_POS (bytepos); | 213 BUF_INC_POS (inbuffer, bytepos); |
216 else | 214 c = STRING_CHAR (p, bytepos - orig_bytepos); |
217 bytepos++; | 215 } |
218 XMARKER (readcharfun)->bytepos = bytepos; | 216 else |
219 XMARKER (readcharfun)->charpos++; | 217 { |
220 | 218 c = BUF_FETCH_BYTE (inbuffer, bytepos); |
221 readchar_backlog = bytepos - orig_bytepos; | 219 bytepos++; |
222 } | 220 } |
223 | 221 |
224 /* We get the address of the byte just passed, | 222 XMARKER (readcharfun)->bytepos = bytepos; |
225 which is the last byte of the character. | 223 XMARKER (readcharfun)->charpos++; |
226 The other bytes in this character are consecutive with it, | 224 |
227 because the gap can't be in the middle of a character. */ | 225 return c; |
228 return *(BUF_BYTE_ADDRESS (inbuffer, XMARKER (readcharfun)->bytepos - 1) | |
229 - --readchar_backlog); | |
230 } | 226 } |
231 if (EQ (readcharfun, Qget_file_char)) | 227 if (EQ (readcharfun, Qget_file_char)) |
232 { | 228 { |
233 c = getc (instream); | 229 c = getc (instream); |
234 #ifdef EINTR | 230 #ifdef EINTR |
242 return c; | 238 return c; |
243 } | 239 } |
244 | 240 |
245 if (STRINGP (readcharfun)) | 241 if (STRINGP (readcharfun)) |
246 { | 242 { |
247 register int c; | 243 if (read_from_string_index >= read_from_string_limit) |
248 /* This used to be return of a conditional expression, | 244 c = -1; |
249 but that truncated -1 to a char on VMS. */ | 245 else if (STRING_MULTIBYTE (readcharfun)) |
250 if (read_from_string_index < read_from_string_limit) | 246 FETCH_STRING_CHAR_ADVANCE (c, readcharfun, |
247 read_from_string_index, | |
248 read_from_string_index_byte); | |
249 else | |
251 c = XSTRING (readcharfun)->data[read_from_string_index++]; | 250 c = XSTRING (readcharfun)->data[read_from_string_index++]; |
252 else | |
253 c = -1; | |
254 | 251 |
255 return c; | 252 return c; |
256 } | 253 } |
257 | 254 |
258 tem = call0 (readcharfun); | 255 tem = call0 (readcharfun); |
274 /* Don't back up the pointer if we're unreading the end-of-input mark, | 271 /* Don't back up the pointer if we're unreading the end-of-input mark, |
275 since readchar didn't advance it when we read it. */ | 272 since readchar didn't advance it when we read it. */ |
276 ; | 273 ; |
277 else if (BUFFERP (readcharfun)) | 274 else if (BUFFERP (readcharfun)) |
278 { | 275 { |
279 if (!SINGLE_BYTE_CHAR_P (c)) | 276 struct buffer *b = XBUFFER (readcharfun); |
280 readchar_backlog++; | 277 int bytepos = BUF_PT_BYTE (b); |
278 | |
279 BUF_PT (b)--; | |
280 if (! NILP (b->enable_multibyte_characters)) | |
281 BUF_DEC_POS (b, bytepos); | |
281 else | 282 else |
282 { | 283 bytepos--; |
283 struct buffer *b = XBUFFER (readcharfun); | 284 |
284 int bytepos = BUF_PT_BYTE (b); | 285 BUF_PT_BYTE (b) = bytepos; |
285 | |
286 BUF_PT (b)--; | |
287 if (! NILP (b->enable_multibyte_characters)) | |
288 BUF_DEC_POS (b, bytepos); | |
289 else | |
290 bytepos--; | |
291 | |
292 BUF_PT_BYTE (b) = bytepos; | |
293 } | |
294 } | 286 } |
295 else if (MARKERP (readcharfun)) | 287 else if (MARKERP (readcharfun)) |
296 { | 288 { |
297 if (!SINGLE_BYTE_CHAR_P (c)) | 289 struct buffer *b = XMARKER (readcharfun)->buffer; |
298 readchar_backlog++; | 290 int bytepos = XMARKER (readcharfun)->bytepos; |
291 | |
292 XMARKER (readcharfun)->charpos--; | |
293 if (! NILP (b->enable_multibyte_characters)) | |
294 BUF_DEC_POS (b, bytepos); | |
299 else | 295 else |
300 { | 296 bytepos--; |
301 struct buffer *b = XMARKER (readcharfun)->buffer; | 297 |
302 int bytepos = XMARKER (readcharfun)->bytepos; | 298 XMARKER (readcharfun)->bytepos = bytepos; |
303 | |
304 XMARKER (readcharfun)->charpos--; | |
305 if (! NILP (b->enable_multibyte_characters)) | |
306 BUF_DEC_POS (b, bytepos); | |
307 else | |
308 bytepos--; | |
309 | |
310 XMARKER (readcharfun)->bytepos = bytepos; | |
311 } | |
312 } | 299 } |
313 else if (STRINGP (readcharfun)) | 300 else if (STRINGP (readcharfun)) |
314 read_from_string_index--; | 301 { |
302 read_from_string_index--; | |
303 read_from_string_index_byte | |
304 = string_char_to_byte (readcharfun, read_from_string_index); | |
305 } | |
315 else if (EQ (readcharfun, Qget_file_char)) | 306 else if (EQ (readcharfun, Qget_file_char)) |
316 ungetc (c, instream); | 307 ungetc (c, instream); |
317 else | 308 else |
318 call1 (readcharfun, make_number (c)); | 309 call1 (readcharfun, make_number (c)); |
319 } | 310 } |
320 | 311 |
321 static Lisp_Object read0 (), read1 (), read_list (), read_vector (); | 312 static Lisp_Object read0 (), read1 (), read_list (), read_vector (); |
322 static int read_multibyte (); | 313 static int read_multibyte (); |
323 | 314 |
324 /* get a character from the tty */ | 315 /* Get a character from the tty. */ |
325 | 316 |
326 extern Lisp_Object read_char (); | 317 extern Lisp_Object read_char (); |
327 | 318 |
328 /* Read input events until we get one that's acceptable for our purposes. | 319 /* Read input events until we get one that's acceptable for our purposes. |
329 | 320 |
1178 Lisp_Object tem; | 1169 Lisp_Object tem; |
1179 | 1170 |
1180 CHECK_STRING (string,0); | 1171 CHECK_STRING (string,0); |
1181 | 1172 |
1182 if (NILP (end)) | 1173 if (NILP (end)) |
1183 endval = STRING_BYTES (XSTRING (string)); | 1174 endval = XSTRING (string)->size; |
1184 else | 1175 else |
1185 { | 1176 { |
1186 CHECK_NUMBER (end, 2); | 1177 CHECK_NUMBER (end, 2); |
1187 endval = string_char_to_byte (string, XINT (end)); | 1178 if (endval < 0 || endval > XSTRING (string)->size) |
1188 if (endval < 0 || endval > STRING_BYTES (XSTRING (string))) | |
1189 args_out_of_range (string, end); | 1179 args_out_of_range (string, end); |
1190 } | 1180 } |
1191 | 1181 |
1192 if (NILP (start)) | 1182 if (NILP (start)) |
1193 startval = 0; | 1183 startval = 0; |
1194 else | 1184 else |
1195 { | 1185 { |
1196 CHECK_NUMBER (start, 1); | 1186 CHECK_NUMBER (start, 1); |
1197 startval = string_char_to_byte (string, XINT (start)); | |
1198 if (startval < 0 || startval > endval) | 1187 if (startval < 0 || startval > endval) |
1199 args_out_of_range (string, start); | 1188 args_out_of_range (string, start); |
1200 } | 1189 } |
1201 | 1190 |
1202 read_from_string_index = startval; | 1191 read_from_string_index = startval; |
1192 read_from_string_index_byte = string_char_to_byte (string, startval); | |
1203 read_from_string_limit = endval; | 1193 read_from_string_limit = endval; |
1204 | 1194 |
1205 new_backquote_flag = 0; | 1195 new_backquote_flag = 0; |
1206 read_objects = Qnil; | 1196 read_objects = Qnil; |
1207 | 1197 |
1208 tem = read0 (string); | 1198 tem = read0 (string); |
1209 endval = string_byte_to_char (string, | 1199 return Fcons (tem, make_number (read_from_string_index)); |
1210 read_from_string_index); | |
1211 return Fcons (tem, make_number (endval)); | |
1212 } | 1200 } |
1213 | 1201 |
1214 /* Use this for recursive reads, in contexts where internal tokens | 1202 /* Use this for recursive reads, in contexts where internal tokens |
1215 are not allowed. */ | 1203 are not allowed. */ |
1216 | 1204 |
1742 int nchars; | 1730 int nchars; |
1743 | 1731 |
1744 while ((c = READCHAR) >= 0 | 1732 while ((c = READCHAR) >= 0 |
1745 && c != '\"') | 1733 && c != '\"') |
1746 { | 1734 { |
1747 if (p == end) | 1735 if (end - p < MAX_LENGTH_OF_MULTI_BYTE_FORM) |
1748 { | 1736 { |
1749 char *new = (char *) xrealloc (read_buffer, read_buffer_size *= 2); | 1737 char *new = (char *) xrealloc (read_buffer, read_buffer_size *= 2); |
1750 p += new - read_buffer; | 1738 p += new - read_buffer; |
1751 read_buffer += new - read_buffer; | 1739 read_buffer += new - read_buffer; |
1752 end = read_buffer + read_buffer_size; | 1740 end = read_buffer + read_buffer_size; |
1753 } | 1741 } |
1742 | |
1754 if (c == '\\') | 1743 if (c == '\\') |
1755 { | 1744 { |
1756 c = read_escape (readcharfun, 1); | 1745 c = read_escape (readcharfun, 1); |
1757 if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_META))) | 1746 |
1747 /* C is -1 if \ newline has just been seen */ | |
1748 if (c == -1) | |
1758 { | 1749 { |
1759 unsigned char workbuf[4]; | 1750 if (p == read_buffer) |
1760 unsigned char *str = workbuf; | 1751 cancel = 1; |
1761 int length; | |
1762 | |
1763 length = non_ascii_char_to_string (c, workbuf, &str); | |
1764 if (length > 1) | |
1765 force_multibyte = 1; | |
1766 | |
1767 if (p + length > end) | |
1768 { | |
1769 char *new = (char *) xrealloc (read_buffer, read_buffer_size *= 2); | |
1770 p += new - read_buffer; | |
1771 read_buffer += new - read_buffer; | |
1772 end = read_buffer + read_buffer_size; | |
1773 } | |
1774 | |
1775 bcopy (str, p, length); | |
1776 p += length; | |
1777 continue; | 1752 continue; |
1778 } | 1753 } |
1754 | |
1779 /* If an escape specifies a non-ASCII single-byte character, | 1755 /* If an escape specifies a non-ASCII single-byte character, |
1780 this must be a unibyte string. */ | 1756 this must be a unibyte string. */ |
1781 else if (! ASCII_BYTE_P (c)) | 1757 if (SINGLE_BYTE_CHAR_P ((c & ~CHAR_META)) |
1758 && ! ASCII_BYTE_P (c)) | |
1782 force_singlebyte = 1; | 1759 force_singlebyte = 1; |
1783 } | 1760 } |
1784 | 1761 |
1785 /* c is -1 if \ newline has just been seen */ | 1762 if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_META))) |
1786 if (c == -1) | |
1787 { | 1763 { |
1788 if (p == read_buffer) | 1764 unsigned char workbuf[4]; |
1789 cancel = 1; | 1765 unsigned char *str = workbuf; |
1766 int length; | |
1767 | |
1768 length = non_ascii_char_to_string (c, workbuf, &str); | |
1769 if (length > 1) | |
1770 force_multibyte = 1; | |
1771 | |
1772 bcopy (str, p, length); | |
1773 p += length; | |
1790 } | 1774 } |
1791 else | 1775 else |
1792 { | 1776 { |
1793 /* Allow `\C- ' and `\C-?'. */ | 1777 /* Allow `\C- ' and `\C-?'. */ |
1794 if (c == (CHAR_CTL | ' ')) | 1778 if (c == (CHAR_CTL | ' ')) |
1812 that we are really going to find in etc/DOC.nn.nn */ | 1796 that we are really going to find in etc/DOC.nn.nn */ |
1813 if (!NILP (Vpurify_flag) && NILP (Vdoc_file_name) && cancel) | 1797 if (!NILP (Vpurify_flag) && NILP (Vdoc_file_name) && cancel) |
1814 return make_number (0); | 1798 return make_number (0); |
1815 | 1799 |
1816 if (force_singlebyte && force_multibyte) | 1800 if (force_singlebyte && force_multibyte) |
1817 error ("Multibyte and single-byte escapes in one string constant"); | 1801 error ("Multibyte and unibyte characters in one string constant"); |
1818 | 1802 |
1819 if (force_singlebyte) | 1803 if (force_singlebyte) |
1820 nchars = p - read_buffer; | 1804 nchars = p - read_buffer; |
1821 else if (force_multibyte) | 1805 else if (force_multibyte) |
1822 nchars = multibyte_chars_in_text (read_buffer, p - read_buffer); | 1806 nchars = multibyte_chars_in_text (read_buffer, p - read_buffer); |
1829 string = make_multibyte_string (read_buffer, nchars, | 1813 string = make_multibyte_string (read_buffer, nchars, |
1830 p - read_buffer); | 1814 p - read_buffer); |
1831 return Fstring_make_unibyte (string); | 1815 return Fstring_make_unibyte (string); |
1832 } | 1816 } |
1833 } | 1817 } |
1818 else if (EQ (readcharfun, Qget_file_char)) | |
1819 /* Nowadays, reading directly from a file | |
1820 is used only for compiled Emacs Lisp files, | |
1821 and those always use the Emacs internal encoding. */ | |
1822 nchars = multibyte_chars_in_text (read_buffer, p - read_buffer); | |
1834 else | 1823 else |
1824 /* In all other cases, if we read these bytes as | |
1825 separate characters, treat them as separate characters now. */ | |
1835 nchars = p - read_buffer; | 1826 nchars = p - read_buffer; |
1836 | 1827 |
1837 if (read_pure) | 1828 if (read_pure) |
1838 return make_pure_string (read_buffer, nchars, p - read_buffer, | 1829 return make_pure_string (read_buffer, nchars, p - read_buffer, |
1839 (force_multibyte | 1830 (force_multibyte |
1882 || c =='.' | 1873 || c =='.' |
1883 #endif /* not LISP_FLOAT_TYPE */ | 1874 #endif /* not LISP_FLOAT_TYPE */ |
1884 || c == '[' || c == ']' || c == '#' | 1875 || c == '[' || c == ']' || c == '#' |
1885 )) | 1876 )) |
1886 { | 1877 { |
1887 if (p == end) | 1878 if (end - p < MAX_LENGTH_OF_MULTI_BYTE_FORM) |
1888 { | 1879 { |
1889 register char *new = (char *) xrealloc (read_buffer, read_buffer_size *= 2); | 1880 register char *new = (char *) xrealloc (read_buffer, read_buffer_size *= 2); |
1890 p += new - read_buffer; | 1881 p += new - read_buffer; |
1891 read_buffer += new - read_buffer; | 1882 read_buffer += new - read_buffer; |
1892 end = read_buffer + read_buffer_size; | 1883 end = read_buffer + read_buffer_size; |
1895 { | 1886 { |
1896 c = READCHAR; | 1887 c = READCHAR; |
1897 quoted = 1; | 1888 quoted = 1; |
1898 } | 1889 } |
1899 | 1890 |
1900 *p++ = c; | 1891 if (! SINGLE_BYTE_CHAR_P (c)) |
1892 { | |
1893 unsigned char workbuf[4]; | |
1894 unsigned char *str = workbuf; | |
1895 int length; | |
1896 | |
1897 length = non_ascii_char_to_string (c, workbuf, &str); | |
1898 | |
1899 bcopy (str, p, length); | |
1900 p += length; | |
1901 } | |
1902 else | |
1903 *p++ = c; | |
1901 | 1904 |
1902 c = READCHAR; | 1905 c = READCHAR; |
1903 } | 1906 } |
1904 | 1907 |
1905 if (p == end) | 1908 if (p == end) |
2551 Vpurify_flag = Qt; | 2554 Vpurify_flag = Qt; |
2552 | 2555 |
2553 Qvariable_documentation = intern ("variable-documentation"); | 2556 Qvariable_documentation = intern ("variable-documentation"); |
2554 staticpro (&Qvariable_documentation); | 2557 staticpro (&Qvariable_documentation); |
2555 | 2558 |
2556 read_buffer_size = 100; | 2559 read_buffer_size = 100 + MAX_LENGTH_OF_MULTI_BYTE_FORM; |
2557 read_buffer = (char *) malloc (read_buffer_size); | 2560 read_buffer = (char *) malloc (read_buffer_size); |
2558 } | 2561 } |
2559 | 2562 |
2560 void | 2563 void |
2561 defsubr (sname) | 2564 defsubr (sname) |