Mercurial > emacs
comparison src/lread.c @ 43080:a6382f0fcb2a
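(read1): Redesign the strategy for force_multibyte and
force_singlebyte. is_multibyte now records whether read_buffer
currently holds multibyte text. Encountering any multibyte character,
or an escape sequence that forces multibyte, converts the text
accumulated so far to multibyte and sets is_multibyte.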
author | Richard M. Stallman <rms@gnu.org> |
---|---|
date | Sun, 03 Feb 2002 10:35:20 +0000 |
parents | 2fc2abcdc67c |
children | 4bd6b6b21deb 0b4249d736a0 |
comparison
equal
deleted
inserted
replaced
43079:a202d9fb56eb | 43080:a6382f0fcb2a |
---|---|
2143 case '"': | 2143 case '"': |
2144 { | 2144 { |
2145 char *p = read_buffer; | 2145 char *p = read_buffer; |
2146 char *end = read_buffer + read_buffer_size; | 2146 char *end = read_buffer + read_buffer_size; |
2147 register int c; | 2147 register int c; |
2148 /* Nonzero if we saw an escape sequence specifying | 2148 /* 1 if we saw an escape sequence specifying |
2149 a multibyte character. */ | 2149 a multibyte character, or a multibyte character. */ |
2150 int force_multibyte = 0; | 2150 int force_multibyte = 0; |
2151 /* Nonzero if we saw an escape sequence specifying | 2151 /* 1 if we saw an escape sequence specifying |
2152 a single-byte character. */ | 2152 a single-byte character. */ |
2153 int force_singlebyte = 0; | 2153 int force_singlebyte = 0; |
2154 /* 1 if read_buffer contains multibyte text now. */ | |
2155 int is_multibyte = 0; | |
2154 int cancel = 0; | 2156 int cancel = 0; |
2155 int nchars; | 2157 int nchars = 0; |
2156 | 2158 |
2157 while ((c = READCHAR) >= 0 | 2159 while ((c = READCHAR) >= 0 |
2158 && c != '\"') | 2160 && c != '\"') |
2159 { | 2161 { |
2160 if (end - p < MAX_MULTIBYTE_LENGTH) | 2162 if (end - p < MAX_MULTIBYTE_LENGTH) |
2184 force_singlebyte = 1; | 2186 force_singlebyte = 1; |
2185 else if (byterep == 2) | 2187 else if (byterep == 2) |
2186 force_multibyte = 1; | 2188 force_multibyte = 1; |
2187 } | 2189 } |
2188 | 2190 |
2189 if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK))) | 2191 /* A character that must be multibyte forces multibyte. */ |
2192 if (! SINGLE_BYTE_CHAR_P (c & ~CHAR_MODIFIER_MASK)) | |
2193 force_multibyte = 1; | |
2194 | |
2195 /* If we just discovered the need to be multibyte, | |
2196 convert the text accumulated thus far. */ | |
2197 if (force_multibyte && ! is_multibyte) | |
2190 { | 2198 { |
2191 /* Any modifiers for a multibyte character are invalid. */ | 2199 is_multibyte = 1; |
2192 if (c & CHAR_MODIFIER_MASK) | 2200 to_multibyte (&p, &end, &nchars); |
2193 error ("Invalid modifier in string"); | |
2194 p += CHAR_STRING (c, p); | |
2195 force_multibyte = 1; | |
2196 } | 2201 } |
2202 | |
2203 /* Allow `\C- ' and `\C-?'. */ | |
2204 if (c == (CHAR_CTL | ' ')) | |
2205 c = 0; | |
2206 else if (c == (CHAR_CTL | '?')) | |
2207 c = 127; | |
2208 | |
2209 if (c & CHAR_SHIFT) | |
2210 { | |
2211 /* Shift modifier is valid only with [A-Za-z]. */ | |
2212 if ((c & 0377) >= 'A' && (c & 0377) <= 'Z') | |
2213 c &= ~CHAR_SHIFT; | |
2214 else if ((c & 0377) >= 'a' && (c & 0377) <= 'z') | |
2215 c = (c & ~CHAR_SHIFT) - ('a' - 'A'); | |
2216 } | |
2217 | |
2218 if (c & CHAR_META) | |
2219 /* Move the meta bit to the right place for a string. */ | |
2220 c = (c & ~CHAR_META) | 0x80; | |
2221 if (c & CHAR_MODIFIER_MASK) | |
2222 error ("Invalid modifier in string"); | |
2223 | |
2224 if (is_multibyte) | |
2225 p += CHAR_STRING (c, p); | |
2197 else | 2226 else |
2198 { | 2227 *p++ = c; |
2199 /* Allow `\C- ' and `\C-?'. */ | 2228 |
2200 if (c == (CHAR_CTL | ' ')) | 2229 nchars++; |
2201 c = 0; | |
2202 else if (c == (CHAR_CTL | '?')) | |
2203 c = 127; | |
2204 | |
2205 if (c & CHAR_SHIFT) | |
2206 { | |
2207 /* Shift modifier is valid only with [A-Za-z]. */ | |
2208 if ((c & 0377) >= 'A' && (c & 0377) <= 'Z') | |
2209 c &= ~CHAR_SHIFT; | |
2210 else if ((c & 0377) >= 'a' && (c & 0377) <= 'z') | |
2211 c = (c & ~CHAR_SHIFT) - ('a' - 'A'); | |
2212 } | |
2213 | |
2214 if (c & CHAR_META) | |
2215 /* Move the meta bit to the right place for a string. */ | |
2216 c = (c & ~CHAR_META) | 0x80; | |
2217 if (c & ~0xff) | |
2218 error ("Invalid modifier in string"); | |
2219 *p++ = c; | |
2220 } | |
2221 } | 2230 } |
2231 | |
2222 if (c < 0) | 2232 if (c < 0) |
2223 end_of_file_error (); | 2233 end_of_file_error (); |
2224 | 2234 |
2225 /* If purifying, and string starts with \ newline, | 2235 /* If purifying, and string starts with \ newline, |
2226 return zero instead. This is for doc strings | 2236 return zero instead. This is for doc strings |
2227 that we are really going to find in etc/DOC.nn.nn */ | 2237 that we are really going to find in etc/DOC.nn.nn */ |
2228 if (!NILP (Vpurify_flag) && NILP (Vdoc_file_name) && cancel) | 2238 if (!NILP (Vpurify_flag) && NILP (Vdoc_file_name) && cancel) |
2229 return make_number (0); | 2239 return make_number (0); |
2230 | 2240 |
2231 if (force_multibyte) | 2241 if (is_multibyte || force_singlebyte) |
2232 to_multibyte (&p, &end, &nchars); | 2242 ; |
2233 else if (force_singlebyte) | |
2234 nchars = p - read_buffer; | |
2235 else if (load_convert_to_unibyte) | 2243 else if (load_convert_to_unibyte) |
2236 { | 2244 { |
2237 Lisp_Object string; | 2245 Lisp_Object string; |
2238 to_multibyte (&p, &end, &nchars); | 2246 to_multibyte (&p, &end, &nchars); |
2239 if (p - read_buffer != nchars) | 2247 if (p - read_buffer != nchars) |
2240 { | 2248 { |
2241 string = make_multibyte_string (read_buffer, nchars, | 2249 string = make_multibyte_string (read_buffer, nchars, |
2242 p - read_buffer); | 2250 p - read_buffer); |
2243 return Fstring_make_unibyte (string); | 2251 return Fstring_make_unibyte (string); |
2244 } | 2252 } |
2253 /* We can make a unibyte string directly. */ | |
2254 is_multibyte = 0; | |
2245 } | 2255 } |
2246 else if (EQ (readcharfun, Qget_file_char) | 2256 else if (EQ (readcharfun, Qget_file_char) |
2247 || EQ (readcharfun, Qlambda)) | 2257 || EQ (readcharfun, Qlambda)) |
2248 { | 2258 { |
2249 /* Nowadays, reading directly from a file is used only for | 2259 /* Nowadays, reading directly from a file is used only for |
2250 compiled Emacs Lisp files, and those always use the | 2260 compiled Emacs Lisp files, and those always use the |
2251 Emacs internal encoding. Meanwhile, Qlambda is used | 2261 Emacs internal encoding. Meanwhile, Qlambda is used |
2252 for reading dynamic byte code (compiled with | 2262 for reading dynamic byte code (compiled with |
2253 byte-compile-dynamic = t). */ | 2263 byte-compile-dynamic = t). */ |
2254 to_multibyte (&p, &end, &nchars); | 2264 to_multibyte (&p, &end, &nchars); |
2265 is_multibyte = 1; | |
2255 } | 2266 } |
2256 else | 2267 else |
2257 /* In all other cases, if we read these bytes as | 2268 /* In all other cases, if we read these bytes as |
2258 separate characters, treat them as separate characters now. */ | 2269 separate characters, treat them as separate characters now. */ |
2259 nchars = p - read_buffer; | 2270 ; |
2260 | 2271 |
2261 if (read_pure) | 2272 if (read_pure) |
2262 return make_pure_string (read_buffer, nchars, p - read_buffer, | 2273 return make_pure_string (read_buffer, nchars, p - read_buffer, |
2263 (force_multibyte | 2274 is_multibyte); |
2264 || (p - read_buffer != nchars))); | |
2265 return make_specified_string (read_buffer, nchars, p - read_buffer, | 2275 return make_specified_string (read_buffer, nchars, p - read_buffer, |
2266 (force_multibyte | 2276 is_multibyte); |
2267 || (p - read_buffer != nchars))); | |
2268 } | 2277 } |
2269 | 2278 |
2270 case '.': | 2279 case '.': |
2271 { | 2280 { |
2272 int next_char = READCHAR; | 2281 int next_char = READCHAR; |