comparison src/lread.c @ 43080:a6382f0fcb2a

(read1): Redesign strategy for force_multibyte and force_singlebyte. Now is_multibyte records whether read_buffer is multibyte. Encountering any multibyte character makes it so.
author Richard M. Stallman <rms@gnu.org>
date Sun, 03 Feb 2002 10:35:20 +0000
parents 2fc2abcdc67c
children 4bd6b6b21deb 0b4249d736a0
comparison
equal deleted inserted replaced
43079:a202d9fb56eb 43080:a6382f0fcb2a
2143 case '"': 2143 case '"':
2144 { 2144 {
2145 char *p = read_buffer; 2145 char *p = read_buffer;
2146 char *end = read_buffer + read_buffer_size; 2146 char *end = read_buffer + read_buffer_size;
2147 register int c; 2147 register int c;
2148 /* Nonzero if we saw an escape sequence specifying 2148 /* 1 if we saw an escape sequence specifying
2149 a multibyte character. */ 2149 a multibyte character, or a multibyte character. */
2150 int force_multibyte = 0; 2150 int force_multibyte = 0;
2151 /* Nonzero if we saw an escape sequence specifying 2151 /* 1 if we saw an escape sequence specifying
2152 a single-byte character. */ 2152 a single-byte character. */
2153 int force_singlebyte = 0; 2153 int force_singlebyte = 0;
2154 /* 1 if read_buffer contains multibyte text now. */
2155 int is_multibyte = 0;
2154 int cancel = 0; 2156 int cancel = 0;
2155 int nchars; 2157 int nchars = 0;
2156 2158
2157 while ((c = READCHAR) >= 0 2159 while ((c = READCHAR) >= 0
2158 && c != '\"') 2160 && c != '\"')
2159 { 2161 {
2160 if (end - p < MAX_MULTIBYTE_LENGTH) 2162 if (end - p < MAX_MULTIBYTE_LENGTH)
2184 force_singlebyte = 1; 2186 force_singlebyte = 1;
2185 else if (byterep == 2) 2187 else if (byterep == 2)
2186 force_multibyte = 1; 2188 force_multibyte = 1;
2187 } 2189 }
2188 2190
2189 if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK))) 2191 /* A character that must be multibyte forces multibyte. */
2192 if (! SINGLE_BYTE_CHAR_P (c & ~CHAR_MODIFIER_MASK))
2193 force_multibyte = 1;
2194
2195 /* If we just discovered the need to be multibyte,
2196 convert the text accumulated thus far. */
2197 if (force_multibyte && ! is_multibyte)
2190 { 2198 {
2191 /* Any modifiers for a multibyte character are invalid. */ 2199 is_multibyte = 1;
2192 if (c & CHAR_MODIFIER_MASK) 2200 to_multibyte (&p, &end, &nchars);
2193 error ("Invalid modifier in string");
2194 p += CHAR_STRING (c, p);
2195 force_multibyte = 1;
2196 } 2201 }
2202
2203 /* Allow `\C- ' and `\C-?'. */
2204 if (c == (CHAR_CTL | ' '))
2205 c = 0;
2206 else if (c == (CHAR_CTL | '?'))
2207 c = 127;
2208
2209 if (c & CHAR_SHIFT)
2210 {
2211 /* Shift modifier is valid only with [A-Za-z]. */
2212 if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
2213 c &= ~CHAR_SHIFT;
2214 else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
2215 c = (c & ~CHAR_SHIFT) - ('a' - 'A');
2216 }
2217
2218 if (c & CHAR_META)
2219 /* Move the meta bit to the right place for a string. */
2220 c = (c & ~CHAR_META) | 0x80;
2221 if (c & CHAR_MODIFIER_MASK)
2222 error ("Invalid modifier in string");
2223
2224 if (is_multibyte)
2225 p += CHAR_STRING (c, p);
2197 else 2226 else
2198 { 2227 *p++ = c;
2199 /* Allow `\C- ' and `\C-?'. */ 2228
2200 if (c == (CHAR_CTL | ' ')) 2229 nchars++;
2201 c = 0;
2202 else if (c == (CHAR_CTL | '?'))
2203 c = 127;
2204
2205 if (c & CHAR_SHIFT)
2206 {
2207 /* Shift modifier is valid only with [A-Za-z]. */
2208 if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
2209 c &= ~CHAR_SHIFT;
2210 else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
2211 c = (c & ~CHAR_SHIFT) - ('a' - 'A');
2212 }
2213
2214 if (c & CHAR_META)
2215 /* Move the meta bit to the right place for a string. */
2216 c = (c & ~CHAR_META) | 0x80;
2217 if (c & ~0xff)
2218 error ("Invalid modifier in string");
2219 *p++ = c;
2220 }
2221 } 2230 }
2231
2222 if (c < 0) 2232 if (c < 0)
2223 end_of_file_error (); 2233 end_of_file_error ();
2224 2234
2225 /* If purifying, and string starts with \ newline, 2235 /* If purifying, and string starts with \ newline,
2226 return zero instead. This is for doc strings 2236 return zero instead. This is for doc strings
2227 that we are really going to find in etc/DOC.nn.nn */ 2237 that we are really going to find in etc/DOC.nn.nn */
2228 if (!NILP (Vpurify_flag) && NILP (Vdoc_file_name) && cancel) 2238 if (!NILP (Vpurify_flag) && NILP (Vdoc_file_name) && cancel)
2229 return make_number (0); 2239 return make_number (0);
2230 2240
2231 if (force_multibyte) 2241 if (is_multibyte || force_singlebyte)
2232 to_multibyte (&p, &end, &nchars); 2242 ;
2233 else if (force_singlebyte)
2234 nchars = p - read_buffer;
2235 else if (load_convert_to_unibyte) 2243 else if (load_convert_to_unibyte)
2236 { 2244 {
2237 Lisp_Object string; 2245 Lisp_Object string;
2238 to_multibyte (&p, &end, &nchars); 2246 to_multibyte (&p, &end, &nchars);
2239 if (p - read_buffer != nchars) 2247 if (p - read_buffer != nchars)
2240 { 2248 {
2241 string = make_multibyte_string (read_buffer, nchars, 2249 string = make_multibyte_string (read_buffer, nchars,
2242 p - read_buffer); 2250 p - read_buffer);
2243 return Fstring_make_unibyte (string); 2251 return Fstring_make_unibyte (string);
2244 } 2252 }
2253 /* We can make a unibyte string directly. */
2254 is_multibyte = 0;
2245 } 2255 }
2246 else if (EQ (readcharfun, Qget_file_char) 2256 else if (EQ (readcharfun, Qget_file_char)
2247 || EQ (readcharfun, Qlambda)) 2257 || EQ (readcharfun, Qlambda))
2248 { 2258 {
2249 /* Nowadays, reading directly from a file is used only for 2259 /* Nowadays, reading directly from a file is used only for
2250 compiled Emacs Lisp files, and those always use the 2260 compiled Emacs Lisp files, and those always use the
2251 Emacs internal encoding. Meanwhile, Qlambda is used 2261 Emacs internal encoding. Meanwhile, Qlambda is used
2252 for reading dynamic byte code (compiled with 2262 for reading dynamic byte code (compiled with
2253 byte-compile-dynamic = t). */ 2263 byte-compile-dynamic = t). */
2254 to_multibyte (&p, &end, &nchars); 2264 to_multibyte (&p, &end, &nchars);
2265 is_multibyte = 1;
2255 } 2266 }
2256 else 2267 else
2257 /* In all other cases, if we read these bytes as 2268 /* In all other cases, if we read these bytes as
2258 separate characters, treat them as separate characters now. */ 2269 separate characters, treat them as separate characters now. */
2259 nchars = p - read_buffer; 2270 ;
2260 2271
2261 if (read_pure) 2272 if (read_pure)
2262 return make_pure_string (read_buffer, nchars, p - read_buffer, 2273 return make_pure_string (read_buffer, nchars, p - read_buffer,
2263 (force_multibyte 2274 is_multibyte);
2264 || (p - read_buffer != nchars)));
2265 return make_specified_string (read_buffer, nchars, p - read_buffer, 2275 return make_specified_string (read_buffer, nchars, p - read_buffer,
2266 (force_multibyte 2276 is_multibyte);
2267 || (p - read_buffer != nchars)));
2268 } 2277 }
2269 2278
2270 case '.': 2279 case '.':
2271 { 2280 {
2272 int next_char = READCHAR; 2281 int next_char = READCHAR;