comparison src/coding.c @ 20931:068eb408c911

(decode_coding_iso2022): Update coding->fake_multibyte. (ENCODE_SINGLE_SHIFT_2, ENCODE_SINGLE_SHIFT_3, encode_coding_iso2022, decode_coding_sjis_big5, encode_coding_sjis_big5, decode_eol, encode_eol, decode_coding, encode_coding): Likewise. (shrink_decoding_region, shrink_encoding_region): Do not skip non-ASCII codes in any case. Fix a bug in getting the starting address from BEG. (code_convert_region): Sync character positions correctly by paying attention to coding->fake_multibyte. (code_convert_string): Set the numbers of characters and bytes just processed in members of CODING. (code_convert_string): Adjust for the change to code_convert_region. (code_convert_region1): Likewise.
author Kenichi Handa <handa@m17n.org>
date Fri, 20 Feb 1998 01:40:47 +0000
parents 0fa2183c587d
children e4dd62e5d921
comparison
equal deleted inserted replaced
20930:1331679fe704 20931:068eb408c911
1005 1005
1006 if (!NILP (Venable_character_unification) && NILP (unification_table)) 1006 if (!NILP (Venable_character_unification) && NILP (unification_table))
1007 unification_table = Vstandard_character_unification_table_for_decode; 1007 unification_table = Vstandard_character_unification_table_for_decode;
1008 1008
1009 coding->produced_char = 0; 1009 coding->produced_char = 0;
1010 coding->fake_multibyte = 0;
1010 while (src < src_end && (dst_bytes 1011 while (src < src_end && (dst_bytes
1011 ? (dst < adjusted_dst_end) 1012 ? (dst < adjusted_dst_end)
1012 : (dst < src - 6))) 1013 : (dst < src - 6)))
1013 { 1014 {
1014 /* SRC_BASE remembers the start position in source in each loop. 1015 /* SRC_BASE remembers the start position in source in each loop.
1044 break; 1045 break;
1045 1046
1046 case ISO_0xA0_or_0xFF: 1047 case ISO_0xA0_or_0xFF:
1047 if (charset1 < 0 || CHARSET_CHARS (charset1) == 94 1048 if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1048 || coding->flags & CODING_FLAG_ISO_SEVEN_BITS) 1049 || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1049 { 1050 goto label_invalid_code;
1050 /* Invalid code. */
1051 *dst++ = c1;
1052 coding->produced_char++;
1053 break;
1054 }
1055 /* This is a graphic character, we fall down ... */ 1051 /* This is a graphic character, we fall down ... */
1056 1052
1057 case ISO_graphic_plane_1: 1053 case ISO_graphic_plane_1:
1058 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) 1054 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1059 { 1055 goto label_invalid_code;
1060 /* Invalid code. */
1061 *dst++ = c1;
1062 coding->produced_char++;
1063 }
1064 else 1056 else
1065 DECODE_ISO_CHARACTER (charset1, c1); 1057 DECODE_ISO_CHARACTER (charset1, c1);
1066 break; 1058 break;
1067 1059
1068 case ISO_control_code: 1060 case ISO_control_code:
1308 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); 1300 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1309 charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1); 1301 charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1310 break; 1302 break;
1311 1303
1312 label_invalid_code: 1304 label_invalid_code:
1313 coding->produced_char += src - src_base;
1314 while (src_base < src) 1305 while (src_base < src)
1315 *dst++ = *src_base++; 1306 *dst++ = *src_base++;
1307 coding->fake_multibyte = 1;
1316 } 1308 }
1317 continue; 1309 continue;
1318 1310
1319 label_end_of_loop: 1311 label_end_of_loop:
1320 result = CODING_FINISH_INSUFFICIENT_SRC; 1312 result = CODING_FINISH_INSUFFICIENT_SRC;
1321 label_end_of_loop_2: 1313 label_end_of_loop_2:
1322 src = src_base; 1314 src = src_base;
1323 break; 1315 break;
1324 } 1316 }
1325 1317
1326 if (result == CODING_FINISH_NORMAL 1318 if (src < src_end)
1327 && src < src_end) 1319 {
1328 result = CODING_FINISH_INSUFFICIENT_DST; 1320 if (result == CODING_FINISH_NORMAL)
1329 1321 result = CODING_FINISH_INSUFFICIENT_DST;
1330 /* If this is the last block of the text to be decoded, we had 1322 else if (result != CODING_FINISH_INCONSISTENT_EOL
1331 better just flush out all remaining codes in the text although 1323 && coding->mode & CODING_MODE_LAST_BLOCK)
1332 they are not valid characters. */ 1324 {
1333 if (coding->mode & CODING_MODE_LAST_BLOCK) 1325 /* This is the last block of the text to be decoded. We had
1334 { 1326 better just flush out all remaining codes in the text
1335 bcopy (src, dst, src_end - src); 1327 although they are not valid characters. */
1336 dst += (src_end - src); 1328 src_bytes = src_end - src;
1337 src = src_end; 1329 if (dst_bytes && (dst_end - dst < src_bytes))
1338 } 1330 src_bytes = dst_end - dst;
1331 bcopy (src, dst, src_bytes);
1332 dst += src_bytes;
1333 src += src_bytes;
1334 coding->fake_multibyte = 1;
1335 }
1336 }
1337
1339 coding->consumed = coding->consumed_char = src - source; 1338 coding->consumed = coding->consumed_char = src - source;
1340 coding->produced = dst - destination; 1339 coding->produced = dst - destination;
1341 return result; 1340 return result;
1342 } 1341 }
1343 1342
1411 #define ENCODE_SINGLE_SHIFT_2 \ 1410 #define ENCODE_SINGLE_SHIFT_2 \
1412 do { \ 1411 do { \
1413 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \ 1412 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
1414 *dst++ = ISO_CODE_ESC, *dst++ = 'N'; \ 1413 *dst++ = ISO_CODE_ESC, *dst++ = 'N'; \
1415 else \ 1414 else \
1416 *dst++ = ISO_CODE_SS2; \ 1415 { \
1416 *dst++ = ISO_CODE_SS2; \
1417 coding->fake_multibyte = 1; \
1418 } \
1417 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \ 1419 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \
1418 } while (0) 1420 } while (0)
1419 1421
1420 #define ENCODE_SINGLE_SHIFT_3 \ 1422 #define ENCODE_SINGLE_SHIFT_3 \
1421 do { \ 1423 do { \
1422 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \ 1424 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
1423 *dst++ = ISO_CODE_ESC, *dst++ = 'O'; \ 1425 *dst++ = ISO_CODE_ESC, *dst++ = 'O'; \
1424 else \ 1426 else \
1425 *dst++ = ISO_CODE_SS3; \ 1427 { \
1428 *dst++ = ISO_CODE_SS3; \
1429 coding->fake_multibyte = 1; \
1430 } \
1426 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \ 1431 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \
1427 } while (0) 1432 } while (0)
1428 1433
1429 /* The following four macros produce codes (control character or 1434 /* The following four macros produce codes (control character or
1430 escape sequence) for ISO2022 locking-shift functions (shift-in, 1435 escape sequence) for ISO2022 locking-shift functions (shift-in,
1744 1749
1745 if (!NILP (Venable_character_unification) && NILP (unification_table)) 1750 if (!NILP (Venable_character_unification) && NILP (unification_table))
1746 unification_table = Vstandard_character_unification_table_for_encode; 1751 unification_table = Vstandard_character_unification_table_for_encode;
1747 1752
1748 coding->consumed_char = 0; 1753 coding->consumed_char = 0;
1754 coding->fake_multibyte = 0;
1749 while (src < src_end && (dst_bytes 1755 while (src < src_end && (dst_bytes
1750 ? (dst < adjusted_dst_end) 1756 ? (dst < adjusted_dst_end)
1751 : (dst < src - 19))) 1757 : (dst < src - 19)))
1752 { 1758 {
1753 /* SRC_BASE remembers the start position in source in each loop. 1759 /* SRC_BASE remembers the start position in source in each loop.
1931 result = CODING_FINISH_INSUFFICIENT_SRC; 1937 result = CODING_FINISH_INSUFFICIENT_SRC;
1932 src = src_base; 1938 src = src_base;
1933 break; 1939 break;
1934 } 1940 }
1935 1941
1936 if (result == CODING_FINISH_NORMAL 1942 if (src < src_end)
1937 && src < src_end) 1943 {
1938 result = CODING_FINISH_INSUFFICIENT_DST; 1944 if (result == CODING_FINISH_NORMAL)
1939 1945 result = CODING_FINISH_INSUFFICIENT_DST;
1940 /* If this is the last block of the text to be encoded, we must 1946 else
1941 reset graphic planes and registers to the initial state, and 1947 /* If this is the last block of the text to be encoded, we
1942 flush out the carryover if any. */ 1948 must reset graphic planes and registers to the initial
1943 if (coding->mode & CODING_MODE_LAST_BLOCK) 1949 state, and flush out the carryover if any. */
1944 ENCODE_RESET_PLANE_AND_REGISTER; 1950 if (coding->mode & CODING_MODE_LAST_BLOCK)
1951 ENCODE_RESET_PLANE_AND_REGISTER;
1952 }
1945 1953
1946 coding->consumed = src - source; 1954 coding->consumed = src - source;
1947 coding->produced = coding->produced_char = dst - destination; 1955 coding->produced = coding->produced_char = dst - destination;
1948 return result; 1956 return result;
1949 } 1957 }
2052 else if (CHARSET_DIMENSION (charset_alt) == 1) \ 2060 else if (CHARSET_DIMENSION (charset_alt) == 1) \
2053 { \ 2061 { \
2054 if (sjis_p && charset_alt == charset_katakana_jisx0201) \ 2062 if (sjis_p && charset_alt == charset_katakana_jisx0201) \
2055 *dst++ = c1; \ 2063 *dst++ = c1; \
2056 else \ 2064 else \
2057 *dst++ = charset_alt, *dst++ = c1; \ 2065 { \
2066 *dst++ = charset_alt, *dst++ = c1; \
2067 coding->fake_multibyte = 1; \
2068 } \
2058 } \ 2069 } \
2059 else \ 2070 else \
2060 { \ 2071 { \
2061 c1 &= 0x7F, c2 &= 0x7F; \ 2072 c1 &= 0x7F, c2 &= 0x7F; \
2062 if (sjis_p && charset_alt == charset_jisx0208) \ 2073 if (sjis_p && charset_alt == charset_jisx0208) \
2063 { \ 2074 { \
2064 unsigned char s1, s2; \ 2075 unsigned char s1, s2; \
2065 \ 2076 \
2066 ENCODE_SJIS (c1, c2, s1, s2); \ 2077 ENCODE_SJIS (c1, c2, s1, s2); \
2067 *dst++ = s1, *dst++ = s2; \ 2078 *dst++ = s1, *dst++ = s2; \
2079 coding->fake_multibyte = 1; \
2068 } \ 2080 } \
2069 else if (!sjis_p \ 2081 else if (!sjis_p \
2070 && (charset_alt == charset_big5_1 \ 2082 && (charset_alt == charset_big5_1 \
2071 || charset_alt == charset_big5_2)) \ 2083 || charset_alt == charset_big5_2)) \
2072 { \ 2084 { \
2073 unsigned char b1, b2; \ 2085 unsigned char b1, b2; \
2074 \ 2086 \
2075 ENCODE_BIG5 (charset_alt, c1, c2, b1, b2); \ 2087 ENCODE_BIG5 (charset_alt, c1, c2, b1, b2); \
2076 *dst++ = b1, *dst++ = b2; \ 2088 *dst++ = b1, *dst++ = b2; \
2077 } \ 2089 } \
2078 else \ 2090 else \
2079 *dst++ = charset_alt, *dst++ = c1, *dst++ = c2; \ 2091 { \
2092 *dst++ = charset_alt, *dst++ = c1, *dst++ = c2; \
2093 coding->fake_multibyte = 1; \
2094 } \
2080 } \ 2095 } \
2081 coding->consumed_char++; \ 2096 coding->consumed_char++; \
2082 } while (0); 2097 } while (0);
2083 2098
2084 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". 2099 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2153 2168
2154 if (!NILP (Venable_character_unification) && NILP (unification_table)) 2169 if (!NILP (Venable_character_unification) && NILP (unification_table))
2155 unification_table = Vstandard_character_unification_table_for_decode; 2170 unification_table = Vstandard_character_unification_table_for_decode;
2156 2171
2157 coding->produced_char = 0; 2172 coding->produced_char = 0;
2173 coding->fake_multibyte = 0;
2158 while (src < src_end && (dst_bytes 2174 while (src < src_end && (dst_bytes
2159 ? (dst < adjusted_dst_end) 2175 ? (dst < adjusted_dst_end)
2160 : (dst < src - 3))) 2176 : (dst < src - 3)))
2161 { 2177 {
2162 /* SRC_BASE remembers the start position in source in each loop. 2178 /* SRC_BASE remembers the start position in source in each loop.
2201 *dst++ = c1; 2217 *dst++ = c1;
2202 coding->produced_char++; 2218 coding->produced_char++;
2203 } 2219 }
2204 else if (c1 < 0x80) 2220 else if (c1 < 0x80)
2205 DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2); 2221 DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2206 else if (c1 < 0xA0 || c1 >= 0xE0) 2222 else if (c1 < 0xA0)
2207 { 2223 {
2208 /* SJIS -> JISX0208, BIG5 -> Big5 (only if 0xE0 <= c1 < 0xFF) */ 2224 /* SJIS -> JISX0208 */
2209 if (sjis_p) 2225 if (sjis_p)
2210 { 2226 {
2211 ONE_MORE_BYTE (c2); 2227 ONE_MORE_BYTE (c2);
2212 DECODE_SJIS (c1, c2, c3, c4); 2228 if (c2 >= 0x40)
2213 DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4); 2229 {
2230 DECODE_SJIS (c1, c2, c3, c4);
2231 DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
2232 }
2233 else
2234 goto label_invalid_code_2;
2214 } 2235 }
2215 else if (c1 >= 0xE0 && c1 < 0xFF) 2236 else
2216 { 2237 goto label_invalid_code_1;
2217 int charset; 2238 }
2218 2239 else if (c1 < 0xE0)
2219 ONE_MORE_BYTE (c2);
2220 DECODE_BIG5 (c1, c2, charset, c3, c4);
2221 DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
2222 }
2223 else /* Invalid code */
2224 {
2225 *dst++ = c1;
2226 coding->produced_char++;
2227 }
2228 }
2229 else
2230 { 2240 {
2231 /* SJIS -> JISX0201-Kana, BIG5 -> Big5 */ 2241 /* SJIS -> JISX0201-Kana, BIG5 -> Big5 */
2232 if (sjis_p) 2242 if (sjis_p)
2233 DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1, 2243 DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1,
2234 /* dummy */ c2); 2244 /* dummy */ c2);
2235 else 2245 else
2236 { 2246 {
2237 int charset; 2247 int charset;
2238 2248
2239 ONE_MORE_BYTE (c2); 2249 ONE_MORE_BYTE (c2);
2240 DECODE_BIG5 (c1, c2, charset, c3, c4); 2250 if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
2241 DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4); 2251 {
2252 DECODE_BIG5 (c1, c2, charset, c3, c4);
2253 DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
2254 }
2255 else
2256 goto label_invalid_code_2;
2242 } 2257 }
2243 } 2258 }
2259 else /* C1 >= 0xE0 */
2260 {
2261 /* SJIS -> JISX0208, BIG5 -> Big5 */
2262 if (sjis_p)
2263 {
2264 ONE_MORE_BYTE (c2);
2265 if (c2 >= 0x40)
2266 {
2267 DECODE_SJIS (c1, c2, c3, c4);
2268 DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
2269 }
2270 else
2271 goto label_invalid_code_2;
2272 }
2273 else
2274 {
2275 int charset;
2276
2277 ONE_MORE_BYTE (c2);
2278 if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
2279 {
2280 DECODE_BIG5 (c1, c2, charset, c3, c4);
2281 DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
2282 }
2283 else
2284 goto label_invalid_code_2;
2285 }
2286 }
2287 continue;
2288
2289 label_invalid_code_1:
2290 *dst++ = c1;
2291 coding->produced_char++;
2292 coding->fake_multibyte = 1;
2293 continue;
2294
2295 label_invalid_code_2:
2296 *dst++ = c1; *dst++= c2;
2297 coding->produced_char += 2;
2298 coding->fake_multibyte = 1;
2244 continue; 2299 continue;
2245 2300
2246 label_end_of_loop: 2301 label_end_of_loop:
2247 result = CODING_FINISH_INSUFFICIENT_SRC; 2302 result = CODING_FINISH_INSUFFICIENT_SRC;
2248 label_end_of_loop_2: 2303 label_end_of_loop_2:
2249 src = src_base; 2304 src = src_base;
2250 break; 2305 break;
2251 } 2306 }
2252 2307
2253 if (result == CODING_FINISH_NORMAL 2308 if (src < src_end)
2254 && src < src_end) 2309 {
2255 result = CODING_FINISH_INSUFFICIENT_DST; 2310 if (result == CODING_FINISH_NORMAL)
2311 result = CODING_FINISH_INSUFFICIENT_DST;
2312 else if (result != CODING_FINISH_INCONSISTENT_EOL
2313 && coding->mode & CODING_MODE_LAST_BLOCK)
2314 {
2315 src_bytes = src_end - src;
2316 if (dst_bytes && (dst_end - dst < src_bytes))
2317 src_bytes = dst_end - dst;
2318 bcopy (dst, src, src_bytes);
2319 src += src_bytes;
2320 dst += src_bytes;
2321 coding->fake_multibyte = 1;
2322 }
2323 }
2256 2324
2257 coding->consumed = coding->consumed_char = src - source; 2325 coding->consumed = coding->consumed_char = src - source;
2258 coding->produced = dst - destination; 2326 coding->produced = dst - destination;
2259 return result; 2327 return result;
2260 } 2328 }
2289 2357
2290 if (!NILP (Venable_character_unification) && NILP (unification_table)) 2358 if (!NILP (Venable_character_unification) && NILP (unification_table))
2291 unification_table = Vstandard_character_unification_table_for_encode; 2359 unification_table = Vstandard_character_unification_table_for_encode;
2292 2360
2293 coding->consumed_char = 0; 2361 coding->consumed_char = 0;
2362 coding->fake_multibyte = 0;
2294 while (src < src_end && (dst_bytes 2363 while (src < src_end && (dst_bytes
2295 ? (dst < adjusted_dst_end) 2364 ? (dst < adjusted_dst_end)
2296 : (dst < src - 1))) 2365 : (dst < src - 1)))
2297 { 2366 {
2298 /* SRC_BASE remembers the start position in source in each loop. 2367 /* SRC_BASE remembers the start position in source in each loop.
2400 { 2469 {
2401 unsigned char *src = source; 2470 unsigned char *src = source;
2402 unsigned char *src_end = source + src_bytes; 2471 unsigned char *src_end = source + src_bytes;
2403 unsigned char *dst = destination; 2472 unsigned char *dst = destination;
2404 unsigned char *dst_end = destination + dst_bytes; 2473 unsigned char *dst_end = destination + dst_bytes;
2474 unsigned char c;
2405 int result = CODING_FINISH_NORMAL; 2475 int result = CODING_FINISH_NORMAL;
2476
2477 coding->fake_multibyte = 0;
2406 2478
2407 if (src_bytes <= 0) 2479 if (src_bytes <= 0)
2408 return result; 2480 return result;
2409 2481
2410 switch (coding->eol_type) 2482 switch (coding->eol_type)
2419 while (src < src_end && (dst_bytes 2491 while (src < src_end && (dst_bytes
2420 ? (dst < adjusted_dst_end) 2492 ? (dst < adjusted_dst_end)
2421 : (dst < src - 1))) 2493 : (dst < src - 1)))
2422 { 2494 {
2423 unsigned char *src_base = src; 2495 unsigned char *src_base = src;
2424 unsigned char c = *src++; 2496
2497 c = *src++;
2425 if (c == '\r') 2498 if (c == '\r')
2426 { 2499 {
2427 ONE_MORE_BYTE (c); 2500 ONE_MORE_BYTE (c);
2428 if (c != '\n') 2501 if (c != '\n')
2429 { 2502 {
2431 { 2504 {
2432 result = CODING_FINISH_INCONSISTENT_EOL; 2505 result = CODING_FINISH_INCONSISTENT_EOL;
2433 goto label_end_of_loop_2; 2506 goto label_end_of_loop_2;
2434 } 2507 }
2435 *dst++ = '\r'; 2508 *dst++ = '\r';
2509 if (BASE_LEADING_CODE_P (c))
2510 coding->fake_multibyte = 1;
2436 } 2511 }
2437 *dst++ = c; 2512 *dst++ = c;
2438 } 2513 }
2439 else if (c == '\n' 2514 else if (c == '\n'
2440 && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)) 2515 && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
2441 { 2516 {
2442 result = CODING_FINISH_INCONSISTENT_EOL; 2517 result = CODING_FINISH_INCONSISTENT_EOL;
2443 goto label_end_of_loop_2; 2518 goto label_end_of_loop_2;
2444 } 2519 }
2445 else 2520 else
2446 *dst++ = c; 2521 {
2522 *dst++ = c;
2523 if (BASE_LEADING_CODE_P (c))
2524 coding->fake_multibyte = 1;
2525 }
2447 continue; 2526 continue;
2448 2527
2449 label_end_of_loop: 2528 label_end_of_loop:
2450 result = CODING_FINISH_INSUFFICIENT_SRC; 2529 result = CODING_FINISH_INSUFFICIENT_SRC;
2451 label_end_of_loop_2: 2530 label_end_of_loop_2:
2459 break; 2538 break;
2460 2539
2461 case CODING_EOL_CR: 2540 case CODING_EOL_CR:
2462 if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) 2541 if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2463 { 2542 {
2464 while (src < src_end) if (*src++ == '\n') break; 2543 while (src < src_end)
2544 {
2545 if ((c = *src++) == '\n')
2546 break;
2547 if (BASE_LEADING_CODE_P (c))
2548 coding->fake_multibyte = 1;
2549 }
2465 if (*--src == '\n') 2550 if (*--src == '\n')
2466 { 2551 {
2467 src_bytes = src - source; 2552 src_bytes = src - source;
2468 result = CODING_FINISH_INCONSISTENT_EOL; 2553 result = CODING_FINISH_INCONSISTENT_EOL;
2469 } 2554 }
2491 bcopy (source, destination, src_bytes); 2576 bcopy (source, destination, src_bytes);
2492 else 2577 else
2493 safe_bcopy (source, destination, src_bytes); 2578 safe_bcopy (source, destination, src_bytes);
2494 src += src_bytes; 2579 src += src_bytes;
2495 dst += dst_bytes; 2580 dst += dst_bytes;
2581 coding->fake_multibyte = 1;
2496 break; 2582 break;
2497 } 2583 }
2498 2584
2499 coding->consumed = coding->consumed_char = src - source; 2585 coding->consumed = coding->consumed_char = src - source;
2500 coding->produced = coding->produced_char = dst - destination; 2586 coding->produced = coding->produced_char = dst - destination;
2512 int src_bytes, dst_bytes; 2598 int src_bytes, dst_bytes;
2513 { 2599 {
2514 unsigned char *src = source; 2600 unsigned char *src = source;
2515 unsigned char *dst = destination; 2601 unsigned char *dst = destination;
2516 int result = CODING_FINISH_NORMAL; 2602 int result = CODING_FINISH_NORMAL;
2603
2604 coding->fake_multibyte = 0;
2517 2605
2518 if (coding->eol_type == CODING_EOL_CRLF) 2606 if (coding->eol_type == CODING_EOL_CRLF)
2519 { 2607 {
2520 unsigned char c; 2608 unsigned char c;
2521 unsigned char *src_end = source + src_bytes; 2609 unsigned char *src_end = source + src_bytes;
2532 c = *src++; 2620 c = *src++;
2533 if (c == '\n' 2621 if (c == '\n'
2534 || (c == '\r' && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))) 2622 || (c == '\r' && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)))
2535 *dst++ = '\r', *dst++ = '\n'; 2623 *dst++ = '\r', *dst++ = '\n';
2536 else 2624 else
2537 *dst++ = c; 2625 {
2626 *dst++ = c;
2627 if (BASE_LEADING_CODE_P (c))
2628 coding->fake_multibyte = 1;
2629 }
2538 } 2630 }
2539 if (src < src_end) 2631 if (src < src_end)
2540 result = CODING_FINISH_INSUFFICIENT_DST; 2632 result = CODING_FINISH_INSUFFICIENT_DST;
2541 } 2633 }
2542 else 2634 else
2543 { 2635 {
2636 unsigned char c;
2637
2544 if (dst_bytes && src_bytes > dst_bytes) 2638 if (dst_bytes && src_bytes > dst_bytes)
2545 { 2639 {
2546 src_bytes = dst_bytes; 2640 src_bytes = dst_bytes;
2547 result = CODING_FINISH_INSUFFICIENT_DST; 2641 result = CODING_FINISH_INSUFFICIENT_DST;
2548 } 2642 }
2549 if (dst_bytes) 2643 if (dst_bytes)
2550 bcopy (source, destination, src_bytes); 2644 bcopy (source, destination, src_bytes);
2551 else 2645 else
2552 safe_bcopy (source, destination, src_bytes); 2646 {
2647 safe_bcopy (source, destination, src_bytes);
2648 dst_bytes = src_bytes;
2649 }
2553 if (coding->eol_type == CODING_EOL_CRLF) 2650 if (coding->eol_type == CODING_EOL_CRLF)
2554 { 2651 {
2555 while (src_bytes--) 2652 while (src_bytes--)
2556 if (*dst++ == '\n') dst[-1] = '\r'; 2653 {
2557 } 2654 if ((c = *dst++) == '\n')
2558 else if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY) 2655 dst[-1] = '\r';
2559 { 2656 else if (BASE_LEADING_CODE_P (c))
2560 while (src_bytes--) 2657 coding->fake_multibyte = 1;
2561 if (*dst++ == '\r') dst[-1] = '\n'; 2658 }
2562 } 2659 }
2563 src += src_bytes; 2660 else
2564 dst += src_bytes; 2661 {
2662 if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
2663 {
2664 while (src_bytes--)
2665 if (*dst++ == '\r') dst[-1] = '\n';
2666 }
2667 coding->fake_multibyte = 1;
2668 }
2669 src = source + dst_bytes;
2670 dst = destination + dst_bytes;
2565 } 2671 }
2566 2672
2567 coding->consumed = coding->consumed_char = src - source; 2673 coding->consumed = coding->consumed_char = src - source;
2568 coding->produced = coding->produced_char = dst - destination; 2674 coding->produced = coding->produced_char = dst - destination;
2569 return result; 2675 return result;
3456 3562
3457 if (src_bytes <= 0) 3563 if (src_bytes <= 0)
3458 { 3564 {
3459 coding->produced = coding->produced_char = 0; 3565 coding->produced = coding->produced_char = 0;
3460 coding->consumed = coding->consumed_char = 0; 3566 coding->consumed = coding->consumed_char = 0;
3567 coding->fake_multibyte = 0;
3461 return CODING_FINISH_NORMAL; 3568 return CODING_FINISH_NORMAL;
3462 } 3569 }
3463 3570
3464 if (coding->type == coding_type_undecided) 3571 if (coding->type == coding_type_undecided)
3465 detect_coding (coding, source, src_bytes); 3572 detect_coding (coding, source, src_bytes);
3512 } 3619 }
3513 if (dst_bytes) 3620 if (dst_bytes)
3514 bcopy (source, destination, coding->produced); 3621 bcopy (source, destination, coding->produced);
3515 else 3622 else
3516 safe_bcopy (source, destination, coding->produced); 3623 safe_bcopy (source, destination, coding->produced);
3624 coding->fake_multibyte = 1;
3517 coding->consumed 3625 coding->consumed
3518 = coding->consumed_char = coding->produced_char = coding->produced; 3626 = coding->consumed_char = coding->produced_char = coding->produced;
3519 break; 3627 break;
3520 } 3628 }
3521 3629
3534 3642
3535 if (src_bytes <= 0) 3643 if (src_bytes <= 0)
3536 { 3644 {
3537 coding->produced = coding->produced_char = 0; 3645 coding->produced = coding->produced_char = 0;
3538 coding->consumed = coding->consumed_char = 0; 3646 coding->consumed = coding->consumed_char = 0;
3647 coding->fake_multibyte = 0;
3539 return CODING_FINISH_NORMAL; 3648 return CODING_FINISH_NORMAL;
3540 } 3649 }
3541 3650
3542 switch (coding->type) 3651 switch (coding->type)
3543 { 3652 {
3590 { 3699 {
3591 unsigned char *p = destination, *pend = p + coding->produced; 3700 unsigned char *p = destination, *pend = p + coding->produced;
3592 while (p < pend) 3701 while (p < pend)
3593 if (*p++ == '\015') p[-1] = '\n'; 3702 if (*p++ == '\015') p[-1] = '\n';
3594 } 3703 }
3704 coding->fake_multibyte = 1;
3595 coding->consumed 3705 coding->consumed
3596 = coding->consumed_char = coding->produced_char = coding->produced; 3706 = coding->consumed_char = coding->produced_char = coding->produced;
3597 break; 3707 break;
3598 } 3708 }
3599 3709
3600 return result; 3710 return result;
3601 } 3711 }
3602 3712
3603 /* Scan text in the region between *BEG and *END, skip characters 3713 /* Scan text in the region between *BEG and *END (byte positions),
3604 which we don't have to decode by coding system CODING at the head 3714 skip characters which we don't have to decode by coding system
3605 and tail, then set *BEG and *END to the region of the text we 3715 CODING at the head and tail, then set *BEG and *END to the region
3606 actually have to convert. 3716 of the text we actually have to convert. The caller should move
3717 the gap out of the region in advance.
3607 3718
3608 If STR is not NULL, *BEG and *END are indices into STR. */ 3719 If STR is not NULL, *BEG and *END are indices into STR. */
3609 3720
3610 static void 3721 static void
3611 shrink_decoding_region (beg, end, coding, str) 3722 shrink_decoding_region (beg, end, coding, str)
3612 int *beg, *end; 3723 int *beg, *end;
3613 struct coding_system *coding; 3724 struct coding_system *coding;
3614 unsigned char *str; 3725 unsigned char *str;
3615 { 3726 {
3616 unsigned char *begp_orig, *begp, *endp_orig, *endp; 3727 unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
3617 int eol_conversion; 3728 int eol_conversion;
3618 3729
3619 if (coding->type == coding_type_ccl 3730 if (coding->type == coding_type_ccl
3620 || coding->type == coding_type_undecided 3731 || coding->type == coding_type_undecided
3621 || !NILP (coding->post_read_conversion)) 3732 || !NILP (coding->post_read_conversion))
3623 /* We can't skip any data. */ 3734 /* We can't skip any data. */
3624 return; 3735 return;
3625 } 3736 }
3626 else if (coding->type == coding_type_no_conversion) 3737 else if (coding->type == coding_type_no_conversion)
3627 { 3738 {
3628 /* We need no conversion. */ 3739 /* We need no conversion, but don't have to skip any data here.
3629 *beg = *end; 3740 Decoding routine handles them effectively anyway. */
3630 return; 3741 return;
3631 } 3742 }
3632 3743
3633 if (coding->heading_ascii >= 0) 3744 if (coding->heading_ascii >= 0)
3634 /* Detection routine has already found how much we can skip at the 3745 /* Detection routine has already found how much we can skip at the
3640 begp_orig = begp = str + *beg; 3751 begp_orig = begp = str + *beg;
3641 endp_orig = endp = str + *end; 3752 endp_orig = endp = str + *end;
3642 } 3753 }
3643 else 3754 else
3644 { 3755 {
3645 move_gap (*beg); 3756 begp_orig = begp = BYTE_POS_ADDR (*beg);
3646 begp_orig = begp = GAP_END_ADDR;
3647 endp_orig = endp = begp + *end - *beg; 3757 endp_orig = endp = begp + *end - *beg;
3648 } 3758 }
3649 3759
3650 eol_conversion = (coding->eol_type != CODING_EOL_LF); 3760 eol_conversion = (coding->eol_type != CODING_EOL_LF);
3651 3761
3654 case coding_type_emacs_mule: 3764 case coding_type_emacs_mule:
3655 case coding_type_raw_text: 3765 case coding_type_raw_text:
3656 if (eol_conversion) 3766 if (eol_conversion)
3657 { 3767 {
3658 if (coding->heading_ascii < 0) 3768 if (coding->heading_ascii < 0)
3659 while (begp < endp && *begp != '\r') begp++; 3769 while (begp < endp && *begp != '\r' && *begp < 0x80) begp++;
3660 while (begp < endp && *(endp - 1) != '\r') endp--; 3770 while (begp < endp && *(endp - 1) != '\r' && *(endp - 1) < 0x80)
3771 endp--;
3661 } 3772 }
3662 else 3773 else
3663 begp = endp; 3774 begp = endp;
3664 break; 3775 break;
3665 3776
3684 break; 3795 break;
3685 3796
3686 default: /* i.e. case coding_type_iso2022: */ 3797 default: /* i.e. case coding_type_iso2022: */
3687 if (coding->heading_ascii < 0) 3798 if (coding->heading_ascii < 0)
3688 { 3799 {
3689 unsigned char c;
3690
3691 /* We can skip all ASCII characters at the head except for a 3800 /* We can skip all ASCII characters at the head except for a
3692 few control codes. */ 3801 few control codes. */
3693 while (begp < endp && (c = *begp) < 0x80 3802 while (begp < endp && (c = *begp) < 0x80
3694 && c != ISO_CODE_CR && c != ISO_CODE_SO 3803 && c != ISO_CODE_CR && c != ISO_CODE_SO
3695 && c != ISO_CODE_SI && c != ISO_CODE_ESC 3804 && c != ISO_CODE_SI && c != ISO_CODE_ESC
3700 { 3809 {
3701 case CODING_CATEGORY_IDX_ISO_8_1: 3810 case CODING_CATEGORY_IDX_ISO_8_1:
3702 case CODING_CATEGORY_IDX_ISO_8_2: 3811 case CODING_CATEGORY_IDX_ISO_8_2:
3703 /* We can skip all ASCII characters at the tail. */ 3812 /* We can skip all ASCII characters at the tail. */
3704 if (eol_conversion) 3813 if (eol_conversion)
3705 while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--; 3814 while (begp < endp && (c = endp[-1]) < 0x80 && c != '\n') endp--;
3706 else 3815 else
3707 while (begp < endp && endp[-1] < 0x80) endp--; 3816 while (begp < endp && endp[-1] < 0x80) endp--;
3708 break; 3817 break;
3709 3818
3710 case CODING_CATEGORY_IDX_ISO_7: 3819 case CODING_CATEGORY_IDX_ISO_7:
3711 case CODING_CATEGORY_IDX_ISO_7_TIGHT: 3820 case CODING_CATEGORY_IDX_ISO_7_TIGHT:
3712 /* We can skip all characters at the tail except for ESC and 3821 /* We can skip all characters at the tail except for ESC and
3713 the following 2-byte at the tail. */ 3822 the following 2-byte at the tail. */
3714 if (eol_conversion) 3823 if (eol_conversion)
3715 while (begp < endp && endp[-1] != ISO_CODE_ESC && endp[-1] != '\n') 3824 while (begp < endp
3825 && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC && c != '\n')
3716 endp--; 3826 endp--;
3717 else 3827 else
3718 while (begp < endp && endp[-1] != ISO_CODE_ESC) 3828 while (begp < endp
3829 && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC)
3719 endp--; 3830 endp--;
3720 if (begp < endp && endp[-1] == ISO_CODE_ESC) 3831 if (begp < endp && endp[-1] == ISO_CODE_ESC)
3721 { 3832 {
3722 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B') 3833 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
3723 /* This is an ASCII designation sequence. We can 3834 /* This is an ASCII designation sequence. We can
3760 begp_orig = begp = str + *beg; 3871 begp_orig = begp = str + *beg;
3761 endp_orig = endp = str + *end; 3872 endp_orig = endp = str + *end;
3762 } 3873 }
3763 else 3874 else
3764 { 3875 {
3765 move_gap (*beg); 3876 begp_orig = begp = BYTE_POS_ADDR (*beg);
3766 begp_orig = begp = GAP_END_ADDR;
3767 endp_orig = endp = begp + *end - *beg; 3877 endp_orig = endp = begp + *end - *beg;
3768 } 3878 }
3769 3879
3770 eol_conversion = (coding->eol_type == CODING_EOL_CR 3880 eol_conversion = (coding->eol_type == CODING_EOL_CR
3771 || coding->eol_type == CODING_EOL_CRLF); 3881 || coding->eol_type == CODING_EOL_CRLF);
3819 *end += endp - endp_orig; 3929 *end += endp - endp_orig;
3820 return; 3930 return;
3821 } 3931 }
3822 3932
3823 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the 3933 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
3824 text from FROM to TO by coding system CODING, and return number of 3934 text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
3825 characters in the resulting text. 3935 coding system CODING, and return the status code of code conversion
3936 (currently, this value has no meaning).
3937
3938 The numbers of characters (and bytes) consumed and produced by
3939 the conversion are recorded in members of the structure
3940 CODING.
3826 3941
3827 If ADJUST is nonzero, we do various things as if the original text 3942 If ADJUST is nonzero, we do various things as if the original text
3828 is deleted and a new text is inserted. See the comments in 3943 is deleted and a new text is inserted. See the comments in
3829 replace_range (insdel.c) to know what we are doing. 3944 replace_range (insdel.c) to know what we are doing.
3830 3945
3831 ADJUST nonzero also means that post-read-conversion or 3946 ADJUST nonzero also means that post-read-conversion or
3832 pre-write-conversion functions (if any) should be processed. */ 3947 pre-write-conversion functions (if any) should be processed. */
3833 3948
3834 int 3949 int
3835 code_convert_region (from, to, coding, encodep, adjust) 3950 code_convert_region (from, from_byte, to, to_byte, coding, encodep, adjust)
3836 int from, to, encodep, adjust; 3951 int from, from_byte, to, to_byte, encodep, adjust;
3837 struct coding_system *coding; 3952 struct coding_system *coding;
3838 { 3953 {
3839 int len = to - from, require, inserted, inserted_byte; 3954 int len = to - from, len_byte = to_byte - from_byte;
3840 int from_byte, to_byte, len_byte; 3955 int require, inserted, inserted_byte;
3841 int from_byte_orig, to_byte_orig; 3956 int from_byte_orig, to_byte_orig;
3842 Lisp_Object saved_coding_symbol = Qnil; 3957 Lisp_Object saved_coding_symbol = Qnil;
3958 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
3959 int first = 1;
3960 int fake_multibyte = 0;
3961 unsigned char *src, *dst;
3843 3962
3844 if (adjust) 3963 if (adjust)
3845 { 3964 {
3965 int saved_from = from;
3966
3846 prepare_to_modify_buffer (from, to, &from); 3967 prepare_to_modify_buffer (from, to, &from);
3847 to = from + len; 3968 if (saved_from != from)
3848 } 3969 {
3849 from_byte = CHAR_TO_BYTE (from); to_byte = CHAR_TO_BYTE (to); 3970 to = from + len;
3850 len_byte = to_byte - from_byte; 3971 if (multibyte)
3972 from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
3973 else
3974 from_byte = from, to_byte = to;
3975 len_byte = to_byte - from_byte;
3976 }
3977 }
3851 3978
3852 if (! encodep && CODING_REQUIRE_DETECTION (coding)) 3979 if (! encodep && CODING_REQUIRE_DETECTION (coding))
3853 { 3980 {
3854 /* We must detect encoding of text and eol. Even if detection 3981 /* We must detect encoding of text and eol. Even if detection
3855 routines can't decide the encoding, we should not let them 3982 routines can't decide the encoding, we should not let them
3858 3985
3859 if (from < GPT && to > GPT) 3986 if (from < GPT && to > GPT)
3860 move_gap_both (from, from_byte); 3987 move_gap_both (from, from_byte);
3861 if (coding->type == coding_type_undecided) 3988 if (coding->type == coding_type_undecided)
3862 { 3989 {
3863 detect_coding (coding, BYTE_POS_ADDR (from), len); 3990 detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
3864 if (coding->type == coding_type_undecided) 3991 if (coding->type == coding_type_undecided)
3865 coding->type = coding_type_emacs_mule; 3992 coding->type = coding_type_emacs_mule;
3866 } 3993 }
3867 if (coding->eol_type == CODING_EOL_UNDECIDED) 3994 if (coding->eol_type == CODING_EOL_UNDECIDED)
3868 { 3995 {
3874 encounter an inconsistent eol format while decoding. */ 4001 encounter an inconsistent eol format while decoding. */
3875 coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL; 4002 coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
3876 } 4003 }
3877 } 4004 }
3878 4005
4006 coding->consumed_char = len, coding->consumed = len_byte;
4007
3879 if (encodep 4008 if (encodep
3880 ? ! CODING_REQUIRE_ENCODING (coding) 4009 ? ! CODING_REQUIRE_ENCODING (coding)
3881 : ! CODING_REQUIRE_DECODING (coding)) 4010 : ! CODING_REQUIRE_DECODING (coding))
3882 return len; 4011 {
4012 coding->produced = len_byte;
4013 if (multibyte)
4014 {
4015 if (GPT < from || GPT > to)
4016 move_gap_both (from, from_byte);
4017 coding->produced_char
4018 = multibyte_chars_in_text (BYTE_POS_ADDR (from_byte), len_byte);
4019 if (coding->produced_char != len)
4020 {
4021 int diff = coding->produced_char - len;
4022
4023 if (adjust)
4024 adjust_before_replace (from, from_byte, to, to_byte);
4025 ZV += diff; Z += diff; GPT += diff;
4026 if (adjust)
4027 adjust_after_replace (from, from_byte, to, to_byte,
4028 diff, 0);
4029 }
4030 }
4031 else
4032 coding->produced_char = len_byte;
4033 return 0;
4034 }
3883 4035
3884 /* Now we convert the text. */ 4036 /* Now we convert the text. */
3885 4037
3886 /* For encoding, we must process pre-write-conversion in advance. */ 4038 /* For encoding, we must process pre-write-conversion in advance. */
3887 if (encodep 4039 if (encodep
3898 if (current_buffer != prev) 4050 if (current_buffer != prev)
3899 { 4051 {
3900 len = ZV - BEGV; 4052 len = ZV - BEGV;
3901 new = current_buffer; 4053 new = current_buffer;
3902 set_buffer_internal_1 (prev); 4054 set_buffer_internal_1 (prev);
3903 del_range (from, to); 4055 del_range_2 (from, to, from_byte, to_byte);
3904 insert_from_buffer (new, BEG, len, 0); 4056 insert_from_buffer (new, BEG, len, 0);
3905 to = from + len; 4057 to = from + len;
3906 to_byte = CHAR_TO_BYTE (to); 4058 to_byte = multibyte ? CHAR_TO_BYTE (to) : to;
3907 len_byte = to_byte - from_byte; 4059 len_byte = to_byte - from_byte;
3908 } 4060 }
3909 } 4061 }
3910 4062
3911 /* Try to skip the leading and trailing ASCIIs. */ 4063 /* Try to skip the leading and trailing ASCIIs. */
3912 from_byte_orig = from_byte; to_byte_orig = to_byte; 4064 from_byte_orig = from_byte; to_byte_orig = to_byte;
4065 if (from < GPT && GPT < to)
4066 move_gap (from);
3913 if (encodep) 4067 if (encodep)
3914 shrink_encoding_region (&from_byte, &to_byte, coding, NULL); 4068 shrink_encoding_region (&from_byte, &to_byte, coding, NULL);
3915 else 4069 else
3916 shrink_decoding_region (&from_byte, &to_byte, coding, NULL); 4070 shrink_decoding_region (&from_byte, &to_byte, coding, NULL);
3917 if (from_byte == to_byte) 4071 if (from_byte == to_byte)
3918 return len; 4072 {
4073 coding->produced = len_byte;
4074 coding->produced_char = multibyte ? len : len_byte;
4075 return 0;
4076 }
4077
3919 /* Here, the excluded region by shrinking contains only ASCIIs. */ 4078 /* Here, the excluded region by shrinking contains only ASCIIs. */
3920 from += (from_byte - from_byte_orig); 4079 from += (from_byte - from_byte_orig);
3921 to += (to_byte - to_byte_orig); 4080 to += (to_byte - to_byte_orig);
3922 len = to - from; 4081 len = to - from;
3923 len_byte = to_byte - from_byte; 4082 len_byte = to_byte - from_byte;
3924 4083
3925 /* For conversion, we must put the gap before the text to be decoded 4084 /* For conversion, we must put the gap before the text in addition to
3926 in addition to make the gap larger for efficient decoding. The 4085 making the gap larger for efficient decoding. The required gap
3927 required gap size starts from 2000 which is the magic number used 4086 size starts from 2000 which is the magic number used in make_gap.
3928 in make_gap. But, after one batch of conversion, it will be 4087 But, after one batch of conversion, it will be incremented if we
3929 incremented if we find that it is not enough. */ 4088 find that it is not enough. */
3930 require = 2000; 4089 require = 2000;
3931 4090
3932 if (GAP_SIZE < require) 4091 if (GAP_SIZE < require)
3933 make_gap (require - GAP_SIZE); 4092 make_gap (require - GAP_SIZE);
3934 move_gap_both (from, from_byte); 4093 move_gap_both (from, from_byte);
3940 beg_unchanged = GPT - BEG; 4099 beg_unchanged = GPT - BEG;
3941 if (Z - GPT < end_unchanged) 4100 if (Z - GPT < end_unchanged)
3942 end_unchanged = Z - GPT; 4101 end_unchanged = Z - GPT;
3943 4102
3944 inserted = inserted_byte = 0; 4103 inserted = inserted_byte = 0;
4104 src = GAP_END_ADDR, dst = GPT_ADDR;
4105
4106 GAP_SIZE += len_byte;
4107 ZV -= len;
4108 Z -= len;
4109 ZV_BYTE -= len_byte;
4110 Z_BYTE -= len_byte;
4111
3945 for (;;) 4112 for (;;)
3946 { 4113 {
3947 int result, diff_char, diff_byte; 4114 int result;
3948 4115
3949 /* The buffer memory is changed from: 4116 /* The buffer memory is changed from:
3950 +--------+converted-text+------------+-----original-text-----+---+ 4117 +--------+converted-text+---------+-------original-text------+---+
3951 |<-from->|<--inserted-->|<-GAP_SIZE->|<---------len--------->|---| */ 4118 |<-from->|<--inserted-->|---------|<-----------len---------->|---|
3952 4119 |<------------------- GAP_SIZE -------------------->| */
3953 if (encodep) 4120 if (encodep)
3954 result = encode_coding (coding, GAP_END_ADDR, GPT_ADDR, len_byte, 0); 4121 result = encode_coding (coding, src, dst, len_byte, 0);
3955 else 4122 else
3956 result = decode_coding (coding, GAP_END_ADDR, GPT_ADDR, len_byte, 0); 4123 result = decode_coding (coding, src, dst, len_byte, 0);
3957 /* to: 4124 /* to:
3958 +--------+-------converted-text--------+--+---original-text--+---+ 4125 +--------+-------converted-text--------+--+---original-text--+---+
3959 |<-from->|<----(inserted+produced)---->|--|<-(len-consumed)->|---| */ 4126 |<-from->|<--inserted-->|<--produced-->|--|<-(len-consumed)->|---|
3960 4127 |<------------------- GAP_SIZE -------------------->| */
3961 diff_char = coding->produced_char - coding->consumed_char; 4128 if (coding->fake_multibyte)
3962 diff_byte = coding->produced - coding->consumed; 4129 fake_multibyte = 1;
3963 4130
3964 GAP_SIZE -= diff_byte; 4131 if (!encodep && !multibyte)
3965 ZV += diff_char; ZV_BYTE += diff_byte; 4132 coding->produced_char = coding->produced;
3966 Z += diff_char; Z_BYTE += diff_byte;
3967 GPT += coding->produced_char; GPT_BYTE += coding->produced;
3968
3969 inserted += coding->produced_char; 4133 inserted += coding->produced_char;
3970 inserted_byte += coding->produced; 4134 inserted_byte += coding->produced;
3971 len -= coding->consumed_char;
3972 len_byte -= coding->consumed; 4135 len_byte -= coding->consumed;
4136 src += coding->consumed;
4137 dst += inserted_byte;
3973 4138
3974 if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL) 4139 if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
3975 { 4140 {
3976 unsigned char *p = GPT_ADDR - inserted_byte, *pend = GPT_ADDR; 4141 unsigned char *pend = dst, *p = pend - inserted_byte;
3977 4142
3978 /* Encode LFs back to the original eol format (CR or CRLF). */ 4143 /* Encode LFs back to the original eol format (CR or CRLF). */
3979 if (coding->eol_type == CODING_EOL_CR) 4144 if (coding->eol_type == CODING_EOL_CR)
3980 { 4145 {
3981 while (p < pend) if (*p++ == '\n') p[-1] = '\r'; 4146 while (p < pend) if (*p++ == '\n') p[-1] = '\r';
3982 } 4147 }
3983 else 4148 else
3984 { 4149 {
3985 unsigned char *p2 = p;
3986 int count = 0; 4150 int count = 0;
3987 4151
3988 while (p2 < pend) if (*p2++ == '\n') count++; 4152 while (p < pend) if (*p++ == '\n') count++;
3989 if (GAP_SIZE < count) 4153 if (src - dst < count)
3990 make_gap (count - GAP_SIZE);
3991 p2 = GPT_ADDR + count;
3992 while (p < pend)
3993 { 4154 {
3994 *--p2 = *--pend; 4155 /* We don't have sufficient room for putting LFs
3995 if (*pend == '\n') *--p2 = '\r'; 4156 back to CRLF. We must record converted and
4157 not-yet-converted text back to the buffer
4158 content, enlarge the gap, then record them out of
4159 the buffer contents again. */
4160 int add = len_byte + inserted_byte;
4161
4162 GAP_SIZE -= add;
4163 ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4164 GPT += inserted_byte; GPT_BYTE += inserted_byte;
4165 make_gap (count - GAP_SIZE);
4166 GAP_SIZE += add;
4167 ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4168 GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4169 /* Don't forget to update SRC, DST, and PEND. */
4170 src = GAP_END_ADDR - len_byte;
4171 dst = GPT_ADDR + inserted_byte;
4172 pend = dst;
3996 } 4173 }
3997 GPT += count; GAP_SIZE -= count; ZV += count; Z += count;
3998 ZV_BYTE += count; Z_BYTE += count;
3999 coding->produced += count;
4000 coding->produced_char += count;
4001 inserted += count; 4174 inserted += count;
4002 inserted_byte += count; 4175 inserted_byte += count;
4176 coding->produced += count;
4177 p = dst = pend + count;
4178 while (count)
4179 {
4180 *--p = *--pend;
4181 if (*p == '\n') count--, *--p = '\r';
4182 }
4003 } 4183 }
4004 4184
4005 /* Suppress eol-format conversion in the further conversion. */ 4185 /* Suppress eol-format conversion in the further conversion. */
4006 coding->eol_type = CODING_EOL_LF; 4186 coding->eol_type = CODING_EOL_LF;
4007 4187
4008 /* Restore the original symbol. */ 4188 /* Restore the original symbol. */
4009 coding->symbol = saved_coding_symbol; 4189 coding->symbol = saved_coding_symbol;
4190
4191 continue;
4010 } 4192 }
4011 if (len_byte <= 0) 4193 if (len_byte <= 0)
4012 break; 4194 break;
4013 if (result == CODING_FINISH_INSUFFICIENT_SRC) 4195 if (result == CODING_FINISH_INSUFFICIENT_SRC)
4014 { 4196 {
4015 /* The source text ends in invalid codes. Let's just 4197 /* The source text ends in invalid codes. Let's just
4016 make them valid buffer contents, and finish conversion. */ 4198 make them valid buffer contents, and finish conversion. */
4017 inserted += len; 4199 inserted += len_byte;
4018 inserted_byte += len_byte; 4200 inserted_byte += len_byte;
4201 while (len_byte--)
4202 *src++ = *dst++;
4203 fake_multibyte = 1;
4019 break; 4204 break;
4020 } 4205 }
4021 if (inserted == coding->produced_char) 4206 if (first)
4022 /* We have just done the first batch of conversion. Let's 4207 {
4023 reconsider the required gap size now. 4208 /* We have just done the first batch of conversion which was
4024 4209 stoped because of insufficient gap. Let's reconsider the
4025 We have converted CONSUMED bytes into PRODUCED bytes. To 4210 required gap size (i.e. SRT - DST) now.
4026 convert the remaining LEN bytes, we may need REQUIRE bytes 4211
4027 of gap, where: 4212 We have converted ORIG bytes (== coding->consumed) into
4028 REQUIRE + LEN = (LEN * PRODUCED / CONSUMED) 4213 NEW bytes (coding->produced). To convert the remaining
4029 REQUIRE = LEN * (PRODUCED - CONSUMED) / CONSUMED 4214 LEN bytes, we may need REQUIRE bytes of gap, where:
4030 = LEN * DIFF / CONSUMED 4215 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
4031 Here, we are sure that DIFF is positive. */ 4216 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
4032 require = len_byte * diff_byte / coding->consumed; 4217 Here, we are sure that NEW >= ORIG. */
4033 if (GAP_SIZE < require) 4218 require = (len_byte * (coding->produced - coding->consumed)
4034 make_gap (require - GAP_SIZE); 4219 / coding->consumed);
4035 } 4220 first = 0;
4036 if (GAP_SIZE > 0) *GPT_ADDR = 0; /* Put an anchor. */ 4221 }
4222 if ((src - dst) < (require + 2000))
4223 {
4224 /* See the comment above the previous call of make_gap. */
4225 int add = len_byte + inserted_byte;
4226
4227 GAP_SIZE -= add;
4228 ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4229 GPT += inserted_byte; GPT_BYTE += inserted_byte;
4230 make_gap (require + 2000);
4231 GAP_SIZE += add;
4232 ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4233 GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4234 /* Don't forget to update SRC, DST. */
4235 src = GAP_END_ADDR - len_byte;
4236 dst = GPT_ADDR + inserted_byte;
4237 }
4238 }
4239 if (src - dst > 0) *dst = 0; /* Put an anchor. */
4240
4241 if (multibyte && (fake_multibyte || !encodep && (to - from) != (to_byte - from_byte)))
4242 inserted = multibyte_chars_in_text (GPT_ADDR, inserted_byte);
4243
4244 /* Update various buffer positions for the new text. */
4245 GAP_SIZE -= inserted_byte;
4246 ZV += inserted; Z+= inserted;
4247 ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte;
4248 GPT += inserted; GPT_BYTE += inserted_byte;
4037 4249
4038 if (adjust) 4250 if (adjust)
4039 { 4251 {
4040 adjust_after_replace (from, from_byte, to, to_byte, 4252 adjust_after_replace (from, from_byte, to, to_byte,
4041 inserted, inserted_byte); 4253 inserted, inserted_byte);
4053 inserted = XFASTINT (val); 4265 inserted = XFASTINT (val);
4054 } 4266 }
4055 if (pos >= from + orig_inserted) 4267 if (pos >= from + orig_inserted)
4056 temp_set_point (current_buffer, pos + (inserted - orig_inserted)); 4268 temp_set_point (current_buffer, pos + (inserted - orig_inserted));
4057 } 4269 }
4058 } 4270 signal_after_change (from, to - from, inserted);
4059 4271 }
4060 return ((from_byte - from_byte_orig) + inserted + (to_byte_orig - to_byte)); 4272
4273 {
4274 int skip = (to_byte_orig - to_byte) + (from_byte - from_byte_orig);
4275
4276 coding->consumed = to_byte_orig - from_byte_orig;
4277 coding->consumed_char = skip + (to - from);
4278 coding->produced = skip + inserted_byte;
4279 coding->produced_char = skip + inserted;
4280 }
4281 return 0;
4061 } 4282 }
4062 4283
4063 Lisp_Object 4284 Lisp_Object
4064 code_convert_string (str, coding, encodep, nocopy) 4285 code_convert_string (str, coding, encodep, nocopy)
4065 Lisp_Object str; 4286 Lisp_Object str;
4093 unibyte<->multibyte conversion. */ 4314 unibyte<->multibyte conversion. */
4094 current_buffer->enable_multibyte_characters = Qnil; 4315 current_buffer->enable_multibyte_characters = Qnil;
4095 insert_from_string (str, 0, 0, to_byte, to_byte, 0); 4316 insert_from_string (str, 0, 0, to_byte, to_byte, 0);
4096 current_buffer->enable_multibyte_characters = Qt; 4317 current_buffer->enable_multibyte_characters = Qt;
4097 } 4318 }
4098 code_convert_region (BEGV, ZV, coding, encodep, 1); 4319 code_convert_region (BEGV, BEGV_BYTE, ZV, ZV_BYTE, coding, encodep, 1);
4099 if (encodep) 4320 if (encodep)
4100 /* We must return the buffer contents as unibyte string. */ 4321 /* We must return the buffer contents as unibyte string. */
4101 current_buffer->enable_multibyte_characters = Qnil; 4322 current_buffer->enable_multibyte_characters = Qnil;
4102 str = make_buffer_string (BEGV, ZV, 0); 4323 str = make_buffer_string (BEGV, ZV, 0);
4103 set_buffer_internal (prev); 4324 set_buffer_internal (prev);
4375 4596
4376 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) 4597 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4377 error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data); 4598 error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4378 4599
4379 coding.mode |= CODING_MODE_LAST_BLOCK; 4600 coding.mode |= CODING_MODE_LAST_BLOCK;
4380 len = code_convert_region (from, to, &coding, encodep, 1); 4601 code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
4381 return make_number (len); 4602 &coding, encodep, 1);
4603 return make_number (coding.produced_char);
4382 } 4604 }
4383 4605
4384 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region, 4606 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
4385 3, 3, "r\nzCoding system: ", 4607 3, 3, "r\nzCoding system: ",
4386 "Decode the current region by specified coding system.\n\ 4608 "Decode the current region by specified coding system.\n\