Mercurial > emacs
comparison src/coding.c @ 20931:068eb408c911
(decode_coding_iso2022): Update coding->fake_multibyte.
(ENCODE_SINGLE_SHIFT_2, ENCODE_SINGLE_SHIFT_3,
encode_coding_iso2022, decode_coding_sjis_big5,
encode_coding_sjis_big5, decode_eol, encode_eol, decode_coding,
encode_coding): Likewise.
(shrink_decoding_region, shrink_encoding_region): Do not skip
non-ASCII codes in any case. Fix a bug in getting the starting address
from BEG.
(code_convert_region): Sync character positions correctly by
paying attention to coding->fake_multibyte.
(code_convert_string): Set the numbers of characters and bytes just
processed in members of CODING.
(code_convert_string): Adjust for the change of
code_convert_region.
(code_convert_region1): Likewise.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Fri, 20 Feb 1998 01:40:47 +0000 |
parents | 0fa2183c587d |
children | e4dd62e5d921 |
comparison
equal
deleted
inserted
replaced
20930:1331679fe704 | 20931:068eb408c911 |
---|---|
1005 | 1005 |
1006 if (!NILP (Venable_character_unification) && NILP (unification_table)) | 1006 if (!NILP (Venable_character_unification) && NILP (unification_table)) |
1007 unification_table = Vstandard_character_unification_table_for_decode; | 1007 unification_table = Vstandard_character_unification_table_for_decode; |
1008 | 1008 |
1009 coding->produced_char = 0; | 1009 coding->produced_char = 0; |
1010 coding->fake_multibyte = 0; | |
1010 while (src < src_end && (dst_bytes | 1011 while (src < src_end && (dst_bytes |
1011 ? (dst < adjusted_dst_end) | 1012 ? (dst < adjusted_dst_end) |
1012 : (dst < src - 6))) | 1013 : (dst < src - 6))) |
1013 { | 1014 { |
1014 /* SRC_BASE remembers the start position in source in each loop. | 1015 /* SRC_BASE remembers the start position in source in each loop. |
1044 break; | 1045 break; |
1045 | 1046 |
1046 case ISO_0xA0_or_0xFF: | 1047 case ISO_0xA0_or_0xFF: |
1047 if (charset1 < 0 || CHARSET_CHARS (charset1) == 94 | 1048 if (charset1 < 0 || CHARSET_CHARS (charset1) == 94 |
1048 || coding->flags & CODING_FLAG_ISO_SEVEN_BITS) | 1049 || coding->flags & CODING_FLAG_ISO_SEVEN_BITS) |
1049 { | 1050 goto label_invalid_code; |
1050 /* Invalid code. */ | |
1051 *dst++ = c1; | |
1052 coding->produced_char++; | |
1053 break; | |
1054 } | |
1055 /* This is a graphic character, we fall down ... */ | 1051 /* This is a graphic character, we fall down ... */ |
1056 | 1052 |
1057 case ISO_graphic_plane_1: | 1053 case ISO_graphic_plane_1: |
1058 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) | 1054 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) |
1059 { | 1055 goto label_invalid_code; |
1060 /* Invalid code. */ | |
1061 *dst++ = c1; | |
1062 coding->produced_char++; | |
1063 } | |
1064 else | 1056 else |
1065 DECODE_ISO_CHARACTER (charset1, c1); | 1057 DECODE_ISO_CHARACTER (charset1, c1); |
1066 break; | 1058 break; |
1067 | 1059 |
1068 case ISO_control_code: | 1060 case ISO_control_code: |
1308 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); | 1300 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); |
1309 charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1); | 1301 charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1); |
1310 break; | 1302 break; |
1311 | 1303 |
1312 label_invalid_code: | 1304 label_invalid_code: |
1313 coding->produced_char += src - src_base; | |
1314 while (src_base < src) | 1305 while (src_base < src) |
1315 *dst++ = *src_base++; | 1306 *dst++ = *src_base++; |
1307 coding->fake_multibyte = 1; | |
1316 } | 1308 } |
1317 continue; | 1309 continue; |
1318 | 1310 |
1319 label_end_of_loop: | 1311 label_end_of_loop: |
1320 result = CODING_FINISH_INSUFFICIENT_SRC; | 1312 result = CODING_FINISH_INSUFFICIENT_SRC; |
1321 label_end_of_loop_2: | 1313 label_end_of_loop_2: |
1322 src = src_base; | 1314 src = src_base; |
1323 break; | 1315 break; |
1324 } | 1316 } |
1325 | 1317 |
1326 if (result == CODING_FINISH_NORMAL | 1318 if (src < src_end) |
1327 && src < src_end) | 1319 { |
1328 result = CODING_FINISH_INSUFFICIENT_DST; | 1320 if (result == CODING_FINISH_NORMAL) |
1329 | 1321 result = CODING_FINISH_INSUFFICIENT_DST; |
1330 /* If this is the last block of the text to be decoded, we had | 1322 else if (result != CODING_FINISH_INCONSISTENT_EOL |
1331 better just flush out all remaining codes in the text although | 1323 && coding->mode & CODING_MODE_LAST_BLOCK) |
1332 they are not valid characters. */ | 1324 { |
1333 if (coding->mode & CODING_MODE_LAST_BLOCK) | 1325 /* This is the last block of the text to be decoded. We had |
1334 { | 1326 better just flush out all remaining codes in the text |
1335 bcopy (src, dst, src_end - src); | 1327 although they are not valid characters. */ |
1336 dst += (src_end - src); | 1328 src_bytes = src_end - src; |
1337 src = src_end; | 1329 if (dst_bytes && (dst_end - dst < src_bytes)) |
1338 } | 1330 src_bytes = dst_end - dst; |
1331 bcopy (src, dst, src_bytes); | |
1332 dst += src_bytes; | |
1333 src += src_bytes; | |
1334 coding->fake_multibyte = 1; | |
1335 } | |
1336 } | |
1337 | |
1339 coding->consumed = coding->consumed_char = src - source; | 1338 coding->consumed = coding->consumed_char = src - source; |
1340 coding->produced = dst - destination; | 1339 coding->produced = dst - destination; |
1341 return result; | 1340 return result; |
1342 } | 1341 } |
1343 | 1342 |
1411 #define ENCODE_SINGLE_SHIFT_2 \ | 1410 #define ENCODE_SINGLE_SHIFT_2 \ |
1412 do { \ | 1411 do { \ |
1413 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \ | 1412 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \ |
1414 *dst++ = ISO_CODE_ESC, *dst++ = 'N'; \ | 1413 *dst++ = ISO_CODE_ESC, *dst++ = 'N'; \ |
1415 else \ | 1414 else \ |
1416 *dst++ = ISO_CODE_SS2; \ | 1415 { \ |
1416 *dst++ = ISO_CODE_SS2; \ | |
1417 coding->fake_multibyte = 1; \ | |
1418 } \ | |
1417 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \ | 1419 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \ |
1418 } while (0) | 1420 } while (0) |
1419 | 1421 |
1420 #define ENCODE_SINGLE_SHIFT_3 \ | 1422 #define ENCODE_SINGLE_SHIFT_3 \ |
1421 do { \ | 1423 do { \ |
1422 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \ | 1424 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \ |
1423 *dst++ = ISO_CODE_ESC, *dst++ = 'O'; \ | 1425 *dst++ = ISO_CODE_ESC, *dst++ = 'O'; \ |
1424 else \ | 1426 else \ |
1425 *dst++ = ISO_CODE_SS3; \ | 1427 { \ |
1428 *dst++ = ISO_CODE_SS3; \ | |
1429 coding->fake_multibyte = 1; \ | |
1430 } \ | |
1426 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \ | 1431 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \ |
1427 } while (0) | 1432 } while (0) |
1428 | 1433 |
1429 /* The following four macros produce codes (control character or | 1434 /* The following four macros produce codes (control character or |
1430 escape sequence) for ISO2022 locking-shift functions (shift-in, | 1435 escape sequence) for ISO2022 locking-shift functions (shift-in, |
1744 | 1749 |
1745 if (!NILP (Venable_character_unification) && NILP (unification_table)) | 1750 if (!NILP (Venable_character_unification) && NILP (unification_table)) |
1746 unification_table = Vstandard_character_unification_table_for_encode; | 1751 unification_table = Vstandard_character_unification_table_for_encode; |
1747 | 1752 |
1748 coding->consumed_char = 0; | 1753 coding->consumed_char = 0; |
1754 coding->fake_multibyte = 0; | |
1749 while (src < src_end && (dst_bytes | 1755 while (src < src_end && (dst_bytes |
1750 ? (dst < adjusted_dst_end) | 1756 ? (dst < adjusted_dst_end) |
1751 : (dst < src - 19))) | 1757 : (dst < src - 19))) |
1752 { | 1758 { |
1753 /* SRC_BASE remembers the start position in source in each loop. | 1759 /* SRC_BASE remembers the start position in source in each loop. |
1931 result = CODING_FINISH_INSUFFICIENT_SRC; | 1937 result = CODING_FINISH_INSUFFICIENT_SRC; |
1932 src = src_base; | 1938 src = src_base; |
1933 break; | 1939 break; |
1934 } | 1940 } |
1935 | 1941 |
1936 if (result == CODING_FINISH_NORMAL | 1942 if (src < src_end) |
1937 && src < src_end) | 1943 { |
1938 result = CODING_FINISH_INSUFFICIENT_DST; | 1944 if (result == CODING_FINISH_NORMAL) |
1939 | 1945 result = CODING_FINISH_INSUFFICIENT_DST; |
1940 /* If this is the last block of the text to be encoded, we must | 1946 else |
1941 reset graphic planes and registers to the initial state, and | 1947 /* If this is the last block of the text to be encoded, we |
1942 flush out the carryover if any. */ | 1948 must reset graphic planes and registers to the initial |
1943 if (coding->mode & CODING_MODE_LAST_BLOCK) | 1949 state, and flush out the carryover if any. */ |
1944 ENCODE_RESET_PLANE_AND_REGISTER; | 1950 if (coding->mode & CODING_MODE_LAST_BLOCK) |
1951 ENCODE_RESET_PLANE_AND_REGISTER; | |
1952 } | |
1945 | 1953 |
1946 coding->consumed = src - source; | 1954 coding->consumed = src - source; |
1947 coding->produced = coding->produced_char = dst - destination; | 1955 coding->produced = coding->produced_char = dst - destination; |
1948 return result; | 1956 return result; |
1949 } | 1957 } |
2052 else if (CHARSET_DIMENSION (charset_alt) == 1) \ | 2060 else if (CHARSET_DIMENSION (charset_alt) == 1) \ |
2053 { \ | 2061 { \ |
2054 if (sjis_p && charset_alt == charset_katakana_jisx0201) \ | 2062 if (sjis_p && charset_alt == charset_katakana_jisx0201) \ |
2055 *dst++ = c1; \ | 2063 *dst++ = c1; \ |
2056 else \ | 2064 else \ |
2057 *dst++ = charset_alt, *dst++ = c1; \ | 2065 { \ |
2066 *dst++ = charset_alt, *dst++ = c1; \ | |
2067 coding->fake_multibyte = 1; \ | |
2068 } \ | |
2058 } \ | 2069 } \ |
2059 else \ | 2070 else \ |
2060 { \ | 2071 { \ |
2061 c1 &= 0x7F, c2 &= 0x7F; \ | 2072 c1 &= 0x7F, c2 &= 0x7F; \ |
2062 if (sjis_p && charset_alt == charset_jisx0208) \ | 2073 if (sjis_p && charset_alt == charset_jisx0208) \ |
2063 { \ | 2074 { \ |
2064 unsigned char s1, s2; \ | 2075 unsigned char s1, s2; \ |
2065 \ | 2076 \ |
2066 ENCODE_SJIS (c1, c2, s1, s2); \ | 2077 ENCODE_SJIS (c1, c2, s1, s2); \ |
2067 *dst++ = s1, *dst++ = s2; \ | 2078 *dst++ = s1, *dst++ = s2; \ |
2079 coding->fake_multibyte = 1; \ | |
2068 } \ | 2080 } \ |
2069 else if (!sjis_p \ | 2081 else if (!sjis_p \ |
2070 && (charset_alt == charset_big5_1 \ | 2082 && (charset_alt == charset_big5_1 \ |
2071 || charset_alt == charset_big5_2)) \ | 2083 || charset_alt == charset_big5_2)) \ |
2072 { \ | 2084 { \ |
2073 unsigned char b1, b2; \ | 2085 unsigned char b1, b2; \ |
2074 \ | 2086 \ |
2075 ENCODE_BIG5 (charset_alt, c1, c2, b1, b2); \ | 2087 ENCODE_BIG5 (charset_alt, c1, c2, b1, b2); \ |
2076 *dst++ = b1, *dst++ = b2; \ | 2088 *dst++ = b1, *dst++ = b2; \ |
2077 } \ | 2089 } \ |
2078 else \ | 2090 else \ |
2079 *dst++ = charset_alt, *dst++ = c1, *dst++ = c2; \ | 2091 { \ |
2092 *dst++ = charset_alt, *dst++ = c1, *dst++ = c2; \ | |
2093 coding->fake_multibyte = 1; \ | |
2094 } \ | |
2080 } \ | 2095 } \ |
2081 coding->consumed_char++; \ | 2096 coding->consumed_char++; \ |
2082 } while (0); | 2097 } while (0); |
2083 | 2098 |
2084 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | 2099 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". |
2153 | 2168 |
2154 if (!NILP (Venable_character_unification) && NILP (unification_table)) | 2169 if (!NILP (Venable_character_unification) && NILP (unification_table)) |
2155 unification_table = Vstandard_character_unification_table_for_decode; | 2170 unification_table = Vstandard_character_unification_table_for_decode; |
2156 | 2171 |
2157 coding->produced_char = 0; | 2172 coding->produced_char = 0; |
2173 coding->fake_multibyte = 0; | |
2158 while (src < src_end && (dst_bytes | 2174 while (src < src_end && (dst_bytes |
2159 ? (dst < adjusted_dst_end) | 2175 ? (dst < adjusted_dst_end) |
2160 : (dst < src - 3))) | 2176 : (dst < src - 3))) |
2161 { | 2177 { |
2162 /* SRC_BASE remembers the start position in source in each loop. | 2178 /* SRC_BASE remembers the start position in source in each loop. |
2201 *dst++ = c1; | 2217 *dst++ = c1; |
2202 coding->produced_char++; | 2218 coding->produced_char++; |
2203 } | 2219 } |
2204 else if (c1 < 0x80) | 2220 else if (c1 < 0x80) |
2205 DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2); | 2221 DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2); |
2206 else if (c1 < 0xA0 || c1 >= 0xE0) | 2222 else if (c1 < 0xA0) |
2207 { | 2223 { |
2208 /* SJIS -> JISX0208, BIG5 -> Big5 (only if 0xE0 <= c1 < 0xFF) */ | 2224 /* SJIS -> JISX0208 */ |
2209 if (sjis_p) | 2225 if (sjis_p) |
2210 { | 2226 { |
2211 ONE_MORE_BYTE (c2); | 2227 ONE_MORE_BYTE (c2); |
2212 DECODE_SJIS (c1, c2, c3, c4); | 2228 if (c2 >= 0x40) |
2213 DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4); | 2229 { |
2230 DECODE_SJIS (c1, c2, c3, c4); | |
2231 DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4); | |
2232 } | |
2233 else | |
2234 goto label_invalid_code_2; | |
2214 } | 2235 } |
2215 else if (c1 >= 0xE0 && c1 < 0xFF) | 2236 else |
2216 { | 2237 goto label_invalid_code_1; |
2217 int charset; | 2238 } |
2218 | 2239 else if (c1 < 0xE0) |
2219 ONE_MORE_BYTE (c2); | |
2220 DECODE_BIG5 (c1, c2, charset, c3, c4); | |
2221 DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4); | |
2222 } | |
2223 else /* Invalid code */ | |
2224 { | |
2225 *dst++ = c1; | |
2226 coding->produced_char++; | |
2227 } | |
2228 } | |
2229 else | |
2230 { | 2240 { |
2231 /* SJIS -> JISX0201-Kana, BIG5 -> Big5 */ | 2241 /* SJIS -> JISX0201-Kana, BIG5 -> Big5 */ |
2232 if (sjis_p) | 2242 if (sjis_p) |
2233 DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1, | 2243 DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1, |
2234 /* dummy */ c2); | 2244 /* dummy */ c2); |
2235 else | 2245 else |
2236 { | 2246 { |
2237 int charset; | 2247 int charset; |
2238 | 2248 |
2239 ONE_MORE_BYTE (c2); | 2249 ONE_MORE_BYTE (c2); |
2240 DECODE_BIG5 (c1, c2, charset, c3, c4); | 2250 if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE)) |
2241 DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4); | 2251 { |
2252 DECODE_BIG5 (c1, c2, charset, c3, c4); | |
2253 DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4); | |
2254 } | |
2255 else | |
2256 goto label_invalid_code_2; | |
2242 } | 2257 } |
2243 } | 2258 } |
2259 else /* C1 >= 0xE0 */ | |
2260 { | |
2261 /* SJIS -> JISX0208, BIG5 -> Big5 */ | |
2262 if (sjis_p) | |
2263 { | |
2264 ONE_MORE_BYTE (c2); | |
2265 if (c2 >= 0x40) | |
2266 { | |
2267 DECODE_SJIS (c1, c2, c3, c4); | |
2268 DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4); | |
2269 } | |
2270 else | |
2271 goto label_invalid_code_2; | |
2272 } | |
2273 else | |
2274 { | |
2275 int charset; | |
2276 | |
2277 ONE_MORE_BYTE (c2); | |
2278 if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE)) | |
2279 { | |
2280 DECODE_BIG5 (c1, c2, charset, c3, c4); | |
2281 DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4); | |
2282 } | |
2283 else | |
2284 goto label_invalid_code_2; | |
2285 } | |
2286 } | |
2287 continue; | |
2288 | |
2289 label_invalid_code_1: | |
2290 *dst++ = c1; | |
2291 coding->produced_char++; | |
2292 coding->fake_multibyte = 1; | |
2293 continue; | |
2294 | |
2295 label_invalid_code_2: | |
2296 *dst++ = c1; *dst++= c2; | |
2297 coding->produced_char += 2; | |
2298 coding->fake_multibyte = 1; | |
2244 continue; | 2299 continue; |
2245 | 2300 |
2246 label_end_of_loop: | 2301 label_end_of_loop: |
2247 result = CODING_FINISH_INSUFFICIENT_SRC; | 2302 result = CODING_FINISH_INSUFFICIENT_SRC; |
2248 label_end_of_loop_2: | 2303 label_end_of_loop_2: |
2249 src = src_base; | 2304 src = src_base; |
2250 break; | 2305 break; |
2251 } | 2306 } |
2252 | 2307 |
2253 if (result == CODING_FINISH_NORMAL | 2308 if (src < src_end) |
2254 && src < src_end) | 2309 { |
2255 result = CODING_FINISH_INSUFFICIENT_DST; | 2310 if (result == CODING_FINISH_NORMAL) |
2311 result = CODING_FINISH_INSUFFICIENT_DST; | |
2312 else if (result != CODING_FINISH_INCONSISTENT_EOL | |
2313 && coding->mode & CODING_MODE_LAST_BLOCK) | |
2314 { | |
2315 src_bytes = src_end - src; | |
2316 if (dst_bytes && (dst_end - dst < src_bytes)) | |
2317 src_bytes = dst_end - dst; | |
2318 bcopy (dst, src, src_bytes); | |
2319 src += src_bytes; | |
2320 dst += src_bytes; | |
2321 coding->fake_multibyte = 1; | |
2322 } | |
2323 } | |
2256 | 2324 |
2257 coding->consumed = coding->consumed_char = src - source; | 2325 coding->consumed = coding->consumed_char = src - source; |
2258 coding->produced = dst - destination; | 2326 coding->produced = dst - destination; |
2259 return result; | 2327 return result; |
2260 } | 2328 } |
2289 | 2357 |
2290 if (!NILP (Venable_character_unification) && NILP (unification_table)) | 2358 if (!NILP (Venable_character_unification) && NILP (unification_table)) |
2291 unification_table = Vstandard_character_unification_table_for_encode; | 2359 unification_table = Vstandard_character_unification_table_for_encode; |
2292 | 2360 |
2293 coding->consumed_char = 0; | 2361 coding->consumed_char = 0; |
2362 coding->fake_multibyte = 0; | |
2294 while (src < src_end && (dst_bytes | 2363 while (src < src_end && (dst_bytes |
2295 ? (dst < adjusted_dst_end) | 2364 ? (dst < adjusted_dst_end) |
2296 : (dst < src - 1))) | 2365 : (dst < src - 1))) |
2297 { | 2366 { |
2298 /* SRC_BASE remembers the start position in source in each loop. | 2367 /* SRC_BASE remembers the start position in source in each loop. |
2400 { | 2469 { |
2401 unsigned char *src = source; | 2470 unsigned char *src = source; |
2402 unsigned char *src_end = source + src_bytes; | 2471 unsigned char *src_end = source + src_bytes; |
2403 unsigned char *dst = destination; | 2472 unsigned char *dst = destination; |
2404 unsigned char *dst_end = destination + dst_bytes; | 2473 unsigned char *dst_end = destination + dst_bytes; |
2474 unsigned char c; | |
2405 int result = CODING_FINISH_NORMAL; | 2475 int result = CODING_FINISH_NORMAL; |
2476 | |
2477 coding->fake_multibyte = 0; | |
2406 | 2478 |
2407 if (src_bytes <= 0) | 2479 if (src_bytes <= 0) |
2408 return result; | 2480 return result; |
2409 | 2481 |
2410 switch (coding->eol_type) | 2482 switch (coding->eol_type) |
2419 while (src < src_end && (dst_bytes | 2491 while (src < src_end && (dst_bytes |
2420 ? (dst < adjusted_dst_end) | 2492 ? (dst < adjusted_dst_end) |
2421 : (dst < src - 1))) | 2493 : (dst < src - 1))) |
2422 { | 2494 { |
2423 unsigned char *src_base = src; | 2495 unsigned char *src_base = src; |
2424 unsigned char c = *src++; | 2496 |
2497 c = *src++; | |
2425 if (c == '\r') | 2498 if (c == '\r') |
2426 { | 2499 { |
2427 ONE_MORE_BYTE (c); | 2500 ONE_MORE_BYTE (c); |
2428 if (c != '\n') | 2501 if (c != '\n') |
2429 { | 2502 { |
2431 { | 2504 { |
2432 result = CODING_FINISH_INCONSISTENT_EOL; | 2505 result = CODING_FINISH_INCONSISTENT_EOL; |
2433 goto label_end_of_loop_2; | 2506 goto label_end_of_loop_2; |
2434 } | 2507 } |
2435 *dst++ = '\r'; | 2508 *dst++ = '\r'; |
2509 if (BASE_LEADING_CODE_P (c)) | |
2510 coding->fake_multibyte = 1; | |
2436 } | 2511 } |
2437 *dst++ = c; | 2512 *dst++ = c; |
2438 } | 2513 } |
2439 else if (c == '\n' | 2514 else if (c == '\n' |
2440 && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)) | 2515 && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)) |
2441 { | 2516 { |
2442 result = CODING_FINISH_INCONSISTENT_EOL; | 2517 result = CODING_FINISH_INCONSISTENT_EOL; |
2443 goto label_end_of_loop_2; | 2518 goto label_end_of_loop_2; |
2444 } | 2519 } |
2445 else | 2520 else |
2446 *dst++ = c; | 2521 { |
2522 *dst++ = c; | |
2523 if (BASE_LEADING_CODE_P (c)) | |
2524 coding->fake_multibyte = 1; | |
2525 } | |
2447 continue; | 2526 continue; |
2448 | 2527 |
2449 label_end_of_loop: | 2528 label_end_of_loop: |
2450 result = CODING_FINISH_INSUFFICIENT_SRC; | 2529 result = CODING_FINISH_INSUFFICIENT_SRC; |
2451 label_end_of_loop_2: | 2530 label_end_of_loop_2: |
2459 break; | 2538 break; |
2460 | 2539 |
2461 case CODING_EOL_CR: | 2540 case CODING_EOL_CR: |
2462 if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) | 2541 if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) |
2463 { | 2542 { |
2464 while (src < src_end) if (*src++ == '\n') break; | 2543 while (src < src_end) |
2544 { | |
2545 if ((c = *src++) == '\n') | |
2546 break; | |
2547 if (BASE_LEADING_CODE_P (c)) | |
2548 coding->fake_multibyte = 1; | |
2549 } | |
2465 if (*--src == '\n') | 2550 if (*--src == '\n') |
2466 { | 2551 { |
2467 src_bytes = src - source; | 2552 src_bytes = src - source; |
2468 result = CODING_FINISH_INCONSISTENT_EOL; | 2553 result = CODING_FINISH_INCONSISTENT_EOL; |
2469 } | 2554 } |
2491 bcopy (source, destination, src_bytes); | 2576 bcopy (source, destination, src_bytes); |
2492 else | 2577 else |
2493 safe_bcopy (source, destination, src_bytes); | 2578 safe_bcopy (source, destination, src_bytes); |
2494 src += src_bytes; | 2579 src += src_bytes; |
2495 dst += dst_bytes; | 2580 dst += dst_bytes; |
2581 coding->fake_multibyte = 1; | |
2496 break; | 2582 break; |
2497 } | 2583 } |
2498 | 2584 |
2499 coding->consumed = coding->consumed_char = src - source; | 2585 coding->consumed = coding->consumed_char = src - source; |
2500 coding->produced = coding->produced_char = dst - destination; | 2586 coding->produced = coding->produced_char = dst - destination; |
2512 int src_bytes, dst_bytes; | 2598 int src_bytes, dst_bytes; |
2513 { | 2599 { |
2514 unsigned char *src = source; | 2600 unsigned char *src = source; |
2515 unsigned char *dst = destination; | 2601 unsigned char *dst = destination; |
2516 int result = CODING_FINISH_NORMAL; | 2602 int result = CODING_FINISH_NORMAL; |
2603 | |
2604 coding->fake_multibyte = 0; | |
2517 | 2605 |
2518 if (coding->eol_type == CODING_EOL_CRLF) | 2606 if (coding->eol_type == CODING_EOL_CRLF) |
2519 { | 2607 { |
2520 unsigned char c; | 2608 unsigned char c; |
2521 unsigned char *src_end = source + src_bytes; | 2609 unsigned char *src_end = source + src_bytes; |
2532 c = *src++; | 2620 c = *src++; |
2533 if (c == '\n' | 2621 if (c == '\n' |
2534 || (c == '\r' && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))) | 2622 || (c == '\r' && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))) |
2535 *dst++ = '\r', *dst++ = '\n'; | 2623 *dst++ = '\r', *dst++ = '\n'; |
2536 else | 2624 else |
2537 *dst++ = c; | 2625 { |
2626 *dst++ = c; | |
2627 if (BASE_LEADING_CODE_P (c)) | |
2628 coding->fake_multibyte = 1; | |
2629 } | |
2538 } | 2630 } |
2539 if (src < src_end) | 2631 if (src < src_end) |
2540 result = CODING_FINISH_INSUFFICIENT_DST; | 2632 result = CODING_FINISH_INSUFFICIENT_DST; |
2541 } | 2633 } |
2542 else | 2634 else |
2543 { | 2635 { |
2636 unsigned char c; | |
2637 | |
2544 if (dst_bytes && src_bytes > dst_bytes) | 2638 if (dst_bytes && src_bytes > dst_bytes) |
2545 { | 2639 { |
2546 src_bytes = dst_bytes; | 2640 src_bytes = dst_bytes; |
2547 result = CODING_FINISH_INSUFFICIENT_DST; | 2641 result = CODING_FINISH_INSUFFICIENT_DST; |
2548 } | 2642 } |
2549 if (dst_bytes) | 2643 if (dst_bytes) |
2550 bcopy (source, destination, src_bytes); | 2644 bcopy (source, destination, src_bytes); |
2551 else | 2645 else |
2552 safe_bcopy (source, destination, src_bytes); | 2646 { |
2647 safe_bcopy (source, destination, src_bytes); | |
2648 dst_bytes = src_bytes; | |
2649 } | |
2553 if (coding->eol_type == CODING_EOL_CRLF) | 2650 if (coding->eol_type == CODING_EOL_CRLF) |
2554 { | 2651 { |
2555 while (src_bytes--) | 2652 while (src_bytes--) |
2556 if (*dst++ == '\n') dst[-1] = '\r'; | 2653 { |
2557 } | 2654 if ((c = *dst++) == '\n') |
2558 else if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY) | 2655 dst[-1] = '\r'; |
2559 { | 2656 else if (BASE_LEADING_CODE_P (c)) |
2560 while (src_bytes--) | 2657 coding->fake_multibyte = 1; |
2561 if (*dst++ == '\r') dst[-1] = '\n'; | 2658 } |
2562 } | 2659 } |
2563 src += src_bytes; | 2660 else |
2564 dst += src_bytes; | 2661 { |
2662 if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY) | |
2663 { | |
2664 while (src_bytes--) | |
2665 if (*dst++ == '\r') dst[-1] = '\n'; | |
2666 } | |
2667 coding->fake_multibyte = 1; | |
2668 } | |
2669 src = source + dst_bytes; | |
2670 dst = destination + dst_bytes; | |
2565 } | 2671 } |
2566 | 2672 |
2567 coding->consumed = coding->consumed_char = src - source; | 2673 coding->consumed = coding->consumed_char = src - source; |
2568 coding->produced = coding->produced_char = dst - destination; | 2674 coding->produced = coding->produced_char = dst - destination; |
2569 return result; | 2675 return result; |
3456 | 3562 |
3457 if (src_bytes <= 0) | 3563 if (src_bytes <= 0) |
3458 { | 3564 { |
3459 coding->produced = coding->produced_char = 0; | 3565 coding->produced = coding->produced_char = 0; |
3460 coding->consumed = coding->consumed_char = 0; | 3566 coding->consumed = coding->consumed_char = 0; |
3567 coding->fake_multibyte = 0; | |
3461 return CODING_FINISH_NORMAL; | 3568 return CODING_FINISH_NORMAL; |
3462 } | 3569 } |
3463 | 3570 |
3464 if (coding->type == coding_type_undecided) | 3571 if (coding->type == coding_type_undecided) |
3465 detect_coding (coding, source, src_bytes); | 3572 detect_coding (coding, source, src_bytes); |
3512 } | 3619 } |
3513 if (dst_bytes) | 3620 if (dst_bytes) |
3514 bcopy (source, destination, coding->produced); | 3621 bcopy (source, destination, coding->produced); |
3515 else | 3622 else |
3516 safe_bcopy (source, destination, coding->produced); | 3623 safe_bcopy (source, destination, coding->produced); |
3624 coding->fake_multibyte = 1; | |
3517 coding->consumed | 3625 coding->consumed |
3518 = coding->consumed_char = coding->produced_char = coding->produced; | 3626 = coding->consumed_char = coding->produced_char = coding->produced; |
3519 break; | 3627 break; |
3520 } | 3628 } |
3521 | 3629 |
3534 | 3642 |
3535 if (src_bytes <= 0) | 3643 if (src_bytes <= 0) |
3536 { | 3644 { |
3537 coding->produced = coding->produced_char = 0; | 3645 coding->produced = coding->produced_char = 0; |
3538 coding->consumed = coding->consumed_char = 0; | 3646 coding->consumed = coding->consumed_char = 0; |
3647 coding->fake_multibyte = 0; | |
3539 return CODING_FINISH_NORMAL; | 3648 return CODING_FINISH_NORMAL; |
3540 } | 3649 } |
3541 | 3650 |
3542 switch (coding->type) | 3651 switch (coding->type) |
3543 { | 3652 { |
3590 { | 3699 { |
3591 unsigned char *p = destination, *pend = p + coding->produced; | 3700 unsigned char *p = destination, *pend = p + coding->produced; |
3592 while (p < pend) | 3701 while (p < pend) |
3593 if (*p++ == '\015') p[-1] = '\n'; | 3702 if (*p++ == '\015') p[-1] = '\n'; |
3594 } | 3703 } |
3704 coding->fake_multibyte = 1; | |
3595 coding->consumed | 3705 coding->consumed |
3596 = coding->consumed_char = coding->produced_char = coding->produced; | 3706 = coding->consumed_char = coding->produced_char = coding->produced; |
3597 break; | 3707 break; |
3598 } | 3708 } |
3599 | 3709 |
3600 return result; | 3710 return result; |
3601 } | 3711 } |
3602 | 3712 |
3603 /* Scan text in the region between *BEG and *END, skip characters | 3713 /* Scan text in the region between *BEG and *END (byte positions), |
3604 which we don't have to decode by coding system CODING at the head | 3714 skip characters which we don't have to decode by coding system |
3605 and tail, then set *BEG and *END to the region of the text we | 3715 CODING at the head and tail, then set *BEG and *END to the region |
3606 actually have to convert. | 3716 of the text we actually have to convert. The caller should move |
3717 the gap out of the region in advance. | |
3607 | 3718 |
3608 If STR is not NULL, *BEG and *END are indices into STR. */ | 3719 If STR is not NULL, *BEG and *END are indices into STR. */ |
3609 | 3720 |
3610 static void | 3721 static void |
3611 shrink_decoding_region (beg, end, coding, str) | 3722 shrink_decoding_region (beg, end, coding, str) |
3612 int *beg, *end; | 3723 int *beg, *end; |
3613 struct coding_system *coding; | 3724 struct coding_system *coding; |
3614 unsigned char *str; | 3725 unsigned char *str; |
3615 { | 3726 { |
3616 unsigned char *begp_orig, *begp, *endp_orig, *endp; | 3727 unsigned char *begp_orig, *begp, *endp_orig, *endp, c; |
3617 int eol_conversion; | 3728 int eol_conversion; |
3618 | 3729 |
3619 if (coding->type == coding_type_ccl | 3730 if (coding->type == coding_type_ccl |
3620 || coding->type == coding_type_undecided | 3731 || coding->type == coding_type_undecided |
3621 || !NILP (coding->post_read_conversion)) | 3732 || !NILP (coding->post_read_conversion)) |
3623 /* We can't skip any data. */ | 3734 /* We can't skip any data. */ |
3624 return; | 3735 return; |
3625 } | 3736 } |
3626 else if (coding->type == coding_type_no_conversion) | 3737 else if (coding->type == coding_type_no_conversion) |
3627 { | 3738 { |
3628 /* We need no conversion. */ | 3739 /* We need no conversion, but don't have to skip any data here. |
3629 *beg = *end; | 3740 Decoding routine handles them effectively anyway. */ |
3630 return; | 3741 return; |
3631 } | 3742 } |
3632 | 3743 |
3633 if (coding->heading_ascii >= 0) | 3744 if (coding->heading_ascii >= 0) |
3634 /* Detection routine has already found how much we can skip at the | 3745 /* Detection routine has already found how much we can skip at the |
3640 begp_orig = begp = str + *beg; | 3751 begp_orig = begp = str + *beg; |
3641 endp_orig = endp = str + *end; | 3752 endp_orig = endp = str + *end; |
3642 } | 3753 } |
3643 else | 3754 else |
3644 { | 3755 { |
3645 move_gap (*beg); | 3756 begp_orig = begp = BYTE_POS_ADDR (*beg); |
3646 begp_orig = begp = GAP_END_ADDR; | |
3647 endp_orig = endp = begp + *end - *beg; | 3757 endp_orig = endp = begp + *end - *beg; |
3648 } | 3758 } |
3649 | 3759 |
3650 eol_conversion = (coding->eol_type != CODING_EOL_LF); | 3760 eol_conversion = (coding->eol_type != CODING_EOL_LF); |
3651 | 3761 |
3654 case coding_type_emacs_mule: | 3764 case coding_type_emacs_mule: |
3655 case coding_type_raw_text: | 3765 case coding_type_raw_text: |
3656 if (eol_conversion) | 3766 if (eol_conversion) |
3657 { | 3767 { |
3658 if (coding->heading_ascii < 0) | 3768 if (coding->heading_ascii < 0) |
3659 while (begp < endp && *begp != '\r') begp++; | 3769 while (begp < endp && *begp != '\r' && *begp < 0x80) begp++; |
3660 while (begp < endp && *(endp - 1) != '\r') endp--; | 3770 while (begp < endp && *(endp - 1) != '\r' && *(endp - 1) < 0x80) |
3771 endp--; | |
3661 } | 3772 } |
3662 else | 3773 else |
3663 begp = endp; | 3774 begp = endp; |
3664 break; | 3775 break; |
3665 | 3776 |
3684 break; | 3795 break; |
3685 | 3796 |
3686 default: /* i.e. case coding_type_iso2022: */ | 3797 default: /* i.e. case coding_type_iso2022: */ |
3687 if (coding->heading_ascii < 0) | 3798 if (coding->heading_ascii < 0) |
3688 { | 3799 { |
3689 unsigned char c; | |
3690 | |
3691 /* We can skip all ASCII characters at the head except for a | 3800 /* We can skip all ASCII characters at the head except for a |
3692 few control codes. */ | 3801 few control codes. */ |
3693 while (begp < endp && (c = *begp) < 0x80 | 3802 while (begp < endp && (c = *begp) < 0x80 |
3694 && c != ISO_CODE_CR && c != ISO_CODE_SO | 3803 && c != ISO_CODE_CR && c != ISO_CODE_SO |
3695 && c != ISO_CODE_SI && c != ISO_CODE_ESC | 3804 && c != ISO_CODE_SI && c != ISO_CODE_ESC |
3700 { | 3809 { |
3701 case CODING_CATEGORY_IDX_ISO_8_1: | 3810 case CODING_CATEGORY_IDX_ISO_8_1: |
3702 case CODING_CATEGORY_IDX_ISO_8_2: | 3811 case CODING_CATEGORY_IDX_ISO_8_2: |
3703 /* We can skip all ASCII characters at the tail. */ | 3812 /* We can skip all ASCII characters at the tail. */ |
3704 if (eol_conversion) | 3813 if (eol_conversion) |
3705 while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--; | 3814 while (begp < endp && (c = endp[-1]) < 0x80 && c != '\n') endp--; |
3706 else | 3815 else |
3707 while (begp < endp && endp[-1] < 0x80) endp--; | 3816 while (begp < endp && endp[-1] < 0x80) endp--; |
3708 break; | 3817 break; |
3709 | 3818 |
3710 case CODING_CATEGORY_IDX_ISO_7: | 3819 case CODING_CATEGORY_IDX_ISO_7: |
3711 case CODING_CATEGORY_IDX_ISO_7_TIGHT: | 3820 case CODING_CATEGORY_IDX_ISO_7_TIGHT: |
3712 /* We can skip all characters at the tail except for ESC and | 3821 /* We can skip all characters at the tail except for ESC and |
3713 the following 2-byte at the tail. */ | 3822 the following 2-byte at the tail. */ |
3714 if (eol_conversion) | 3823 if (eol_conversion) |
3715 while (begp < endp && endp[-1] != ISO_CODE_ESC && endp[-1] != '\n') | 3824 while (begp < endp |
3825 && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC && c != '\n') | |
3716 endp--; | 3826 endp--; |
3717 else | 3827 else |
3718 while (begp < endp && endp[-1] != ISO_CODE_ESC) | 3828 while (begp < endp |
3829 && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC) | |
3719 endp--; | 3830 endp--; |
3720 if (begp < endp && endp[-1] == ISO_CODE_ESC) | 3831 if (begp < endp && endp[-1] == ISO_CODE_ESC) |
3721 { | 3832 { |
3722 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B') | 3833 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B') |
3723 /* This is an ASCII designation sequence. We can | 3834 /* This is an ASCII designation sequence. We can |
3760 begp_orig = begp = str + *beg; | 3871 begp_orig = begp = str + *beg; |
3761 endp_orig = endp = str + *end; | 3872 endp_orig = endp = str + *end; |
3762 } | 3873 } |
3763 else | 3874 else |
3764 { | 3875 { |
3765 move_gap (*beg); | 3876 begp_orig = begp = BYTE_POS_ADDR (*beg); |
3766 begp_orig = begp = GAP_END_ADDR; | |
3767 endp_orig = endp = begp + *end - *beg; | 3877 endp_orig = endp = begp + *end - *beg; |
3768 } | 3878 } |
3769 | 3879 |
3770 eol_conversion = (coding->eol_type == CODING_EOL_CR | 3880 eol_conversion = (coding->eol_type == CODING_EOL_CR |
3771 || coding->eol_type == CODING_EOL_CRLF); | 3881 || coding->eol_type == CODING_EOL_CRLF); |
3819 *end += endp - endp_orig; | 3929 *end += endp - endp_orig; |
3820 return; | 3930 return; |
3821 } | 3931 } |
3822 | 3932 |
3823 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the | 3933 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the |
3824 text from FROM to TO by coding system CODING, and return number of | 3934 text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by |
3825 characters in the resulting text. | 3935 coding system CODING, and return the status code of code conversion |
3936 (currently, this value has no meaning). | |
3937 | |
3938 How many characters (and bytes) are converted to how many | |
3939 characters (and bytes) are recorded in members of the structure | |
3940 CODING. | |
3826 | 3941 |
3827 If ADJUST is nonzero, we do various things as if the original text | 3942 If ADJUST is nonzero, we do various things as if the original text |
3828 is deleted and a new text is inserted. See the comments in | 3943 is deleted and a new text is inserted. See the comments in |
3829 replace_range (insdel.c) to know what we are doing. | 3944 replace_range (insdel.c) to know what we are doing. |
3830 | 3945 |
3831 ADJUST nonzero also means that post-read-conversion or | 3946 ADJUST nonzero also means that post-read-conversion or |
3832 pre-write-conversion functions (if any) should be processed. */ | 3947 pre-write-conversion functions (if any) should be processed. */ |
3833 | 3948 |
3834 int | 3949 int |
3835 code_convert_region (from, to, coding, encodep, adjust) | 3950 code_convert_region (from, from_byte, to, to_byte, coding, encodep, adjust) |
3836 int from, to, encodep, adjust; | 3951 int from, from_byte, to, to_byte, encodep, adjust; |
3837 struct coding_system *coding; | 3952 struct coding_system *coding; |
3838 { | 3953 { |
3839 int len = to - from, require, inserted, inserted_byte; | 3954 int len = to - from, len_byte = to_byte - from_byte; |
3840 int from_byte, to_byte, len_byte; | 3955 int require, inserted, inserted_byte; |
3841 int from_byte_orig, to_byte_orig; | 3956 int from_byte_orig, to_byte_orig; |
3842 Lisp_Object saved_coding_symbol = Qnil; | 3957 Lisp_Object saved_coding_symbol = Qnil; |
3958 int multibyte = !NILP (current_buffer->enable_multibyte_characters); | |
3959 int first = 1; | |
3960 int fake_multibyte = 0; | |
3961 unsigned char *src, *dst; | |
3843 | 3962 |
3844 if (adjust) | 3963 if (adjust) |
3845 { | 3964 { |
3965 int saved_from = from; | |
3966 | |
3846 prepare_to_modify_buffer (from, to, &from); | 3967 prepare_to_modify_buffer (from, to, &from); |
3847 to = from + len; | 3968 if (saved_from != from) |
3848 } | 3969 { |
3849 from_byte = CHAR_TO_BYTE (from); to_byte = CHAR_TO_BYTE (to); | 3970 to = from + len; |
3850 len_byte = to_byte - from_byte; | 3971 if (multibyte) |
3972 from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to); | |
3973 else | |
3974 from_byte = from, to_byte = to; | |
3975 len_byte = to_byte - from_byte; | |
3976 } | |
3977 } | |
3851 | 3978 |
3852 if (! encodep && CODING_REQUIRE_DETECTION (coding)) | 3979 if (! encodep && CODING_REQUIRE_DETECTION (coding)) |
3853 { | 3980 { |
3854 /* We must detect encoding of text and eol. Even if detection | 3981 /* We must detect encoding of text and eol. Even if detection |
3855 routines can't decide the encoding, we should not let them | 3982 routines can't decide the encoding, we should not let them |
3858 | 3985 |
3859 if (from < GPT && to > GPT) | 3986 if (from < GPT && to > GPT) |
3860 move_gap_both (from, from_byte); | 3987 move_gap_both (from, from_byte); |
3861 if (coding->type == coding_type_undecided) | 3988 if (coding->type == coding_type_undecided) |
3862 { | 3989 { |
3863 detect_coding (coding, BYTE_POS_ADDR (from), len); | 3990 detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte); |
3864 if (coding->type == coding_type_undecided) | 3991 if (coding->type == coding_type_undecided) |
3865 coding->type = coding_type_emacs_mule; | 3992 coding->type = coding_type_emacs_mule; |
3866 } | 3993 } |
3867 if (coding->eol_type == CODING_EOL_UNDECIDED) | 3994 if (coding->eol_type == CODING_EOL_UNDECIDED) |
3868 { | 3995 { |
3874 encounter an inconsistent eol format while decoding. */ | 4001 encounter an inconsistent eol format while decoding. */ |
3875 coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL; | 4002 coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL; |
3876 } | 4003 } |
3877 } | 4004 } |
3878 | 4005 |
4006 coding->consumed_char = len, coding->consumed = len_byte; | |
4007 | |
3879 if (encodep | 4008 if (encodep |
3880 ? ! CODING_REQUIRE_ENCODING (coding) | 4009 ? ! CODING_REQUIRE_ENCODING (coding) |
3881 : ! CODING_REQUIRE_DECODING (coding)) | 4010 : ! CODING_REQUIRE_DECODING (coding)) |
3882 return len; | 4011 { |
4012 coding->produced = len_byte; | |
4013 if (multibyte) | |
4014 { | |
4015 if (GPT < from || GPT > to) | |
4016 move_gap_both (from, from_byte); | |
4017 coding->produced_char | |
4018 = multibyte_chars_in_text (BYTE_POS_ADDR (from_byte), len_byte); | |
4019 if (coding->produced_char != len) | |
4020 { | |
4021 int diff = coding->produced_char - len; | |
4022 | |
4023 if (adjust) | |
4024 adjust_before_replace (from, from_byte, to, to_byte); | |
4025 ZV += diff; Z += diff; GPT += diff; | |
4026 if (adjust) | |
4027 adjust_after_replace (from, from_byte, to, to_byte, | |
4028 diff, 0); | |
4029 } | |
4030 } | |
4031 else | |
4032 coding->produced_char = len_byte; | |
4033 return 0; | |
4034 } | |
3883 | 4035 |
3884 /* Now we convert the text. */ | 4036 /* Now we convert the text. */ |
3885 | 4037 |
3886 /* For encoding, we must process pre-write-conversion in advance. */ | 4038 /* For encoding, we must process pre-write-conversion in advance. */ |
3887 if (encodep | 4039 if (encodep |
3898 if (current_buffer != prev) | 4050 if (current_buffer != prev) |
3899 { | 4051 { |
3900 len = ZV - BEGV; | 4052 len = ZV - BEGV; |
3901 new = current_buffer; | 4053 new = current_buffer; |
3902 set_buffer_internal_1 (prev); | 4054 set_buffer_internal_1 (prev); |
3903 del_range (from, to); | 4055 del_range_2 (from, to, from_byte, to_byte); |
3904 insert_from_buffer (new, BEG, len, 0); | 4056 insert_from_buffer (new, BEG, len, 0); |
3905 to = from + len; | 4057 to = from + len; |
3906 to_byte = CHAR_TO_BYTE (to); | 4058 to_byte = multibyte ? CHAR_TO_BYTE (to) : to; |
3907 len_byte = to_byte - from_byte; | 4059 len_byte = to_byte - from_byte; |
3908 } | 4060 } |
3909 } | 4061 } |
3910 | 4062 |
3911 /* Try to skip the heading and tailing ASCIIs. */ | 4063 /* Try to skip the heading and tailing ASCIIs. */ |
3912 from_byte_orig = from_byte; to_byte_orig = to_byte; | 4064 from_byte_orig = from_byte; to_byte_orig = to_byte; |
4065 if (from < GPT && GPT < to) | |
4066 move_gap (from); | |
3913 if (encodep) | 4067 if (encodep) |
3914 shrink_encoding_region (&from_byte, &to_byte, coding, NULL); | 4068 shrink_encoding_region (&from_byte, &to_byte, coding, NULL); |
3915 else | 4069 else |
3916 shrink_decoding_region (&from_byte, &to_byte, coding, NULL); | 4070 shrink_decoding_region (&from_byte, &to_byte, coding, NULL); |
3917 if (from_byte == to_byte) | 4071 if (from_byte == to_byte) |
3918 return len; | 4072 { |
4073 coding->produced = len_byte; | |
4074 coding->produced_char = multibyte ? len : len_byte; | |
4075 return 0; | |
4076 } | |
4077 | |
3919 /* Here, the excluded region by shrinking contains only ASCIIs. */ | 4078 /* Here, the excluded region by shrinking contains only ASCIIs. */ |
3920 from += (from_byte - from_byte_orig); | 4079 from += (from_byte - from_byte_orig); |
3921 to += (to_byte - to_byte_orig); | 4080 to += (to_byte - to_byte_orig); |
3922 len = to - from; | 4081 len = to - from; |
3923 len_byte = to_byte - from_byte; | 4082 len_byte = to_byte - from_byte; |
3924 | 4083 |
3925 /* For conversion, we must put the gap before the text to be decoded | 4084 /* For conversion, we must put the gap before the text in addition to |
3926 in addition to make the gap larger for efficient decoding. The | 4085 making the gap larger for efficient decoding. The required gap |
3927 required gap size starts from 2000 which is the magic number used | 4086 size starts from 2000 which is the magic number used in make_gap. |
3928 in make_gap. But, after one batch of conversion, it will be | 4087 But, after one batch of conversion, it will be incremented if we |
3929 incremented if we find that it is not enough . */ | 4088 find that it is not enough . */ |
3930 require = 2000; | 4089 require = 2000; |
3931 | 4090 |
3932 if (GAP_SIZE < require) | 4091 if (GAP_SIZE < require) |
3933 make_gap (require - GAP_SIZE); | 4092 make_gap (require - GAP_SIZE); |
3934 move_gap_both (from, from_byte); | 4093 move_gap_both (from, from_byte); |
3940 beg_unchanged = GPT - BEG; | 4099 beg_unchanged = GPT - BEG; |
3941 if (Z - GPT < end_unchanged) | 4100 if (Z - GPT < end_unchanged) |
3942 end_unchanged = Z - GPT; | 4101 end_unchanged = Z - GPT; |
3943 | 4102 |
3944 inserted = inserted_byte = 0; | 4103 inserted = inserted_byte = 0; |
4104 src = GAP_END_ADDR, dst = GPT_ADDR; | |
4105 | |
4106 GAP_SIZE += len_byte; | |
4107 ZV -= len; | |
4108 Z -= len; | |
4109 ZV_BYTE -= len_byte; | |
4110 Z_BYTE -= len_byte; | |
4111 | |
3945 for (;;) | 4112 for (;;) |
3946 { | 4113 { |
3947 int result, diff_char, diff_byte; | 4114 int result; |
3948 | 4115 |
3949 /* The buffer memory is changed from: | 4116 /* The buffer memory is changed from: |
3950 +--------+converted-text+------------+-----original-text-----+---+ | 4117 +--------+converted-text+---------+-------original-text------+---+ |
3951 |<-from->|<--inserted-->|<-GAP_SIZE->|<---------len--------->|---| */ | 4118 |<-from->|<--inserted-->|---------|<-----------len---------->|---| |
3952 | 4119 |<------------------- GAP_SIZE -------------------->| */ |
3953 if (encodep) | 4120 if (encodep) |
3954 result = encode_coding (coding, GAP_END_ADDR, GPT_ADDR, len_byte, 0); | 4121 result = encode_coding (coding, src, dst, len_byte, 0); |
3955 else | 4122 else |
3956 result = decode_coding (coding, GAP_END_ADDR, GPT_ADDR, len_byte, 0); | 4123 result = decode_coding (coding, src, dst, len_byte, 0); |
3957 /* to: | 4124 /* to: |
3958 +--------+-------converted-text--------+--+---original-text--+---+ | 4125 +--------+-------converted-text--------+--+---original-text--+---+ |
3959 |<-from->|<----(inserted+produced)---->|--|<-(len-consumed)->|---| */ | 4126 |<-from->|<--inserted-->|<--produced-->|--|<-(len-consumed)->|---| |
3960 | 4127 |<------------------- GAP_SIZE -------------------->| */ |
3961 diff_char = coding->produced_char - coding->consumed_char; | 4128 if (coding->fake_multibyte) |
3962 diff_byte = coding->produced - coding->consumed; | 4129 fake_multibyte = 1; |
3963 | 4130 |
3964 GAP_SIZE -= diff_byte; | 4131 if (!encodep && !multibyte) |
3965 ZV += diff_char; ZV_BYTE += diff_byte; | 4132 coding->produced_char = coding->produced; |
3966 Z += diff_char; Z_BYTE += diff_byte; | |
3967 GPT += coding->produced_char; GPT_BYTE += coding->produced; | |
3968 | |
3969 inserted += coding->produced_char; | 4133 inserted += coding->produced_char; |
3970 inserted_byte += coding->produced; | 4134 inserted_byte += coding->produced; |
3971 len -= coding->consumed_char; | |
3972 len_byte -= coding->consumed; | 4135 len_byte -= coding->consumed; |
4136 src += coding->consumed; | |
4137 dst += inserted_byte; | |
3973 | 4138 |
3974 if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL) | 4139 if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL) |
3975 { | 4140 { |
3976 unsigned char *p = GPT_ADDR - inserted_byte, *pend = GPT_ADDR; | 4141 unsigned char *pend = dst, *p = pend - inserted_byte; |
3977 | 4142 |
3978 /* Encode LFs back to the original eol format (CR or CRLF). */ | 4143 /* Encode LFs back to the original eol format (CR or CRLF). */ |
3979 if (coding->eol_type == CODING_EOL_CR) | 4144 if (coding->eol_type == CODING_EOL_CR) |
3980 { | 4145 { |
3981 while (p < pend) if (*p++ == '\n') p[-1] = '\r'; | 4146 while (p < pend) if (*p++ == '\n') p[-1] = '\r'; |
3982 } | 4147 } |
3983 else | 4148 else |
3984 { | 4149 { |
3985 unsigned char *p2 = p; | |
3986 int count = 0; | 4150 int count = 0; |
3987 | 4151 |
3988 while (p2 < pend) if (*p2++ == '\n') count++; | 4152 while (p < pend) if (*p++ == '\n') count++; |
3989 if (GAP_SIZE < count) | 4153 if (src - dst < count) |
3990 make_gap (count - GAP_SIZE); | |
3991 p2 = GPT_ADDR + count; | |
3992 while (p < pend) | |
3993 { | 4154 { |
3994 *--p2 = *--pend; | 4155 /* We don't have sufficient room for putting LFs |
3995 if (*pend == '\n') *--p2 = '\r'; | 4156 back to CRLF. We must record converted and |
4157 not-yet-converted text back to the buffer | |
4158 content, enlarge the gap, then record them out of | |
4159 the buffer contents again. */ | |
4160 int add = len_byte + inserted_byte; | |
4161 | |
4162 GAP_SIZE -= add; | |
4163 ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add; | |
4164 GPT += inserted_byte; GPT_BYTE += inserted_byte; | |
4165 make_gap (count - GAP_SIZE); | |
4166 GAP_SIZE += add; | |
4167 ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add; | |
4168 GPT -= inserted_byte; GPT_BYTE -= inserted_byte; | |
4169 /* Don't forget to update SRC, DST, and PEND. */ | |
4170 src = GAP_END_ADDR - len_byte; | |
4171 dst = GPT_ADDR + inserted_byte; | |
4172 pend = dst; | |
3996 } | 4173 } |
3997 GPT += count; GAP_SIZE -= count; ZV += count; Z += count; | |
3998 ZV_BYTE += count; Z_BYTE += count; | |
3999 coding->produced += count; | |
4000 coding->produced_char += count; | |
4001 inserted += count; | 4174 inserted += count; |
4002 inserted_byte += count; | 4175 inserted_byte += count; |
4176 coding->produced += count; | |
4177 p = dst = pend + count; | |
4178 while (count) | |
4179 { | |
4180 *--p = *--pend; | |
4181 if (*p == '\n') count--, *--p = '\r'; | |
4182 } | |
4003 } | 4183 } |
4004 | 4184 |
4005 /* Suppress eol-format conversion in the further conversion. */ | 4185 /* Suppress eol-format conversion in the further conversion. */ |
4006 coding->eol_type = CODING_EOL_LF; | 4186 coding->eol_type = CODING_EOL_LF; |
4007 | 4187 |
4008 /* Restore the original symbol. */ | 4188 /* Restore the original symbol. */ |
4009 coding->symbol = saved_coding_symbol; | 4189 coding->symbol = saved_coding_symbol; |
4190 | |
4191 continue; | |
4010 } | 4192 } |
4011 if (len_byte <= 0) | 4193 if (len_byte <= 0) |
4012 break; | 4194 break; |
4013 if (result == CODING_FINISH_INSUFFICIENT_SRC) | 4195 if (result == CODING_FINISH_INSUFFICIENT_SRC) |
4014 { | 4196 { |
4015 /* The source text ends in invalid codes. Let's just | 4197 /* The source text ends in invalid codes. Let's just |
4016 make them valid buffer contents, and finish conversion. */ | 4198 make them valid buffer contents, and finish conversion. */ |
4017 inserted += len; | 4199 inserted += len_byte; |
4018 inserted_byte += len_byte; | 4200 inserted_byte += len_byte; |
4201 while (len_byte--) | |
4202 *src++ = *dst++; | |
4203 fake_multibyte = 1; | |
4019 break; | 4204 break; |
4020 } | 4205 } |
4021 if (inserted == coding->produced_char) | 4206 if (first) |
4022 /* We have just done the first batch of conversion. Let's | 4207 { |
4023 reconsider the required gap size now. | 4208 /* We have just done the first batch of conversion which was |
4024 | 4209 stopped because of insufficient gap. Let's reconsider the |
4025 We have converted CONSUMED bytes into PRODUCED bytes. To | 4210 required gap size (i.e. SRC - DST) now. |
4026 convert the remaining LEN bytes, we may need REQUIRE bytes | 4211 |
4027 of gap, where: | 4212 We have converted ORIG bytes (== coding->consumed) into |
4028 REQUIRE + LEN = (LEN * PRODUCED / CONSUMED) | 4213 NEW bytes (coding->produced). To convert the remaining |
4029 REQUIRE = LEN * (PRODUCED - CONSUMED) / CONSUMED | 4214 LEN bytes, we may need REQUIRE bytes of gap, where: |
4030 = LEN * DIFF / CONSUMED | 4215 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG) |
4031 Here, we are sure that DIFF is positive. */ | 4216 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG |
4032 require = len_byte * diff_byte / coding->consumed; | 4217 Here, we are sure that NEW >= ORIG. */ |
4033 if (GAP_SIZE < require) | 4218 require = (len_byte * (coding->produced - coding->consumed) |
4034 make_gap (require - GAP_SIZE); | 4219 / coding->consumed); |
4035 } | 4220 first = 0; |
4036 if (GAP_SIZE > 0) *GPT_ADDR = 0; /* Put an anchor. */ | 4221 } |
4222 if ((src - dst) < (require + 2000)) | |
4223 { | |
4224 /* See the comment above the previous call of make_gap. */ | |
4225 int add = len_byte + inserted_byte; | |
4226 | |
4227 GAP_SIZE -= add; | |
4228 ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add; | |
4229 GPT += inserted_byte; GPT_BYTE += inserted_byte; | |
4230 make_gap (require + 2000); | |
4231 GAP_SIZE += add; | |
4232 ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add; | |
4233 GPT -= inserted_byte; GPT_BYTE -= inserted_byte; | |
4234 /* Don't forget to update SRC, DST. */ | |
4235 src = GAP_END_ADDR - len_byte; | |
4236 dst = GPT_ADDR + inserted_byte; | |
4237 } | |
4238 } | |
4239 if (src - dst > 0) *dst = 0; /* Put an anchor. */ | |
4240 | |
4241 if (multibyte && (fake_multibyte || !encodep && (to - from) != (to_byte - from_byte))) | |
4242 inserted = multibyte_chars_in_text (GPT_ADDR, inserted_byte); | |
4243 | |
4244 /* Update various buffer positions for the new text. */ | |
4245 GAP_SIZE -= inserted_byte; | |
4246 ZV += inserted; Z+= inserted; | |
4247 ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte; | |
4248 GPT += inserted; GPT_BYTE += inserted_byte; | |
4037 | 4249 |
4038 if (adjust) | 4250 if (adjust) |
4039 { | 4251 { |
4040 adjust_after_replace (from, from_byte, to, to_byte, | 4252 adjust_after_replace (from, from_byte, to, to_byte, |
4041 inserted, inserted_byte); | 4253 inserted, inserted_byte); |
4053 inserted = XFASTINT (val); | 4265 inserted = XFASTINT (val); |
4054 } | 4266 } |
4055 if (pos >= from + orig_inserted) | 4267 if (pos >= from + orig_inserted) |
4056 temp_set_point (current_buffer, pos + (inserted - orig_inserted)); | 4268 temp_set_point (current_buffer, pos + (inserted - orig_inserted)); |
4057 } | 4269 } |
4058 } | 4270 signal_after_change (from, to - from, inserted); |
4059 | 4271 } |
4060 return ((from_byte - from_byte_orig) + inserted + (to_byte_orig - to_byte)); | 4272 |
4273 { | |
4274 int skip = (to_byte_orig - to_byte) + (from_byte - from_byte_orig); | |
4275 | |
4276 coding->consumed = to_byte_orig - from_byte_orig; | |
4277 coding->consumed_char = skip + (to - from); | |
4278 coding->produced = skip + inserted_byte; | |
4279 coding->produced_char = skip + inserted; | |
4280 } | |
4281 return 0; | |
4061 } | 4282 } |
4062 | 4283 |
4063 Lisp_Object | 4284 Lisp_Object |
4064 code_convert_string (str, coding, encodep, nocopy) | 4285 code_convert_string (str, coding, encodep, nocopy) |
4065 Lisp_Object str; | 4286 Lisp_Object str; |
4093 unibyte<->multibyte conversion. */ | 4314 unibyte<->multibyte conversion. */ |
4094 current_buffer->enable_multibyte_characters = Qnil; | 4315 current_buffer->enable_multibyte_characters = Qnil; |
4095 insert_from_string (str, 0, 0, to_byte, to_byte, 0); | 4316 insert_from_string (str, 0, 0, to_byte, to_byte, 0); |
4096 current_buffer->enable_multibyte_characters = Qt; | 4317 current_buffer->enable_multibyte_characters = Qt; |
4097 } | 4318 } |
4098 code_convert_region (BEGV, ZV, coding, encodep, 1); | 4319 code_convert_region (BEGV, BEGV_BYTE, ZV, ZV_BYTE, coding, encodep, 1); |
4099 if (encodep) | 4320 if (encodep) |
4100 /* We must return the buffer contents as unibyte string. */ | 4321 /* We must return the buffer contents as unibyte string. */ |
4101 current_buffer->enable_multibyte_characters = Qnil; | 4322 current_buffer->enable_multibyte_characters = Qnil; |
4102 str = make_buffer_string (BEGV, ZV, 0); | 4323 str = make_buffer_string (BEGV, ZV, 0); |
4103 set_buffer_internal (prev); | 4324 set_buffer_internal (prev); |
4375 | 4596 |
4376 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) | 4597 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) |
4377 error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data); | 4598 error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data); |
4378 | 4599 |
4379 coding.mode |= CODING_MODE_LAST_BLOCK; | 4600 coding.mode |= CODING_MODE_LAST_BLOCK; |
4380 len = code_convert_region (from, to, &coding, encodep, 1); | 4601 code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to), |
4381 return make_number (len); | 4602 &coding, encodep, 1); |
4603 return make_number (coding.produced_char); | |
4382 } | 4604 } |
4383 | 4605 |
4384 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region, | 4606 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region, |
4385 3, 3, "r\nzCoding system: ", | 4607 3, 3, "r\nzCoding system: ", |
4386 "Decode the current region by specified coding system.\n\ | 4608 "Decode the current region by specified coding system.\n\ |