Mercurial > emacs
changeset 20931:068eb408c911
(decode_coding_iso2022): Update coding->fake_multibyte.
(ENCODE_SINGLE_SHIFT_2, ENCODE_SINGLE_SHIFT_3,
encode_coding_iso2022, decode_coding_sjis_big5,
encode_coding_sjis_big5, decode_eol, encode_eol, decode_coding,
encode_coding): Likewise.
(shrink_decoding_region, shrink_encoding_region): Do not skip
non-ASCII codes in any case. Fix a bug in getting the starting
address from BEG.
(code_convert_region): Sync character positions correctly by
paying attention to coding->fake_multibyte.
(code_convert_string): Set the numbers of characters and bytes
just processed in members of CODING.
(code_convert_string): Adjusted for the change of
code_convert_region.
(code_convert_region1): Likewise.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Fri, 20 Feb 1998 01:40:47 +0000 |
parents | 1331679fe704 |
children | 3c2c8431c51d |
files | src/coding.c |
diffstat | 1 files changed, 385 insertions(+), 163 deletions(-) [+] |
line wrap: on
line diff
--- a/src/coding.c Fri Feb 20 01:40:47 1998 +0000 +++ b/src/coding.c Fri Feb 20 01:40:47 1998 +0000 @@ -1007,6 +1007,7 @@ unification_table = Vstandard_character_unification_table_for_decode; coding->produced_char = 0; + coding->fake_multibyte = 0; while (src < src_end && (dst_bytes ? (dst < adjusted_dst_end) : (dst < src - 6))) @@ -1046,21 +1047,12 @@ case ISO_0xA0_or_0xFF: if (charset1 < 0 || CHARSET_CHARS (charset1) == 94 || coding->flags & CODING_FLAG_ISO_SEVEN_BITS) - { - /* Invalid code. */ - *dst++ = c1; - coding->produced_char++; - break; - } + goto label_invalid_code; /* This is a graphic character, we fall down ... */ case ISO_graphic_plane_1: if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) - { - /* Invalid code. */ - *dst++ = c1; - coding->produced_char++; - } + goto label_invalid_code; else DECODE_ISO_CHARACTER (charset1, c1); break; @@ -1310,9 +1302,9 @@ break; label_invalid_code: - coding->produced_char += src - src_base; while (src_base < src) *dst++ = *src_base++; + coding->fake_multibyte = 1; } continue; @@ -1323,19 +1315,26 @@ break; } - if (result == CODING_FINISH_NORMAL - && src < src_end) - result = CODING_FINISH_INSUFFICIENT_DST; - - /* If this is the last block of the text to be decoded, we had - better just flush out all remaining codes in the text although - they are not valid characters. */ - if (coding->mode & CODING_MODE_LAST_BLOCK) + if (src < src_end) { - bcopy (src, dst, src_end - src); - dst += (src_end - src); - src = src_end; + if (result == CODING_FINISH_NORMAL) + result = CODING_FINISH_INSUFFICIENT_DST; + else if (result != CODING_FINISH_INCONSISTENT_EOL + && coding->mode & CODING_MODE_LAST_BLOCK) + { + /* This is the last block of the text to be decoded. We had + better just flush out all remaining codes in the text + although they are not valid characters. 
*/ + src_bytes = src_end - src; + if (dst_bytes && (dst_end - dst < src_bytes)) + src_bytes = dst_end - dst; + bcopy (src, dst, src_bytes); + dst += src_bytes; + src += src_bytes; + coding->fake_multibyte = 1; + } } + coding->consumed = coding->consumed_char = src - source; coding->produced = dst - destination; return result; @@ -1413,16 +1412,22 @@ if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \ *dst++ = ISO_CODE_ESC, *dst++ = 'N'; \ else \ - *dst++ = ISO_CODE_SS2; \ + { \ + *dst++ = ISO_CODE_SS2; \ + coding->fake_multibyte = 1; \ + } \ CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \ } while (0) -#define ENCODE_SINGLE_SHIFT_3 \ - do { \ +#define ENCODE_SINGLE_SHIFT_3 \ + do { \ if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \ - *dst++ = ISO_CODE_ESC, *dst++ = 'O'; \ - else \ - *dst++ = ISO_CODE_SS3; \ + *dst++ = ISO_CODE_ESC, *dst++ = 'O'; \ + else \ + { \ + *dst++ = ISO_CODE_SS3; \ + coding->fake_multibyte = 1; \ + } \ CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \ } while (0) @@ -1746,6 +1751,7 @@ unification_table = Vstandard_character_unification_table_for_encode; coding->consumed_char = 0; + coding->fake_multibyte = 0; while (src < src_end && (dst_bytes ? (dst < adjusted_dst_end) : (dst < src - 19))) @@ -1933,15 +1939,17 @@ break; } - if (result == CODING_FINISH_NORMAL - && src < src_end) - result = CODING_FINISH_INSUFFICIENT_DST; - - /* If this is the last block of the text to be encoded, we must - reset graphic planes and registers to the initial state, and - flush out the carryover if any. */ - if (coding->mode & CODING_MODE_LAST_BLOCK) - ENCODE_RESET_PLANE_AND_REGISTER; + if (src < src_end) + { + if (result == CODING_FINISH_NORMAL) + result = CODING_FINISH_INSUFFICIENT_DST; + else + /* If this is the last block of the text to be encoded, we + must reset graphic planes and registers to the initial + state, and flush out the carryover if any. 
*/ + if (coding->mode & CODING_MODE_LAST_BLOCK) + ENCODE_RESET_PLANE_AND_REGISTER; + } coding->consumed = src - source; coding->produced = coding->produced_char = dst - destination; @@ -2054,7 +2062,10 @@ if (sjis_p && charset_alt == charset_katakana_jisx0201) \ *dst++ = c1; \ else \ - *dst++ = charset_alt, *dst++ = c1; \ + { \ + *dst++ = charset_alt, *dst++ = c1; \ + coding->fake_multibyte = 1; \ + } \ } \ else \ { \ @@ -2062,21 +2073,25 @@ if (sjis_p && charset_alt == charset_jisx0208) \ { \ unsigned char s1, s2; \ - \ + \ ENCODE_SJIS (c1, c2, s1, s2); \ *dst++ = s1, *dst++ = s2; \ + coding->fake_multibyte = 1; \ } \ else if (!sjis_p \ && (charset_alt == charset_big5_1 \ || charset_alt == charset_big5_2)) \ { \ unsigned char b1, b2; \ - \ + \ ENCODE_BIG5 (charset_alt, c1, c2, b1, b2); \ *dst++ = b1, *dst++ = b2; \ } \ else \ - *dst++ = charset_alt, *dst++ = c1, *dst++ = c2; \ + { \ + *dst++ = charset_alt, *dst++ = c1, *dst++ = c2; \ + coding->fake_multibyte = 1; \ + } \ } \ coding->consumed_char++; \ } while (0); @@ -2155,6 +2170,7 @@ unification_table = Vstandard_character_unification_table_for_decode; coding->produced_char = 0; + coding->fake_multibyte = 0; while (src < src_end && (dst_bytes ? 
(dst < adjusted_dst_end) : (dst < src - 3))) @@ -2203,30 +2219,24 @@ } else if (c1 < 0x80) DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2); - else if (c1 < 0xA0 || c1 >= 0xE0) + else if (c1 < 0xA0) { - /* SJIS -> JISX0208, BIG5 -> Big5 (only if 0xE0 <= c1 < 0xFF) */ + /* SJIS -> JISX0208 */ if (sjis_p) { ONE_MORE_BYTE (c2); - DECODE_SJIS (c1, c2, c3, c4); - DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4); + if (c2 >= 0x40) + { + DECODE_SJIS (c1, c2, c3, c4); + DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4); + } + else + goto label_invalid_code_2; } - else if (c1 >= 0xE0 && c1 < 0xFF) - { - int charset; - - ONE_MORE_BYTE (c2); - DECODE_BIG5 (c1, c2, charset, c3, c4); - DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4); - } - else /* Invalid code */ - { - *dst++ = c1; - coding->produced_char++; - } + else + goto label_invalid_code_1; } - else + else if (c1 < 0xE0) { /* SJIS -> JISX0201-Kana, BIG5 -> Big5 */ if (sjis_p) @@ -2237,10 +2247,55 @@ int charset; ONE_MORE_BYTE (c2); - DECODE_BIG5 (c1, c2, charset, c3, c4); - DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4); + if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE)) + { + DECODE_BIG5 (c1, c2, charset, c3, c4); + DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4); + } + else + goto label_invalid_code_2; } } + else /* C1 >= 0xE0 */ + { + /* SJIS -> JISX0208, BIG5 -> Big5 */ + if (sjis_p) + { + ONE_MORE_BYTE (c2); + if (c2 >= 0x40) + { + DECODE_SJIS (c1, c2, c3, c4); + DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4); + } + else + goto label_invalid_code_2; + } + else + { + int charset; + + ONE_MORE_BYTE (c2); + if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE)) + { + DECODE_BIG5 (c1, c2, charset, c3, c4); + DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4); + } + else + goto label_invalid_code_2; + } + } + continue; + + label_invalid_code_1: + *dst++ = c1; + coding->produced_char++; + coding->fake_multibyte = 1; + continue; + + label_invalid_code_2: + *dst++ = c1; *dst++= c2; + 
coding->produced_char += 2; + coding->fake_multibyte = 1; continue; label_end_of_loop: @@ -2250,9 +2305,22 @@ break; } - if (result == CODING_FINISH_NORMAL - && src < src_end) - result = CODING_FINISH_INSUFFICIENT_DST; + if (src < src_end) + { + if (result == CODING_FINISH_NORMAL) + result = CODING_FINISH_INSUFFICIENT_DST; + else if (result != CODING_FINISH_INCONSISTENT_EOL + && coding->mode & CODING_MODE_LAST_BLOCK) + { + src_bytes = src_end - src; + if (dst_bytes && (dst_end - dst < src_bytes)) + src_bytes = dst_end - dst; + bcopy (dst, src, src_bytes); + src += src_bytes; + dst += src_bytes; + coding->fake_multibyte = 1; + } + } coding->consumed = coding->consumed_char = src - source; coding->produced = dst - destination; @@ -2291,6 +2359,7 @@ unification_table = Vstandard_character_unification_table_for_encode; coding->consumed_char = 0; + coding->fake_multibyte = 0; while (src < src_end && (dst_bytes ? (dst < adjusted_dst_end) : (dst < src - 1))) @@ -2402,8 +2471,11 @@ unsigned char *src_end = source + src_bytes; unsigned char *dst = destination; unsigned char *dst_end = destination + dst_bytes; + unsigned char c; int result = CODING_FINISH_NORMAL; + coding->fake_multibyte = 0; + if (src_bytes <= 0) return result; @@ -2421,7 +2493,8 @@ : (dst < src - 1))) { unsigned char *src_base = src; - unsigned char c = *src++; + + c = *src++; if (c == '\r') { ONE_MORE_BYTE (c); @@ -2433,6 +2506,8 @@ goto label_end_of_loop_2; } *dst++ = '\r'; + if (BASE_LEADING_CODE_P (c)) + coding->fake_multibyte = 1; } *dst++ = c; } @@ -2443,7 +2518,11 @@ goto label_end_of_loop_2; } else - *dst++ = c; + { + *dst++ = c; + if (BASE_LEADING_CODE_P (c)) + coding->fake_multibyte = 1; + } continue; label_end_of_loop: @@ -2461,7 +2540,13 @@ case CODING_EOL_CR: if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) { - while (src < src_end) if (*src++ == '\n') break; + while (src < src_end) + { + if ((c = *src++) == '\n') + break; + if (BASE_LEADING_CODE_P (c)) + coding->fake_multibyte = 1; + 
} if (*--src == '\n') { src_bytes = src - source; @@ -2493,6 +2578,7 @@ safe_bcopy (source, destination, src_bytes); src += src_bytes; dst += dst_bytes; + coding->fake_multibyte = 1; break; } @@ -2515,6 +2601,8 @@ unsigned char *dst = destination; int result = CODING_FINISH_NORMAL; + coding->fake_multibyte = 0; + if (coding->eol_type == CODING_EOL_CRLF) { unsigned char c; @@ -2534,13 +2622,19 @@ || (c == '\r' && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))) *dst++ = '\r', *dst++ = '\n'; else - *dst++ = c; + { + *dst++ = c; + if (BASE_LEADING_CODE_P (c)) + coding->fake_multibyte = 1; + } } if (src < src_end) result = CODING_FINISH_INSUFFICIENT_DST; } else { + unsigned char c; + if (dst_bytes && src_bytes > dst_bytes) { src_bytes = dst_bytes; @@ -2549,19 +2643,31 @@ if (dst_bytes) bcopy (source, destination, src_bytes); else - safe_bcopy (source, destination, src_bytes); + { + safe_bcopy (source, destination, src_bytes); + dst_bytes = src_bytes; + } if (coding->eol_type == CODING_EOL_CRLF) { while (src_bytes--) - if (*dst++ == '\n') dst[-1] = '\r'; + { + if ((c = *dst++) == '\n') + dst[-1] = '\r'; + else if (BASE_LEADING_CODE_P (c)) + coding->fake_multibyte = 1; + } } - else if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY) + else { - while (src_bytes--) - if (*dst++ == '\r') dst[-1] = '\n'; + if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY) + { + while (src_bytes--) + if (*dst++ == '\r') dst[-1] = '\n'; + } + coding->fake_multibyte = 1; } - src += src_bytes; - dst += src_bytes; + src = source + dst_bytes; + dst = destination + dst_bytes; } coding->consumed = coding->consumed_char = src - source; @@ -3458,6 +3564,7 @@ { coding->produced = coding->produced_char = 0; coding->consumed = coding->consumed_char = 0; + coding->fake_multibyte = 0; return CODING_FINISH_NORMAL; } @@ -3514,6 +3621,7 @@ bcopy (source, destination, coding->produced); else safe_bcopy (source, destination, coding->produced); + coding->fake_multibyte = 1; coding->consumed = coding->consumed_char 
= coding->produced_char = coding->produced; break; @@ -3536,6 +3644,7 @@ { coding->produced = coding->produced_char = 0; coding->consumed = coding->consumed_char = 0; + coding->fake_multibyte = 0; return CODING_FINISH_NORMAL; } @@ -3592,6 +3701,7 @@ while (p < pend) if (*p++ == '\015') p[-1] = '\n'; } + coding->fake_multibyte = 1; coding->consumed = coding->consumed_char = coding->produced_char = coding->produced; break; @@ -3600,10 +3710,11 @@ return result; } -/* Scan text in the region between *BEG and *END, skip characters - which we don't have to decode by coding system CODING at the head - and tail, then set *BEG and *END to the region of the text we - actually have to convert. +/* Scan text in the region between *BEG and *END (byte positions), + skip characters which we don't have to decode by coding system + CODING at the head and tail, then set *BEG and *END to the region + of the text we actually have to convert. The caller should move + the gap out of the region in advance. If STR is not NULL, *BEG and *END are indices into STR. */ @@ -3613,7 +3724,7 @@ struct coding_system *coding; unsigned char *str; { - unsigned char *begp_orig, *begp, *endp_orig, *endp; + unsigned char *begp_orig, *begp, *endp_orig, *endp, c; int eol_conversion; if (coding->type == coding_type_ccl @@ -3625,8 +3736,8 @@ } else if (coding->type == coding_type_no_conversion) { - /* We need no conversion. */ - *beg = *end; + /* We need no conversion, but don't have to skip any data here. + Decoding routine handles them effectively anyway. 
*/ return; } @@ -3642,8 +3753,7 @@ } else { - move_gap (*beg); - begp_orig = begp = GAP_END_ADDR; + begp_orig = begp = BYTE_POS_ADDR (*beg); endp_orig = endp = begp + *end - *beg; } @@ -3656,8 +3766,9 @@ if (eol_conversion) { if (coding->heading_ascii < 0) - while (begp < endp && *begp != '\r') begp++; - while (begp < endp && *(endp - 1) != '\r') endp--; + while (begp < endp && *begp != '\r' && *begp < 0x80) begp++; + while (begp < endp && *(endp - 1) != '\r' && *(endp - 1) < 0x80) + endp--; } else begp = endp; @@ -3686,8 +3797,6 @@ default: /* i.e. case coding_type_iso2022: */ if (coding->heading_ascii < 0) { - unsigned char c; - /* We can skip all ASCII characters at the head except for a few control codes. */ while (begp < endp && (c = *begp) < 0x80 @@ -3702,7 +3811,7 @@ case CODING_CATEGORY_IDX_ISO_8_2: /* We can skip all ASCII characters at the tail. */ if (eol_conversion) - while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--; + while (begp < endp && (c = endp[-1]) < 0x80 && c != '\n') endp--; else while (begp < endp && endp[-1] < 0x80) endp--; break; @@ -3712,10 +3821,12 @@ /* We can skip all charactes at the tail except for ESC and the following 2-byte at the tail. */ if (eol_conversion) - while (begp < endp && endp[-1] != ISO_CODE_ESC && endp[-1] != '\n') + while (begp < endp + && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC && c != '\n') endp--; else - while (begp < endp && endp[-1] != ISO_CODE_ESC) + while (begp < endp + && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC) endp--; if (begp < endp && endp[-1] == ISO_CODE_ESC) { @@ -3762,8 +3873,7 @@ } else { - move_gap (*beg); - begp_orig = begp = GAP_END_ADDR; + begp_orig = begp = BYTE_POS_ADDR (*beg); endp_orig = endp = begp + *end - *beg; } @@ -3821,8 +3931,13 @@ } /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the - text from FROM to TO by coding system CODING, and return number of - characters in the resulting text. 
+ text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by + coding system CODING, and return the status code of code conversion + (currently, this value has no meaning). + + How many characters (and bytes) are converted to how many + characters (and bytes) are recorded in members of the structure + CODING. If ADJUST is nonzero, we do various things as if the original text is deleted and a new text is inserted. See the comments in @@ -3832,22 +3947,34 @@ pre-write-conversion functions (if any) should be processed. */ int -code_convert_region (from, to, coding, encodep, adjust) - int from, to, encodep, adjust; +code_convert_region (from, from_byte, to, to_byte, coding, encodep, adjust) + int from, from_byte, to, to_byte, encodep, adjust; struct coding_system *coding; { - int len = to - from, require, inserted, inserted_byte; - int from_byte, to_byte, len_byte; + int len = to - from, len_byte = to_byte - from_byte; + int require, inserted, inserted_byte; int from_byte_orig, to_byte_orig; Lisp_Object saved_coding_symbol = Qnil; + int multibyte = !NILP (current_buffer->enable_multibyte_characters); + int first = 1; + int fake_multibyte = 0; + unsigned char *src, *dst; if (adjust) { + int saved_from = from; + prepare_to_modify_buffer (from, to, &from); - to = from + len; + if (saved_from != from) + { + to = from + len; + if (multibyte) + from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to); + else + from_byte = from, to_byte = to; + len_byte = to_byte - from_byte; + } } - from_byte = CHAR_TO_BYTE (from); to_byte = CHAR_TO_BYTE (to); - len_byte = to_byte - from_byte; if (! 
encodep && CODING_REQUIRE_DETECTION (coding)) { @@ -3860,7 +3987,7 @@ move_gap_both (from, from_byte); if (coding->type == coding_type_undecided) { - detect_coding (coding, BYTE_POS_ADDR (from), len); + detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte); if (coding->type == coding_type_undecided) coding->type = coding_type_emacs_mule; } @@ -3876,10 +4003,35 @@ } } + coding->consumed_char = len, coding->consumed = len_byte; + if (encodep ? ! CODING_REQUIRE_ENCODING (coding) : ! CODING_REQUIRE_DECODING (coding)) - return len; + { + coding->produced = len_byte; + if (multibyte) + { + if (GPT < from || GPT > to) + move_gap_both (from, from_byte); + coding->produced_char + = multibyte_chars_in_text (BYTE_POS_ADDR (from_byte), len_byte); + if (coding->produced_char != len) + { + int diff = coding->produced_char - len; + + if (adjust) + adjust_before_replace (from, from_byte, to, to_byte); + ZV += diff; Z += diff; GPT += diff; + if (adjust) + adjust_after_replace (from, from_byte, to, to_byte, + diff, 0); + } + } + else + coding->produced_char = len_byte; + return 0; + } /* Now we convert the text. */ @@ -3900,33 +4052,40 @@ len = ZV - BEGV; new = current_buffer; set_buffer_internal_1 (prev); - del_range (from, to); + del_range_2 (from, to, from_byte, to_byte); insert_from_buffer (new, BEG, len, 0); to = from + len; - to_byte = CHAR_TO_BYTE (to); + to_byte = multibyte ? CHAR_TO_BYTE (to) : to; len_byte = to_byte - from_byte; } } /* Try to skip the heading and tailing ASCIIs. */ from_byte_orig = from_byte; to_byte_orig = to_byte; + if (from < GPT && GPT < to) + move_gap (from); if (encodep) shrink_encoding_region (&from_byte, &to_byte, coding, NULL); else shrink_decoding_region (&from_byte, &to_byte, coding, NULL); if (from_byte == to_byte) - return len; + { + coding->produced = len_byte; + coding->produced_char = multibyte ? len : len_byte; + return 0; + } + /* Here, the excluded region by shrinking contains only ASCIIs. 
*/ from += (from_byte - from_byte_orig); to += (to_byte - to_byte_orig); len = to - from; len_byte = to_byte - from_byte; - /* For converion, we must put the gap before the text to be decoded - in addition to make the gap larger for efficient decoding. The - required gap size starts from 2000 which is the magic number used - in make_gap. But, after one batch of conversion, it will be - incremented if we find that it is not enough . */ + /* For converion, we must put the gap before the text in addition to + making the gap larger for efficient decoding. The required gap + size starts from 2000 which is the magic number used in make_gap. + But, after one batch of conversion, it will be incremented if we + find that it is not enough . */ require = 2000; if (GAP_SIZE < require) @@ -3942,38 +4101,44 @@ end_unchanged = Z - GPT; inserted = inserted_byte = 0; + src = GAP_END_ADDR, dst = GPT_ADDR; + + GAP_SIZE += len_byte; + ZV -= len; + Z -= len; + ZV_BYTE -= len_byte; + Z_BYTE -= len_byte; + for (;;) { - int result, diff_char, diff_byte; + int result; /* The buffer memory is changed from: - +--------+converted-text+------------+-----original-text-----+---+ - |<-from->|<--inserted-->|<-GAP_SIZE->|<---------len--------->|---| */ - + +--------+converted-text+---------+-------original-text------+---+ + |<-from->|<--inserted-->|---------|<-----------len---------->|---| + |<------------------- GAP_SIZE -------------------->| */ if (encodep) - result = encode_coding (coding, GAP_END_ADDR, GPT_ADDR, len_byte, 0); + result = encode_coding (coding, src, dst, len_byte, 0); else - result = decode_coding (coding, GAP_END_ADDR, GPT_ADDR, len_byte, 0); + result = decode_coding (coding, src, dst, len_byte, 0); /* to: +--------+-------converted-text--------+--+---original-text--+---+ - |<-from->|<----(inserted+produced)---->|--|<-(len-consumed)->|---| */ - - diff_char = coding->produced_char - coding->consumed_char; - diff_byte = coding->produced - coding->consumed; - - GAP_SIZE -= 
diff_byte; - ZV += diff_char; ZV_BYTE += diff_byte; - Z += diff_char; Z_BYTE += diff_byte; - GPT += coding->produced_char; GPT_BYTE += coding->produced; - + |<-from->|<--inserted-->|<--produced-->|--|<-(len-consumed)->|---| + |<------------------- GAP_SIZE -------------------->| */ + if (coding->fake_multibyte) + fake_multibyte = 1; + + if (!encodep && !multibyte) + coding->produced_char = coding->produced; inserted += coding->produced_char; inserted_byte += coding->produced; - len -= coding->consumed_char; len_byte -= coding->consumed; + src += coding->consumed; + dst += inserted_byte; if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL) { - unsigned char *p = GPT_ADDR - inserted_byte, *pend = GPT_ADDR; + unsigned char *pend = dst, *p = pend - inserted_byte; /* Encode LFs back to the original eol format (CR or CRLF). */ if (coding->eol_type == CODING_EOL_CR) @@ -3982,24 +4147,39 @@ } else { - unsigned char *p2 = p; int count = 0; - while (p2 < pend) if (*p2++ == '\n') count++; - if (GAP_SIZE < count) - make_gap (count - GAP_SIZE); - p2 = GPT_ADDR + count; - while (p < pend) + while (p < pend) if (*p++ == '\n') count++; + if (src - dst < count) { - *--p2 = *--pend; - if (*pend == '\n') *--p2 = '\r'; + /* We don't have sufficient room for putting LFs + back to CRLF. We must record converted and + not-yet-converted text back to the buffer + content, enlarge the gap, then record them out of + the buffer contents again. */ + int add = len_byte + inserted_byte; + + GAP_SIZE -= add; + ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add; + GPT += inserted_byte; GPT_BYTE += inserted_byte; + make_gap (count - GAP_SIZE); + GAP_SIZE += add; + ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add; + GPT -= inserted_byte; GPT_BYTE -= inserted_byte; + /* Don't forget to update SRC, DST, and PEND. 
*/ + src = GAP_END_ADDR - len_byte; + dst = GPT_ADDR + inserted_byte; + pend = dst; } - GPT += count; GAP_SIZE -= count; ZV += count; Z += count; - ZV_BYTE += count; Z_BYTE += count; - coding->produced += count; - coding->produced_char += count; inserted += count; inserted_byte += count; + coding->produced += count; + p = dst = pend + count; + while (count) + { + *--p = *--pend; + if (*p == '\n') count--, *--p = '\r'; + } } /* Suppress eol-format conversion in the further conversion. */ @@ -4007,6 +4187,8 @@ /* Restore the original symbol. */ coding->symbol = saved_coding_symbol; + + continue; } if (len_byte <= 0) break; @@ -4014,26 +4196,56 @@ { /* The source text ends in invalid codes. Let's just make them valid buffer contents, and finish conversion. */ - inserted += len; + inserted += len_byte; inserted_byte += len_byte; + while (len_byte--) + *src++ = *dst++; + fake_multibyte = 1; break; } - if (inserted == coding->produced_char) - /* We have just done the first batch of conversion. Let's - reconsider the required gap size now. - - We have converted CONSUMED bytes into PRODUCED bytes. To - convert the remaining LEN bytes, we may need REQUIRE bytes - of gap, where: - REQUIRE + LEN = (LEN * PRODUCED / CONSUMED) - REQUIRE = LEN * (PRODUCED - CONSUMED) / CONSUMED - = LEN * DIFF / CONSUMED - Here, we are sure that DIFF is positive. */ - require = len_byte * diff_byte / coding->consumed; - if (GAP_SIZE < require) - make_gap (require - GAP_SIZE); + if (first) + { + /* We have just done the first batch of conversion which was + stoped because of insufficient gap. Let's reconsider the + required gap size (i.e. SRT - DST) now. + + We have converted ORIG bytes (== coding->consumed) into + NEW bytes (coding->produced). To convert the remaining + LEN bytes, we may need REQUIRE bytes of gap, where: + REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG) + REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG + Here, we are sure that NEW >= ORIG. 
*/ + require = (len_byte * (coding->produced - coding->consumed) + / coding->consumed); + first = 0; + } + if ((src - dst) < (require + 2000)) + { + /* See the comment above the previous call of make_gap. */ + int add = len_byte + inserted_byte; + + GAP_SIZE -= add; + ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add; + GPT += inserted_byte; GPT_BYTE += inserted_byte; + make_gap (require + 2000); + GAP_SIZE += add; + ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add; + GPT -= inserted_byte; GPT_BYTE -= inserted_byte; + /* Don't forget to update SRC, DST. */ + src = GAP_END_ADDR - len_byte; + dst = GPT_ADDR + inserted_byte; + } } - if (GAP_SIZE > 0) *GPT_ADDR = 0; /* Put an anchor. */ + if (src - dst > 0) *dst = 0; /* Put an anchor. */ + + if (multibyte && (fake_multibyte || !encodep && (to - from) != (to_byte - from_byte))) + inserted = multibyte_chars_in_text (GPT_ADDR, inserted_byte); + + /* Update various buffer positions for the new text. */ + GAP_SIZE -= inserted_byte; + ZV += inserted; Z+= inserted; + ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte; + GPT += inserted; GPT_BYTE += inserted_byte; if (adjust) { @@ -4055,9 +4267,18 @@ if (pos >= from + orig_inserted) temp_set_point (current_buffer, pos + (inserted - orig_inserted)); } + signal_after_change (from, to - from, inserted); } - return ((from_byte - from_byte_orig) + inserted + (to_byte_orig - to_byte)); + { + int skip = (to_byte_orig - to_byte) + (from_byte - from_byte_orig); + + coding->consumed = to_byte_orig - from_byte_orig; + coding->consumed_char = skip + (to - from); + coding->produced = skip + inserted_byte; + coding->produced_char = skip + inserted; + } + return 0; } Lisp_Object @@ -4095,7 +4316,7 @@ insert_from_string (str, 0, 0, to_byte, to_byte, 0); current_buffer->enable_multibyte_characters = Qt; } - code_convert_region (BEGV, ZV, coding, encodep, 1); + code_convert_region (BEGV, BEGV_BYTE, ZV, ZV_BYTE, coding, encodep, 1); if (encodep) /* We must return the buffer contents as unibyte 
string. */ current_buffer->enable_multibyte_characters = Qnil; @@ -4377,8 +4598,9 @@ error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data); coding.mode |= CODING_MODE_LAST_BLOCK; - len = code_convert_region (from, to, &coding, encodep, 1); - return make_number (len); + code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to), + &coding, encodep, 1); + return make_number (coding.produced_char); } DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,