Mercurial > emacs
comparison src/coding.c @ 21321:27b08da98abe
(code_convert_region): Handle skipped ASCII characters
at the head and tail of the conversion region correctly when adjusting
position keepers. Call adjust_after_insert to record undo info
and adjust markers when we don't have to change the byte sequence of
a buffer.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Mon, 30 Mar 1998 06:00:54 +0000 |
parents | 6d8fff47f6a0 |
children | 4c89837392b3 |
comparison
equal
deleted
inserted
replaced
21320:278c256defc6 | 21321:27b08da98abe |
---|---|
3948 int from, from_byte, to, to_byte, encodep, replace; | 3948 int from, from_byte, to, to_byte, encodep, replace; |
3949 struct coding_system *coding; | 3949 struct coding_system *coding; |
3950 { | 3950 { |
3951 int len = to - from, len_byte = to_byte - from_byte; | 3951 int len = to - from, len_byte = to_byte - from_byte; |
3952 int require, inserted, inserted_byte; | 3952 int require, inserted, inserted_byte; |
3953 int from_byte_orig, to_byte_orig; | 3953 int head_skip, tail_skip, total_skip; |
3954 Lisp_Object saved_coding_symbol = Qnil; | 3954 Lisp_Object saved_coding_symbol = Qnil; |
3955 int multibyte = !NILP (current_buffer->enable_multibyte_characters); | 3955 int multibyte = !NILP (current_buffer->enable_multibyte_characters); |
3956 int first = 1; | 3956 int first = 1; |
3957 int fake_multibyte = 0; | 3957 int fake_multibyte = 0; |
3958 unsigned char *src, *dst; | 3958 unsigned char *src, *dst; |
3959 Lisp_Object deletion = Qnil; | |
3959 | 3960 |
3960 if (replace) | 3961 if (replace) |
3961 { | 3962 { |
3962 int saved_from = from; | 3963 int saved_from = from; |
3963 | 3964 |
3973 } | 3974 } |
3974 } | 3975 } |
3975 | 3976 |
3976 if (! encodep && CODING_REQUIRE_DETECTION (coding)) | 3977 if (! encodep && CODING_REQUIRE_DETECTION (coding)) |
3977 { | 3978 { |
3978 /* We must detect encoding of text and eol. Even if detection | 3979 /* We must detect encoding of text and eol format. */ |
3979 routines can't decide the encoding, we should not let them | |
3980 undecided because the deeper decoding routine (decode_coding) | |
3981 tries to detect the encodings in vain in that case. */ | |
3982 | 3980 |
3983 if (from < GPT && to > GPT) | 3981 if (from < GPT && to > GPT) |
3984 move_gap_both (from, from_byte); | 3982 move_gap_both (from, from_byte); |
3985 if (coding->type == coding_type_undecided) | 3983 if (coding->type == coding_type_undecided) |
3986 { | 3984 { |
3987 detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte); | 3985 detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte); |
3988 if (coding->type == coding_type_undecided) | 3986 if (coding->type == coding_type_undecided) |
3987 /* It seems that the text contains only ASCII, but we | |
3988 should not leave it undecided because the deeper | |
3989 decoding routine (decode_coding) tries to detect the | |
3990 encodings again in vain. */ | |
3989 coding->type = coding_type_emacs_mule; | 3991 coding->type = coding_type_emacs_mule; |
3990 } | 3992 } |
3991 if (coding->eol_type == CODING_EOL_UNDECIDED) | 3993 if (coding->eol_type == CODING_EOL_UNDECIDED) |
3992 { | 3994 { |
3993 saved_coding_symbol = coding->symbol; | 3995 saved_coding_symbol = coding->symbol; |
4005 if (encodep | 4007 if (encodep |
4006 ? ! CODING_REQUIRE_ENCODING (coding) | 4008 ? ! CODING_REQUIRE_ENCODING (coding) |
4007 : ! CODING_REQUIRE_DECODING (coding)) | 4009 : ! CODING_REQUIRE_DECODING (coding)) |
4008 { | 4010 { |
4009 coding->produced = len_byte; | 4011 coding->produced = len_byte; |
4010 if (multibyte) | 4012 if (multibyte |
4011 { | 4013 && ! replace |
4012 adjust_before_replace (from, from_byte, to, to_byte); | 4014 /* See the comment of the member heading_ascii in coding.h. */ |
4013 | 4015 && coding->heading_ascii < len_byte) |
4016 { | |
4014 /* We still may have to combine byte at the head and the | 4017 /* We still may have to combine byte at the head and the |
4015 tail of the text in the region. */ | 4018 tail of the text in the region. */ |
4016 if (GPT != to) | 4019 if (from < GPT && GPT < to) |
4017 move_gap_both (to, to_byte); | 4020 move_gap_both (to, to_byte); |
4018 coding->produced_char | 4021 len = multibyte_chars_in_text (BYTE_POS_ADDR (from_byte), len_byte); |
4019 = multibyte_chars_in_text (BYTE_POS_ADDR (from_byte), len_byte); | 4022 adjust_after_insert (from, from_byte, to, to_byte, len); |
4020 GAP_SIZE += len_byte; | 4023 coding->produced_char = len; |
4021 GPT_BYTE -= len_byte; | |
4022 ZV_BYTE -= len_byte; | |
4023 Z_BYTE -= len_byte; | |
4024 GPT -= len; | |
4025 ZV -= len; | |
4026 Z -= len; | |
4027 adjust_after_replace (from, from_byte, to, to_byte, | |
4028 coding->produced_char, len_byte, replace); | |
4029 } | 4024 } |
4030 else | 4025 else |
4031 coding->produced_char = len_byte; | 4026 coding->produced_char = len_byte; |
4032 return 0; | 4027 return 0; |
4033 } | 4028 } |
4056 to_byte = multibyte ? CHAR_TO_BYTE (to) : to; | 4051 to_byte = multibyte ? CHAR_TO_BYTE (to) : to; |
4057 len_byte = to_byte - from_byte; | 4052 len_byte = to_byte - from_byte; |
4058 } | 4053 } |
4059 } | 4054 } |
4060 | 4055 |
4056 if (replace) | |
4057 deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1); | |
4058 | |
4061 /* Try to skip the heading and tailing ASCIIs. */ | 4059 /* Try to skip the heading and tailing ASCIIs. */ |
4062 from_byte_orig = from_byte; to_byte_orig = to_byte; | 4060 { |
4063 if (from < GPT && GPT < to) | 4061 int from_byte_orig = from_byte, to_byte_orig = to_byte; |
4064 move_gap (from); | 4062 |
4065 if (encodep) | 4063 if (from < GPT && GPT < to) |
4066 shrink_encoding_region (&from_byte, &to_byte, coding, NULL); | 4064 move_gap_both (from, from_byte); |
4067 else | 4065 if (encodep) |
4068 shrink_decoding_region (&from_byte, &to_byte, coding, NULL); | 4066 shrink_encoding_region (&from_byte, &to_byte, coding, NULL); |
4069 if (from_byte == to_byte) | 4067 else |
4070 { | 4068 shrink_decoding_region (&from_byte, &to_byte, coding, NULL); |
4071 coding->produced = len_byte; | 4069 if (from_byte == to_byte) |
4072 coding->produced_char = multibyte ? len : len_byte; | 4070 { |
4073 return 0; | 4071 coding->produced = len_byte; |
4074 } | 4072 coding->produced_char = multibyte ? len : len_byte; |
4075 | 4073 if (!replace) |
4076 /* Here, the excluded region by shrinking contains only ASCIIs. */ | 4074 /* We must record and adjust for this new text now. */ |
4077 from += (from_byte - from_byte_orig); | 4075 adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len); |
4078 to += (to_byte - to_byte_orig); | 4076 return 0; |
4079 len = to - from; | 4077 } |
4080 len_byte = to_byte - from_byte; | 4078 |
4079 head_skip = from_byte - from_byte_orig; | |
4080 tail_skip = to_byte_orig - to_byte; | |
4081 total_skip = head_skip + tail_skip; | |
4082 from += head_skip; | |
4083 to -= tail_skip; | |
4084 len -= total_skip; len_byte -= total_skip; | |
4085 } | |
4081 | 4086 |
4082 /* For conversion, we must put the gap before the text in addition to | 4087 /* For conversion, we must put the gap before the text in addition to |
4083 making the gap larger for efficient decoding. The required gap | 4088 making the gap larger for efficient decoding. The required gap |
4084 size starts from 2000 which is the magic number used in make_gap. | 4089 size starts from 2000 which is the magic number used in make_gap. |
4085 But, after one batch of conversion, it will be incremented if we | 4090 But, after one batch of conversion, it will be incremented if we |
4087 require = 2000; | 4092 require = 2000; |
4088 | 4093 |
4089 if (GAP_SIZE < require) | 4094 if (GAP_SIZE < require) |
4090 make_gap (require - GAP_SIZE); | 4095 make_gap (require - GAP_SIZE); |
4091 move_gap_both (from, from_byte); | 4096 move_gap_both (from, from_byte); |
4092 | |
4093 if (replace) | |
4094 adjust_before_replace (from, from_byte, to, to_byte); | |
4095 | 4097 |
4096 if (GPT - BEG < beg_unchanged) | 4098 if (GPT - BEG < beg_unchanged) |
4097 beg_unchanged = GPT - BEG; | 4099 beg_unchanged = GPT - BEG; |
4098 if (Z - GPT < end_unchanged) | 4100 if (Z - GPT < end_unchanged) |
4099 end_unchanged = Z - GPT; | 4101 end_unchanged = Z - GPT; |
4236 } | 4238 } |
4237 } | 4239 } |
4238 if (src - dst > 0) *dst = 0; /* Put an anchor. */ | 4240 if (src - dst > 0) *dst = 0; /* Put an anchor. */ |
4239 | 4241 |
4240 if (multibyte | 4242 if (multibyte |
4241 && (fake_multibyte || !encodep && (to - from) != (to_byte - from_byte))) | 4243 && (fake_multibyte |
4244 || !encodep && (to - from) != (to_byte - from_byte))) | |
4242 inserted = multibyte_chars_in_text (GPT_ADDR, inserted_byte); | 4245 inserted = multibyte_chars_in_text (GPT_ADDR, inserted_byte); |
4243 | 4246 |
4244 adjust_after_replace (from, from_byte, to, to_byte, | 4247 /* If we have shrunk the conversion area, adjust it now. */ |
4245 inserted, inserted_byte, replace); | 4248 if (total_skip > 0) |
4246 if (from_byte_orig == from_byte) | 4249 { |
4247 from_byte_orig = from_byte = PT_BYTE; | 4250 if (tail_skip > 0) |
4251 safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip); | |
4252 inserted += total_skip; inserted_byte += total_skip; | |
4253 GAP_SIZE += total_skip; | |
4254 GPT -= head_skip; GPT_BYTE -= head_skip; | |
4255 ZV -= total_skip; ZV_BYTE -= total_skip; | |
4256 Z -= total_skip; Z_BYTE -= total_skip; | |
4257 from -= head_skip; from_byte -= head_skip; | |
4258 to += tail_skip; to_byte += tail_skip; | |
4259 } | |
4260 | |
4261 adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte); | |
4248 | 4262 |
4249 if (! encodep && ! NILP (coding->post_read_conversion)) | 4263 if (! encodep && ! NILP (coding->post_read_conversion)) |
4250 { | 4264 { |
4251 Lisp_Object val; | 4265 Lisp_Object val; |
4252 int orig_inserted = inserted, pos = PT; | 4266 int orig_inserted = inserted, pos = PT; |
4264 } | 4278 } |
4265 | 4279 |
4266 signal_after_change (from, to - from, inserted); | 4280 signal_after_change (from, to - from, inserted); |
4267 | 4281 |
4268 { | 4282 { |
4269 int skip = (to_byte_orig - to_byte) + (from_byte - from_byte_orig); | 4283 coding->consumed = to_byte - from_byte; |
4270 | 4284 coding->consumed_char = to - from; |
4271 coding->consumed = to_byte_orig - from_byte_orig; | 4285 coding->produced = inserted_byte; |
4272 coding->consumed_char = skip + (to - from); | 4286 coding->produced_char = inserted; |
4273 coding->produced = skip + inserted_byte; | |
4274 coding->produced_char = skip + inserted; | |
4275 } | 4287 } |
4276 | 4288 |
4277 return 0; | 4289 return 0; |
4278 } | 4290 } |
4279 | 4291 |