Mercurial > emacs
comparison src/coding.c @ 30833:2db6e42a6ba3
(MINIMUM_CONVERSION_BUFFER_SIZE): Macro deleted.
(conversion_buffer, conversion_buffer_size): Variables deleted.
(get_conversion_buffer): Function deleted.
(struct conversion_buffer): New structure.
(MAX_ALLOCA): New macro.
(allocate_conversion_buffer): New macro.
(extend_conversion_buffer, free_conversion_buffer): New functions.
(ccl_coding_driver): Set coding->result.
(decode_coding): Set coding->result to CODING_FINISH_NORMAL if
this is the last block of source.
(encode_coding): Likewise. Handle the source block as the last
one only when the whole source text is consumed.
(decode_coding_string): Handle the case where the output buffer is
too small to decode the whole source text. Use
allocate_conversion_buffer, extend_conversion_buffer and
free_conversion_buffer, not get_conversion_buffer.
(encode_coding_string): Likewise.
(init_coding): Function deleted.
(init_coding_once): Delete code to initialize
conversion_buffer_size.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Wed, 16 Aug 2000 01:37:20 +0000 |
parents | b72c2759ac70 |
children | 91e24edb537a |
comparison
equal
deleted
inserted
replaced
30832:b0b1acff966b | 30833:2db6e42a6ba3 |
---|---|
3868 magnification = 1; | 3868 magnification = 1; |
3869 | 3869 |
3870 return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM); | 3870 return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM); |
3871 } | 3871 } |
3872 | 3872 |
3873 #ifndef MINIMUM_CONVERSION_BUFFER_SIZE | 3873 /* Working buffer for code conversion. */ |
3874 #define MINIMUM_CONVERSION_BUFFER_SIZE 1024 | 3874 struct conversion_buffer |
3875 #endif | 3875 { |
3876 | 3876 int size; /* size of data. */ |
3877 char *conversion_buffer; | 3877 int on_stack; /* 1 if allocated by alloca. */ |
3878 int conversion_buffer_size; | 3878 unsigned char *data; |
3879 | 3879 }; |
3880 /* Return a pointer to a SIZE bytes of buffer to be used for encoding | 3880 |
3881 or decoding. Sufficient memory is allocated automatically. If we | 3881 /* Don't use alloca for allocating memory space larger than this, lest |
3882 run out of memory, return NULL. */ | 3882 we overflow their stack. */ |
3883 | 3883 #define MAX_ALLOCA 16*1024 |
3884 char * | 3884 |
3885 get_conversion_buffer (size) | 3885 /* Allocate LEN bytes of memory for BUF (struct conversion_buffer). */ |
3886 int size; | 3886 #define allocate_conversion_buffer(buf, len) \ |
3887 { | 3887 do { \ |
3888 if (size > conversion_buffer_size) | 3888 if (len < MAX_ALLOCA) \ |
3889 { | 3889 { \ |
3890 char *buf; | 3890 buf.data = (unsigned char *) alloca (len); \ |
3891 int real_size = conversion_buffer_size * 2; | 3891 buf.on_stack = 1; \ |
3892 | 3892 } \ |
3893 while (real_size < size) real_size *= 2; | 3893 else \ |
3894 buf = (char *) xmalloc (real_size); | 3894 { \ |
3895 xfree (conversion_buffer); | 3895 buf.data = (unsigned char *) xmalloc (len); \ |
3896 conversion_buffer = buf; | 3896 buf.on_stack = 0; \ |
3897 conversion_buffer_size = real_size; | 3897 } \ |
3898 } | 3898 buf.size = len; \ |
3899 return conversion_buffer; | 3899 } while (0) |
3900 | |
3901 /* Double the allocated memory for *BUF. */ | |
3902 static void | |
3903 extend_conversion_buffer (buf) | |
3904 struct conversion_buffer *buf; | |
3905 { | |
3906 if (buf->on_stack) | |
3907 { | |
3908 unsigned char *save = buf->data; | |
3909 buf->data = (unsigned char *) xmalloc (buf->size * 2); | |
3910 bcopy (save, buf->data, buf->size); | |
3911 buf->on_stack = 0; | |
3912 } | |
3913 else | |
3914 { | |
3915 buf->data = (unsigned char *) xrealloc (buf->data, buf->size * 2); | |
3916 } | |
3917 buf->size *= 2; | |
3918 } | |
3919 | |
3920 /* Free the allocated memory for BUF if it is not on stack. */ | |
3921 static void | |
3922 free_conversion_buffer (buf) | |
3923 struct conversion_buffer *buf; | |
3924 { | |
3925 if (!buf->on_stack) | |
3926 xfree (buf->data); | |
3900 } | 3927 } |
3901 | 3928 |
3902 int | 3929 int |
3903 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep) | 3930 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep) |
3904 struct coding_system *coding; | 3931 struct coding_system *coding; |
3927 } | 3954 } |
3928 | 3955 |
3929 switch (ccl->status) | 3956 switch (ccl->status) |
3930 { | 3957 { |
3931 case CCL_STAT_SUSPEND_BY_SRC: | 3958 case CCL_STAT_SUSPEND_BY_SRC: |
3932 result = CODING_FINISH_INSUFFICIENT_SRC; | 3959 coding->result = CODING_FINISH_INSUFFICIENT_SRC; |
3933 break; | 3960 break; |
3934 case CCL_STAT_SUSPEND_BY_DST: | 3961 case CCL_STAT_SUSPEND_BY_DST: |
3935 result = CODING_FINISH_INSUFFICIENT_DST; | 3962 coding->result = CODING_FINISH_INSUFFICIENT_DST; |
3936 break; | 3963 break; |
3937 case CCL_STAT_QUIT: | 3964 case CCL_STAT_QUIT: |
3938 case CCL_STAT_INVALID_CMD: | 3965 case CCL_STAT_INVALID_CMD: |
3939 result = CODING_FINISH_INTERRUPT; | 3966 coding->result = CODING_FINISH_INTERRUPT; |
3940 break; | 3967 break; |
3941 default: | 3968 default: |
3942 result = CODING_FINISH_NORMAL; | 3969 coding->result = CODING_FINISH_NORMAL; |
3943 break; | 3970 break; |
3944 } | 3971 } |
3945 return result; | 3972 return coding->result; |
3946 } | 3973 } |
3947 | 3974 |
3948 /* Decode EOL format of the text at PTR of BYTES length destructively | 3975 /* Decode EOL format of the text at PTR of BYTES length destructively |
3949 according to CODING->eol_type. This is called after the CCL | 3976 according to CODING->eol_type. This is called after the CCL |
3950 program produced a decoded text at PTR. If we do CRLF->LF | 3977 program produced a decoded text at PTR. If we do CRLF->LF |
4168 dst += CHAR_STRING (c, dst); | 4195 dst += CHAR_STRING (c, dst); |
4169 coding->produced_char++; | 4196 coding->produced_char++; |
4170 } | 4197 } |
4171 coding->consumed = coding->consumed_char = src - source; | 4198 coding->consumed = coding->consumed_char = src - source; |
4172 coding->produced = dst - destination; | 4199 coding->produced = dst - destination; |
4200 coding->result = CODING_FINISH_NORMAL; | |
4173 } | 4201 } |
4174 | 4202 |
4175 if (!coding->dst_multibyte) | 4203 if (!coding->dst_multibyte) |
4176 { | 4204 { |
4177 coding->produced = str_as_unibyte (destination, coding->produced); | 4205 coding->produced = str_as_unibyte (destination, coding->produced); |
4229 | 4257 |
4230 if (coding->result == CODING_FINISH_INSUFFICIENT_SRC | 4258 if (coding->result == CODING_FINISH_INSUFFICIENT_SRC |
4231 && coding->consumed == src_bytes) | 4259 && coding->consumed == src_bytes) |
4232 coding->result = CODING_FINISH_NORMAL; | 4260 coding->result = CODING_FINISH_NORMAL; |
4233 | 4261 |
4234 if (coding->mode & CODING_MODE_LAST_BLOCK) | 4262 if (coding->mode & CODING_MODE_LAST_BLOCK |
4263 && coding->result == CODING_FINISH_INSUFFICIENT_SRC) | |
4235 { | 4264 { |
4236 unsigned char *src = source + coding->consumed; | 4265 unsigned char *src = source + coding->consumed; |
4237 unsigned char *src_end = src + src_bytes; | 4266 unsigned char *src_end = src + src_bytes; |
4238 unsigned char *dst = destination + coding->produced; | 4267 unsigned char *dst = destination + coding->produced; |
4239 | 4268 |
4250 len = str_as_unibyte (dst, len); | 4279 len = str_as_unibyte (dst, len); |
4251 dst += len; | 4280 dst += len; |
4252 coding->consumed = src_bytes; | 4281 coding->consumed = src_bytes; |
4253 } | 4282 } |
4254 coding->produced = coding->produced_char = dst - destination; | 4283 coding->produced = coding->produced_char = dst - destination; |
4284 coding->result = CODING_FINISH_NORMAL; | |
4255 } | 4285 } |
4256 | 4286 |
4257 return coding->result; | 4287 return coding->result; |
4258 } | 4288 } |
4259 | 4289 |
5195 Lisp_Object str; | 5225 Lisp_Object str; |
5196 struct coding_system *coding; | 5226 struct coding_system *coding; |
5197 int nocopy; | 5227 int nocopy; |
5198 { | 5228 { |
5199 int len; | 5229 int len; |
5200 char *buf; | 5230 struct conversion_buffer buf; |
5201 int from, to, to_byte; | 5231 int from, to, to_byte; |
5202 struct gcpro gcpro1; | 5232 struct gcpro gcpro1; |
5203 Lisp_Object saved_coding_symbol; | 5233 Lisp_Object saved_coding_symbol; |
5204 int result; | 5234 int result; |
5205 int require_decoding; | 5235 int require_decoding; |
5236 int shrinked_bytes = 0; | |
5237 Lisp_Object newstr; | |
5238 int consumed, produced, produced_char; | |
5206 | 5239 |
5207 from = 0; | 5240 from = 0; |
5208 to = XSTRING (str)->size; | 5241 to = XSTRING (str)->size; |
5209 to_byte = STRING_BYTES (XSTRING (str)); | 5242 to_byte = STRING_BYTES (XSTRING (str)); |
5210 | 5243 |
5245 && coding->type != coding_type_raw_text); | 5278 && coding->type != coding_type_raw_text); |
5246 | 5279 |
5247 /* Try to skip the heading and tailing ASCIIs. */ | 5280 /* Try to skip the heading and tailing ASCIIs. */ |
5248 if (require_decoding && coding->type != coding_type_ccl) | 5281 if (require_decoding && coding->type != coding_type_ccl) |
5249 { | 5282 { |
5250 int from_orig = from; | |
5251 | |
5252 SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data, | 5283 SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data, |
5253 0); | 5284 0); |
5254 if (from == to_byte) | 5285 if (from == to_byte) |
5255 require_decoding = 0; | 5286 require_decoding = 0; |
5287 shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte); | |
5256 } | 5288 } |
5257 | 5289 |
5258 if (!require_decoding) | 5290 if (!require_decoding) |
5259 { | 5291 { |
5260 coding->consumed = STRING_BYTES (XSTRING (str)); | 5292 coding->consumed = STRING_BYTES (XSTRING (str)); |
5269 return (nocopy ? str : Fcopy_sequence (str)); | 5301 return (nocopy ? str : Fcopy_sequence (str)); |
5270 } | 5302 } |
5271 | 5303 |
5272 if (coding->composing != COMPOSITION_DISABLED) | 5304 if (coding->composing != COMPOSITION_DISABLED) |
5273 coding_allocate_composition_data (coding, from); | 5305 coding_allocate_composition_data (coding, from); |
5274 | |
5275 len = decoding_buffer_size (coding, to_byte - from); | 5306 len = decoding_buffer_size (coding, to_byte - from); |
5276 len += from + STRING_BYTES (XSTRING (str)) - to_byte; | 5307 allocate_conversion_buffer (buf, len); |
5277 GCPRO1 (str); | 5308 |
5278 buf = get_conversion_buffer (len); | 5309 consumed = produced = produced_char = 0; |
5279 UNGCPRO; | 5310 while (1) |
5280 | 5311 { |
5312 result = decode_coding (coding, XSTRING (str)->data + from + consumed, | |
5313 buf.data + produced, to_byte - from - consumed, | |
5314 buf.size - produced); | |
5315 consumed += coding->consumed; | |
5316 produced += coding->produced; | |
5317 produced_char += coding->produced_char; | |
5318 if (result == CODING_FINISH_NORMAL) | |
5319 break; | |
5320 if (result == CODING_FINISH_INSUFFICIENT_CMP) | |
5321 coding_allocate_composition_data (coding, from + produced_char); | |
5322 else if (result == CODING_FINISH_INSUFFICIENT_DST) | |
5323 extend_conversion_buffer (&buf); | |
5324 else if (result == CODING_FINISH_INCONSISTENT_EOL) | |
5325 { | |
5326 /* Recover the original EOL format. */ | |
5327 if (coding->eol_type == CODING_EOL_CR) | |
5328 { | |
5329 unsigned char *p; | |
5330 for (p = buf.data; p < buf.data + produced; p++) | |
5331 if (*p == '\n') *p = '\r'; | |
5332 } | |
5333 else if (coding->eol_type == CODING_EOL_CRLF) | |
5334 { | |
5335 int num_eol = 0; | |
5336 unsigned char *p0, *p1; | |
5337 for (p0 = buf.data, p1 = p0 + produced; p0 < p1; p0++) | |
5338 if (*p0 == '\n') num_eol++; | |
5339 if (produced + num_eol >= buf.size) | |
5340 extend_conversion_buffer (&buf); | |
5341 for (p0 = buf.data + produced, p1 = p0 + num_eol; p0 > buf.data;) | |
5342 { | |
5343 *--p1 = *--p0; | |
5344 if (*p0 == '\n') *--p1 = '\r'; | |
5345 } | |
5346 produced += num_eol; | |
5347 produced_char += num_eol; | |
5348 } | |
5349 coding->eol_type = CODING_EOL_LF; | |
5350 coding->symbol = saved_coding_symbol; | |
5351 } | |
5352 } | |
5353 | |
5354 if (coding->dst_multibyte) | |
5355 newstr = make_uninit_multibyte_string (produced_char + shrinked_bytes, | |
5356 produced + shrinked_bytes); | |
5357 else | |
5358 newstr = make_uninit_string (produced + shrinked_bytes); | |
5281 if (from > 0) | 5359 if (from > 0) |
5282 bcopy (XSTRING (str)->data, buf, from); | 5360 bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from); |
5283 result = decode_coding (coding, XSTRING (str)->data + from, | 5361 bcopy (buf.data, XSTRING (newstr)->data + from, produced); |
5284 buf + from, to_byte - from, len); | 5362 if (shrinked_bytes > from) |
5285 if (result == CODING_FINISH_INCONSISTENT_EOL) | 5363 bcopy (XSTRING (str)->data + to_byte, |
5286 { | 5364 XSTRING (newstr)->data + from + produced, |
5287 /* We simply try to decode the whole string again but without | 5365 shrinked_bytes - from); |
5288 eol-conversion this time. */ | 5366 free_conversion_buffer (&buf); |
5289 coding->eol_type = CODING_EOL_LF; | |
5290 coding->symbol = saved_coding_symbol; | |
5291 coding_free_composition_data (coding); | |
5292 return decode_coding_string (str, coding, nocopy); | |
5293 } | |
5294 | |
5295 bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced, | |
5296 STRING_BYTES (XSTRING (str)) - to_byte); | |
5297 | |
5298 len = from + STRING_BYTES (XSTRING (str)) - to_byte; | |
5299 if (coding->dst_multibyte) | |
5300 str = make_multibyte_string (buf, len + coding->produced_char, | |
5301 len + coding->produced); | |
5302 else | |
5303 str = make_unibyte_string (buf, len + coding->produced); | |
5304 | 5367 |
5305 if (coding->cmp_data && coding->cmp_data->used) | 5368 if (coding->cmp_data && coding->cmp_data->used) |
5306 coding_restore_composition (coding, str); | 5369 coding_restore_composition (coding, newstr); |
5307 coding_free_composition_data (coding); | 5370 coding_free_composition_data (coding); |
5308 | 5371 |
5309 if (SYMBOLP (coding->post_read_conversion) | 5372 if (SYMBOLP (coding->post_read_conversion) |
5310 && !NILP (Ffboundp (coding->post_read_conversion))) | 5373 && !NILP (Ffboundp (coding->post_read_conversion))) |
5311 str = run_pre_post_conversion_on_str (str, coding, 0); | 5374 newstr = run_pre_post_conversion_on_str (newstr, coding, 0); |
5312 | 5375 |
5313 return str; | 5376 return newstr; |
5314 } | 5377 } |
5315 | 5378 |
5316 Lisp_Object | 5379 Lisp_Object |
5317 encode_coding_string (str, coding, nocopy) | 5380 encode_coding_string (str, coding, nocopy) |
5318 Lisp_Object str; | 5381 Lisp_Object str; |
5319 struct coding_system *coding; | 5382 struct coding_system *coding; |
5320 int nocopy; | 5383 int nocopy; |
5321 { | 5384 { |
5322 int len; | 5385 int len; |
5323 char *buf; | 5386 struct conversion_buffer buf; |
5324 int from, to, to_byte; | 5387 int from, to, to_byte; |
5325 struct gcpro gcpro1; | 5388 struct gcpro gcpro1; |
5326 Lisp_Object saved_coding_symbol; | 5389 Lisp_Object saved_coding_symbol; |
5327 int result; | 5390 int result; |
5391 int shrinked_bytes = 0; | |
5392 Lisp_Object newstr; | |
5393 int consumed, consumed_char, produced; | |
5328 | 5394 |
5329 if (SYMBOLP (coding->pre_write_conversion) | 5395 if (SYMBOLP (coding->pre_write_conversion) |
5330 && !NILP (Ffboundp (coding->pre_write_conversion))) | 5396 && !NILP (Ffboundp (coding->pre_write_conversion))) |
5331 str = run_pre_post_conversion_on_str (str, coding, 1); | 5397 str = run_pre_post_conversion_on_str (str, coding, 1); |
5332 | 5398 |
5354 coding_save_composition (coding, from, to, str); | 5420 coding_save_composition (coding, from, to, str); |
5355 | 5421 |
5356 /* Try to skip the heading and tailing ASCIIs. */ | 5422 /* Try to skip the heading and tailing ASCIIs. */ |
5357 if (coding->type != coding_type_ccl) | 5423 if (coding->type != coding_type_ccl) |
5358 { | 5424 { |
5359 int from_orig = from; | |
5360 | |
5361 SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data, | 5425 SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data, |
5362 1); | 5426 1); |
5363 if (from == to_byte) | 5427 if (from == to_byte) |
5364 return (nocopy ? str : Fcopy_sequence (str)); | 5428 return (nocopy ? str : Fcopy_sequence (str)); |
5429 shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte); | |
5365 } | 5430 } |
5366 | 5431 |
5367 len = encoding_buffer_size (coding, to_byte - from); | 5432 len = encoding_buffer_size (coding, to_byte - from); |
5368 len += from + STRING_BYTES (XSTRING (str)) - to_byte; | 5433 allocate_conversion_buffer (buf, len); |
5369 GCPRO1 (str); | 5434 |
5370 buf = get_conversion_buffer (len); | 5435 consumed = consumed_char = produced = 0; |
5371 UNGCPRO; | 5436 |
5372 | 5437 while (1) |
5438 { | |
5439 result = encode_coding (coding, XSTRING (str)->data + from + consumed, | |
5440 buf.data + produced, to_byte - from - consumed, | |
5441 buf.size - produced); | |
5442 consumed += coding->consumed; | |
5443 produced += coding->produced; | |
5444 if (result == CODING_FINISH_NORMAL) | |
5445 break; | |
5446 /* Now result should be CODING_FINISH_INSUFFICIENT_DST. */ | |
5447 extend_conversion_buffer (&buf); | |
5448 } | |
5449 | |
5450 newstr = make_uninit_string (produced + shrinked_bytes); | |
5373 if (from > 0) | 5451 if (from > 0) |
5374 bcopy (XSTRING (str)->data, buf, from); | 5452 bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from); |
5375 result = encode_coding (coding, XSTRING (str)->data + from, | 5453 bcopy (buf.data, XSTRING (newstr)->data + from, produced); |
5376 buf + from, to_byte - from, len); | 5454 if (shrinked_bytes > from) |
5377 bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced, | 5455 bcopy (XSTRING (str)->data + to_byte, |
5378 STRING_BYTES (XSTRING (str)) - to_byte); | 5456 XSTRING (newstr)->data + from + produced, |
5379 | 5457 shrinked_bytes - from); |
5380 len = from + STRING_BYTES (XSTRING (str)) - to_byte; | 5458 |
5381 str = make_unibyte_string (buf, len + coding->produced); | 5459 free_conversion_buffer (&buf); |
5382 coding_free_composition_data (coding); | 5460 coding_free_composition_data (coding); |
5383 | 5461 |
5384 return str; | 5462 return newstr; |
5385 } | 5463 } |
5386 | 5464 |
5387 | 5465 |
5388 #ifdef emacs | 5466 #ifdef emacs |
5389 /*** 8. Emacs Lisp library functions ***/ | 5467 /*** 8. Emacs Lisp library functions ***/ |
6206 | 6284 |
6207 | 6285 |
6208 /*** 9. Post-amble ***/ | 6286 /*** 9. Post-amble ***/ |
6209 | 6287 |
6210 void | 6288 void |
6211 init_coding () | |
6212 { | |
6213 conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE); | |
6214 } | |
6215 | |
6216 void | |
6217 init_coding_once () | 6289 init_coding_once () |
6218 { | 6290 { |
6219 int i; | 6291 int i; |
6220 | 6292 |
6221 /* Emacs' internal format specific initialize routine. */ | 6293 /* Emacs' internal format specific initialize routine. */ |
6250 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7; | 6322 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7; |
6251 iso_code_class[ISO_CODE_ESC] = ISO_escape; | 6323 iso_code_class[ISO_CODE_ESC] = ISO_escape; |
6252 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2; | 6324 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2; |
6253 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3; | 6325 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3; |
6254 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer; | 6326 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer; |
6255 | |
6256 conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE; | |
6257 | 6327 |
6258 setup_coding_system (Qnil, &keyboard_coding); | 6328 setup_coding_system (Qnil, &keyboard_coding); |
6259 setup_coding_system (Qnil, &terminal_coding); | 6329 setup_coding_system (Qnil, &terminal_coding); |
6260 setup_coding_system (Qnil, &safe_terminal_coding); | 6330 setup_coding_system (Qnil, &safe_terminal_coding); |
6261 setup_coding_system (Qnil, &default_buffer_file_coding); | 6331 setup_coding_system (Qnil, &default_buffer_file_coding); |