comparison src/coding.c @ 30833:2db6e42a6ba3

(MINIMUM_CONVERSION_BUFFER_SIZE): Macro deleted.
(conversion_buffer, conversion_buffer_size): Variables deleted.
(get_conversion_buffer): Function deleted.
(struct conversion_buffer): New structure.
(MAX_ALLOCA): New macro.
(allocate_conversion_buffer): New macro.
(extend_conversion_buffer, free_conversion_buffer): New functions.
(ccl_coding_driver): Set coding->result.
(decode_coding): Set coding->result to CODING_FINISH_NORMAL if this is the last block of source.
(encode_coding): Likewise. Handle the source block as the last one only when the whole source text is consumed.
(decode_coding_string): Handle the case where the output buffer is too small to decode the whole source text. Use allocate_conversion_buffer, extend_conversion_buffer and free_conversion_buffer instead of get_conversion_buffer.
(encode_coding_string): Likewise.
(init_coding): Function deleted.
(init_coding_once): Delete code to initialize conversion_buffer_size.
author Kenichi Handa <handa@m17n.org>
date Wed, 16 Aug 2000 01:37:20 +0000
parents b72c2759ac70
children 91e24edb537a
comparison
equal deleted inserted replaced
30832:b0b1acff966b 30833:2db6e42a6ba3
3868 magnification = 1; 3868 magnification = 1;
3869 3869
3870 return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM); 3870 return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
3871 } 3871 }
3872 3872
3873 #ifndef MINIMUM_CONVERSION_BUFFER_SIZE 3873 /* Working buffer for code conversion. */
3874 #define MINIMUM_CONVERSION_BUFFER_SIZE 1024 3874 struct conversion_buffer
3875 #endif 3875 {
3876 3876 int size; /* size of data. */
3877 char *conversion_buffer; 3877 int on_stack; /* 1 if allocated by alloca. */
3878 int conversion_buffer_size; 3878 unsigned char *data;
3879 3879 };
3880 /* Return a pointer to a SIZE bytes of buffer to be used for encoding 3880
3881 or decoding. Sufficient memory is allocated automatically. If we 3881 /* Don't use alloca for allocating memory space larger than this, lest
3882 run out of memory, return NULL. */ 3882 we overflow their stack. */
3883 3883 #define MAX_ALLOCA 16*1024
3884 char * 3884
3885 get_conversion_buffer (size) 3885 /* Allocate LEN bytes of memory for BUF (struct conversion_buffer). */
3886 int size; 3886 #define allocate_conversion_buffer(buf, len) \
3887 { 3887 do { \
3888 if (size > conversion_buffer_size) 3888 if (len < MAX_ALLOCA) \
3889 { 3889 { \
3890 char *buf; 3890 buf.data = (unsigned char *) alloca (len); \
3891 int real_size = conversion_buffer_size * 2; 3891 buf.on_stack = 1; \
3892 3892 } \
3893 while (real_size < size) real_size *= 2; 3893 else \
3894 buf = (char *) xmalloc (real_size); 3894 { \
3895 xfree (conversion_buffer); 3895 buf.data = (unsigned char *) xmalloc (len); \
3896 conversion_buffer = buf; 3896 buf.on_stack = 0; \
3897 conversion_buffer_size = real_size; 3897 } \
3898 } 3898 buf.size = len; \
3899 return conversion_buffer; 3899 } while (0)
3900
3901 /* Double the allocated memory for *BUF. */
3902 static void
3903 extend_conversion_buffer (buf)
3904 struct conversion_buffer *buf;
3905 {
3906 if (buf->on_stack)
3907 {
3908 unsigned char *save = buf->data;
3909 buf->data = (unsigned char *) xmalloc (buf->size * 2);
3910 bcopy (save, buf->data, buf->size);
3911 buf->on_stack = 0;
3912 }
3913 else
3914 {
3915 buf->data = (unsigned char *) xrealloc (buf->data, buf->size * 2);
3916 }
3917 buf->size *= 2;
3918 }
3919
3920 /* Free the allocated memory for BUF if it is not on stack. */
3921 static void
3922 free_conversion_buffer (buf)
3923 struct conversion_buffer *buf;
3924 {
3925 if (!buf->on_stack)
3926 xfree (buf->data);
3900 } 3927 }
3901 3928
3902 int 3929 int
3903 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep) 3930 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
3904 struct coding_system *coding; 3931 struct coding_system *coding;
3927 } 3954 }
3928 3955
3929 switch (ccl->status) 3956 switch (ccl->status)
3930 { 3957 {
3931 case CCL_STAT_SUSPEND_BY_SRC: 3958 case CCL_STAT_SUSPEND_BY_SRC:
3932 result = CODING_FINISH_INSUFFICIENT_SRC; 3959 coding->result = CODING_FINISH_INSUFFICIENT_SRC;
3933 break; 3960 break;
3934 case CCL_STAT_SUSPEND_BY_DST: 3961 case CCL_STAT_SUSPEND_BY_DST:
3935 result = CODING_FINISH_INSUFFICIENT_DST; 3962 coding->result = CODING_FINISH_INSUFFICIENT_DST;
3936 break; 3963 break;
3937 case CCL_STAT_QUIT: 3964 case CCL_STAT_QUIT:
3938 case CCL_STAT_INVALID_CMD: 3965 case CCL_STAT_INVALID_CMD:
3939 result = CODING_FINISH_INTERRUPT; 3966 coding->result = CODING_FINISH_INTERRUPT;
3940 break; 3967 break;
3941 default: 3968 default:
3942 result = CODING_FINISH_NORMAL; 3969 coding->result = CODING_FINISH_NORMAL;
3943 break; 3970 break;
3944 } 3971 }
3945 return result; 3972 return coding->result;
3946 } 3973 }
3947 3974
3948 /* Decode EOL format of the text at PTR of BYTES length destructively 3975 /* Decode EOL format of the text at PTR of BYTES length destructively
3949 according to CODING->eol_type. This is called after the CCL 3976 according to CODING->eol_type. This is called after the CCL
3950 program produced a decoded text at PTR. If we do CRLF->LF 3977 program produced a decoded text at PTR. If we do CRLF->LF
4168 dst += CHAR_STRING (c, dst); 4195 dst += CHAR_STRING (c, dst);
4169 coding->produced_char++; 4196 coding->produced_char++;
4170 } 4197 }
4171 coding->consumed = coding->consumed_char = src - source; 4198 coding->consumed = coding->consumed_char = src - source;
4172 coding->produced = dst - destination; 4199 coding->produced = dst - destination;
4200 coding->result = CODING_FINISH_NORMAL;
4173 } 4201 }
4174 4202
4175 if (!coding->dst_multibyte) 4203 if (!coding->dst_multibyte)
4176 { 4204 {
4177 coding->produced = str_as_unibyte (destination, coding->produced); 4205 coding->produced = str_as_unibyte (destination, coding->produced);
4229 4257
4230 if (coding->result == CODING_FINISH_INSUFFICIENT_SRC 4258 if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4231 && coding->consumed == src_bytes) 4259 && coding->consumed == src_bytes)
4232 coding->result = CODING_FINISH_NORMAL; 4260 coding->result = CODING_FINISH_NORMAL;
4233 4261
4234 if (coding->mode & CODING_MODE_LAST_BLOCK) 4262 if (coding->mode & CODING_MODE_LAST_BLOCK
4263 && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4235 { 4264 {
4236 unsigned char *src = source + coding->consumed; 4265 unsigned char *src = source + coding->consumed;
4237 unsigned char *src_end = src + src_bytes; 4266 unsigned char *src_end = src + src_bytes;
4238 unsigned char *dst = destination + coding->produced; 4267 unsigned char *dst = destination + coding->produced;
4239 4268
4250 len = str_as_unibyte (dst, len); 4279 len = str_as_unibyte (dst, len);
4251 dst += len; 4280 dst += len;
4252 coding->consumed = src_bytes; 4281 coding->consumed = src_bytes;
4253 } 4282 }
4254 coding->produced = coding->produced_char = dst - destination; 4283 coding->produced = coding->produced_char = dst - destination;
4284 coding->result = CODING_FINISH_NORMAL;
4255 } 4285 }
4256 4286
4257 return coding->result; 4287 return coding->result;
4258 } 4288 }
4259 4289
5195 Lisp_Object str; 5225 Lisp_Object str;
5196 struct coding_system *coding; 5226 struct coding_system *coding;
5197 int nocopy; 5227 int nocopy;
5198 { 5228 {
5199 int len; 5229 int len;
5200 char *buf; 5230 struct conversion_buffer buf;
5201 int from, to, to_byte; 5231 int from, to, to_byte;
5202 struct gcpro gcpro1; 5232 struct gcpro gcpro1;
5203 Lisp_Object saved_coding_symbol; 5233 Lisp_Object saved_coding_symbol;
5204 int result; 5234 int result;
5205 int require_decoding; 5235 int require_decoding;
5236 int shrinked_bytes = 0;
5237 Lisp_Object newstr;
5238 int consumed, produced, produced_char;
5206 5239
5207 from = 0; 5240 from = 0;
5208 to = XSTRING (str)->size; 5241 to = XSTRING (str)->size;
5209 to_byte = STRING_BYTES (XSTRING (str)); 5242 to_byte = STRING_BYTES (XSTRING (str));
5210 5243
5245 && coding->type != coding_type_raw_text); 5278 && coding->type != coding_type_raw_text);
5246 5279
5247 /* Try to skip the heading and tailing ASCIIs. */ 5280 /* Try to skip the heading and tailing ASCIIs. */
5248 if (require_decoding && coding->type != coding_type_ccl) 5281 if (require_decoding && coding->type != coding_type_ccl)
5249 { 5282 {
5250 int from_orig = from;
5251
5252 SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data, 5283 SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
5253 0); 5284 0);
5254 if (from == to_byte) 5285 if (from == to_byte)
5255 require_decoding = 0; 5286 require_decoding = 0;
5287 shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
5256 } 5288 }
5257 5289
5258 if (!require_decoding) 5290 if (!require_decoding)
5259 { 5291 {
5260 coding->consumed = STRING_BYTES (XSTRING (str)); 5292 coding->consumed = STRING_BYTES (XSTRING (str));
5269 return (nocopy ? str : Fcopy_sequence (str)); 5301 return (nocopy ? str : Fcopy_sequence (str));
5270 } 5302 }
5271 5303
5272 if (coding->composing != COMPOSITION_DISABLED) 5304 if (coding->composing != COMPOSITION_DISABLED)
5273 coding_allocate_composition_data (coding, from); 5305 coding_allocate_composition_data (coding, from);
5274
5275 len = decoding_buffer_size (coding, to_byte - from); 5306 len = decoding_buffer_size (coding, to_byte - from);
5276 len += from + STRING_BYTES (XSTRING (str)) - to_byte; 5307 allocate_conversion_buffer (buf, len);
5277 GCPRO1 (str); 5308
5278 buf = get_conversion_buffer (len); 5309 consumed = produced = produced_char = 0;
5279 UNGCPRO; 5310 while (1)
5280 5311 {
5312 result = decode_coding (coding, XSTRING (str)->data + from + consumed,
5313 buf.data + produced, to_byte - from - consumed,
5314 buf.size - produced);
5315 consumed += coding->consumed;
5316 produced += coding->produced;
5317 produced_char += coding->produced_char;
5318 if (result == CODING_FINISH_NORMAL)
5319 break;
5320 if (result == CODING_FINISH_INSUFFICIENT_CMP)
5321 coding_allocate_composition_data (coding, from + produced_char);
5322 else if (result == CODING_FINISH_INSUFFICIENT_DST)
5323 extend_conversion_buffer (&buf);
5324 else if (result == CODING_FINISH_INCONSISTENT_EOL)
5325 {
5326 /* Recover the original EOL format. */
5327 if (coding->eol_type == CODING_EOL_CR)
5328 {
5329 unsigned char *p;
5330 for (p = buf.data; p < buf.data + produced; p++)
5331 if (*p == '\n') *p = '\r';
5332 }
5333 else if (coding->eol_type == CODING_EOL_CRLF)
5334 {
5335 int num_eol = 0;
5336 unsigned char *p0, *p1;
5337 for (p0 = buf.data, p1 = p0 + produced; p0 < p1; p0++)
5338 if (*p0 == '\n') num_eol++;
5339 if (produced + num_eol >= buf.size)
5340 extend_conversion_buffer (&buf);
5341 for (p0 = buf.data + produced, p1 = p0 + num_eol; p0 > buf.data;)
5342 {
5343 *--p1 = *--p0;
5344 if (*p0 == '\n') *--p1 = '\r';
5345 }
5346 produced += num_eol;
5347 produced_char += num_eol;
5348 }
5349 coding->eol_type = CODING_EOL_LF;
5350 coding->symbol = saved_coding_symbol;
5351 }
5352 }
5353
5354 if (coding->dst_multibyte)
5355 newstr = make_uninit_multibyte_string (produced_char + shrinked_bytes,
5356 produced + shrinked_bytes);
5357 else
5358 newstr = make_uninit_string (produced + shrinked_bytes);
5281 if (from > 0) 5359 if (from > 0)
5282 bcopy (XSTRING (str)->data, buf, from); 5360 bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
5283 result = decode_coding (coding, XSTRING (str)->data + from, 5361 bcopy (buf.data, XSTRING (newstr)->data + from, produced);
5284 buf + from, to_byte - from, len); 5362 if (shrinked_bytes > from)
5285 if (result == CODING_FINISH_INCONSISTENT_EOL) 5363 bcopy (XSTRING (str)->data + to_byte,
5286 { 5364 XSTRING (newstr)->data + from + produced,
5287 /* We simply try to decode the whole string again but without 5365 shrinked_bytes - from);
5288 eol-conversion this time. */ 5366 free_conversion_buffer (&buf);
5289 coding->eol_type = CODING_EOL_LF;
5290 coding->symbol = saved_coding_symbol;
5291 coding_free_composition_data (coding);
5292 return decode_coding_string (str, coding, nocopy);
5293 }
5294
5295 bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced,
5296 STRING_BYTES (XSTRING (str)) - to_byte);
5297
5298 len = from + STRING_BYTES (XSTRING (str)) - to_byte;
5299 if (coding->dst_multibyte)
5300 str = make_multibyte_string (buf, len + coding->produced_char,
5301 len + coding->produced);
5302 else
5303 str = make_unibyte_string (buf, len + coding->produced);
5304 5367
5305 if (coding->cmp_data && coding->cmp_data->used) 5368 if (coding->cmp_data && coding->cmp_data->used)
5306 coding_restore_composition (coding, str); 5369 coding_restore_composition (coding, newstr);
5307 coding_free_composition_data (coding); 5370 coding_free_composition_data (coding);
5308 5371
5309 if (SYMBOLP (coding->post_read_conversion) 5372 if (SYMBOLP (coding->post_read_conversion)
5310 && !NILP (Ffboundp (coding->post_read_conversion))) 5373 && !NILP (Ffboundp (coding->post_read_conversion)))
5311 str = run_pre_post_conversion_on_str (str, coding, 0); 5374 newstr = run_pre_post_conversion_on_str (newstr, coding, 0);
5312 5375
5313 return str; 5376 return newstr;
5314 } 5377 }
5315 5378
5316 Lisp_Object 5379 Lisp_Object
5317 encode_coding_string (str, coding, nocopy) 5380 encode_coding_string (str, coding, nocopy)
5318 Lisp_Object str; 5381 Lisp_Object str;
5319 struct coding_system *coding; 5382 struct coding_system *coding;
5320 int nocopy; 5383 int nocopy;
5321 { 5384 {
5322 int len; 5385 int len;
5323 char *buf; 5386 struct conversion_buffer buf;
5324 int from, to, to_byte; 5387 int from, to, to_byte;
5325 struct gcpro gcpro1; 5388 struct gcpro gcpro1;
5326 Lisp_Object saved_coding_symbol; 5389 Lisp_Object saved_coding_symbol;
5327 int result; 5390 int result;
5391 int shrinked_bytes = 0;
5392 Lisp_Object newstr;
5393 int consumed, consumed_char, produced;
5328 5394
5329 if (SYMBOLP (coding->pre_write_conversion) 5395 if (SYMBOLP (coding->pre_write_conversion)
5330 && !NILP (Ffboundp (coding->pre_write_conversion))) 5396 && !NILP (Ffboundp (coding->pre_write_conversion)))
5331 str = run_pre_post_conversion_on_str (str, coding, 1); 5397 str = run_pre_post_conversion_on_str (str, coding, 1);
5332 5398
5354 coding_save_composition (coding, from, to, str); 5420 coding_save_composition (coding, from, to, str);
5355 5421
5356 /* Try to skip the heading and tailing ASCIIs. */ 5422 /* Try to skip the heading and tailing ASCIIs. */
5357 if (coding->type != coding_type_ccl) 5423 if (coding->type != coding_type_ccl)
5358 { 5424 {
5359 int from_orig = from;
5360
5361 SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data, 5425 SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
5362 1); 5426 1);
5363 if (from == to_byte) 5427 if (from == to_byte)
5364 return (nocopy ? str : Fcopy_sequence (str)); 5428 return (nocopy ? str : Fcopy_sequence (str));
5429 shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
5365 } 5430 }
5366 5431
5367 len = encoding_buffer_size (coding, to_byte - from); 5432 len = encoding_buffer_size (coding, to_byte - from);
5368 len += from + STRING_BYTES (XSTRING (str)) - to_byte; 5433 allocate_conversion_buffer (buf, len);
5369 GCPRO1 (str); 5434
5370 buf = get_conversion_buffer (len); 5435 consumed = consumed_char = produced = 0;
5371 UNGCPRO; 5436
5372 5437 while (1)
5438 {
5439 result = encode_coding (coding, XSTRING (str)->data + from + consumed,
5440 buf.data + produced, to_byte - from - consumed,
5441 buf.size - produced);
5442 consumed += coding->consumed;
5443 produced += coding->produced;
5444 if (result == CODING_FINISH_NORMAL)
5445 break;
5446 /* Now result should be CODING_FINISH_INSUFFICIENT_DST. */
5447 extend_conversion_buffer (&buf);
5448 }
5449
5450 newstr = make_uninit_string (produced + shrinked_bytes);
5373 if (from > 0) 5451 if (from > 0)
5374 bcopy (XSTRING (str)->data, buf, from); 5452 bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
5375 result = encode_coding (coding, XSTRING (str)->data + from, 5453 bcopy (buf.data, XSTRING (newstr)->data + from, produced);
5376 buf + from, to_byte - from, len); 5454 if (shrinked_bytes > from)
5377 bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced, 5455 bcopy (XSTRING (str)->data + to_byte,
5378 STRING_BYTES (XSTRING (str)) - to_byte); 5456 XSTRING (newstr)->data + from + produced,
5379 5457 shrinked_bytes - from);
5380 len = from + STRING_BYTES (XSTRING (str)) - to_byte; 5458
5381 str = make_unibyte_string (buf, len + coding->produced); 5459 free_conversion_buffer (&buf);
5382 coding_free_composition_data (coding); 5460 coding_free_composition_data (coding);
5383 5461
5384 return str; 5462 return newstr;
5385 } 5463 }
5386 5464
5387 5465
5388 #ifdef emacs 5466 #ifdef emacs
5389 /*** 8. Emacs Lisp library functions ***/ 5467 /*** 8. Emacs Lisp library functions ***/
6206 6284
6207 6285
6208 /*** 9. Post-amble ***/ 6286 /*** 9. Post-amble ***/
6209 6287
6210 void 6288 void
6211 init_coding ()
6212 {
6213 conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
6214 }
6215
6216 void
6217 init_coding_once () 6289 init_coding_once ()
6218 { 6290 {
6219 int i; 6291 int i;
6220 6292
6221 /* Emacs' internal format specific initialize routine. */ 6293 /* Emacs' internal format specific initialize routine. */
6250 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7; 6322 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
6251 iso_code_class[ISO_CODE_ESC] = ISO_escape; 6323 iso_code_class[ISO_CODE_ESC] = ISO_escape;
6252 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2; 6324 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
6253 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3; 6325 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
6254 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer; 6326 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
6255
6256 conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
6257 6327
6258 setup_coding_system (Qnil, &keyboard_coding); 6328 setup_coding_system (Qnil, &keyboard_coding);
6259 setup_coding_system (Qnil, &terminal_coding); 6329 setup_coding_system (Qnil, &terminal_coding);
6260 setup_coding_system (Qnil, &safe_terminal_coding); 6330 setup_coding_system (Qnil, &safe_terminal_coding);
6261 setup_coding_system (Qnil, &default_buffer_file_coding); 6331 setup_coding_system (Qnil, &default_buffer_file_coding);