Mercurial > emacs
comparison src/coding.c @ 23339:2da87b489590
(check_composing_code): Fix previous change. Now it
always returns 0 or -1.
(decode_coding_iso2022): Adjusted for the above change.
(encode_coding_iso2022): When encoding the last block, flush out
trailing garbage bytes.
(setup_coding_system): Delete unnecessary code.
(shrink_decoding_region): Check translation table. If ASCII
should be translated, give up shrinking.
(shrink_encoding_region): Likewise.
(SHRINK_CONVERSION_REGION_THRESHHOLD): New macro.
(SHRINK_CONVERSION_REGION): New macro.
(code_convert_region): Call SHRINK_CONVERSION_REGION. Delete text
properties here.
(code_convert_region): In the case of encoding, always calculate
correct character number.
(code_convert_string): Call SHRINK_CONVERSION_REGION.
(code_convert_region1): Don't delete text properties here.
(check_composing_code): Fix previous change. Now it
always returns 0 or -1.
(decode_coding_iso2022): Adjusted for the above change.
(encode_coding_iso2022): When encoding the last block, flush out
trailing garbage bytes.
(setup_coding_system): Delete unnecessary code.
(shrink_decoding_region): Check translation table. If ASCII
should be translated, give up shrinking.
(shrink_encoding_region): Likewise.
(SHRINK_CONVERSION_REGION_THRESHHOLD): New macro.
(SHRINK_CONVERSION_REGION): New macro.
(code_convert_region): Call SHRINK_CONVERSION_REGION. Delete text
properties here.
(code_convert_region): In the case of encoding, always calculate
correct character number.
(code_convert_string): Call SHRINK_CONVERSION_REGION.
(code_convert_region1): Don't delete text properties here.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Mon, 28 Sep 1998 11:52:53 +0000 |
parents | bbd06336cd0c |
children | 6905813a49c6 |
comparison
equal
deleted
inserted
replaced
23338:0a2b76b09162 | 23339:2da87b489590 |
---|---|
956 coding->spec.iso2022.last_invalid_designation_register = reg; \ | 956 coding->spec.iso2022.last_invalid_designation_register = reg; \ |
957 goto label_invalid_code; \ | 957 goto label_invalid_code; \ |
958 } \ | 958 } \ |
959 } while (0) | 959 } while (0) |
960 | 960 |
961 /* Check if the current composing sequence contains only valid codes. | 961 /* Return 0 if there's a valid composing sequence starting at SRC and |
962 If the composing sequence doesn't end before SRC_END, return -1. | 962 ending before SRC_END, else return -1. */ |
963 Else, if it contains only valid codes, return 0. | |
964 Else return the length of the composing sequence. */ | |
965 | 963 |
966 int | 964 int |
967 check_composing_code (coding, src, src_end) | 965 check_composing_code (coding, src, src_end) |
968 struct coding_system *coding; | 966 struct coding_system *coding; |
969 unsigned char *src, *src_end; | 967 unsigned char *src, *src_end; |
970 { | 968 { |
971 unsigned char *src_start = src; | |
972 int invalid_code_found = 0; | |
973 int charset, c, c1, dim; | 969 int charset, c, c1, dim; |
974 | 970 |
975 while (src < src_end) | 971 while (src < src_end) |
976 { | 972 { |
977 if (*src++ != ISO_CODE_ESC) continue; | 973 c = *src++; |
978 if (src >= src_end) break; | 974 if (c >= 0x20) |
979 if ((c = *src++) == '1') /* end of compsition */ | 975 continue; |
980 return (invalid_code_found ? src - src_start : 0); | 976 if (c != ISO_CODE_ESC || src >= src_end) |
981 if (src + 2 >= src_end) break; | 977 return -1; |
982 if (!coding->flags & CODING_FLAG_ISO_DESIGNATION) | 978 c = *src++; |
983 invalid_code_found = 1; | 979 if (c == '1') /* end of compsition */ |
980 return 0; | |
981 if (src + 2 >= src_end | |
982 || !coding->flags & CODING_FLAG_ISO_DESIGNATION) | |
983 return -1; | |
984 | |
985 dim = (c == '$'); | |
986 if (dim == 1) | |
987 c = (*src >= '@' && *src <= 'B') ? '(' : *src++; | |
988 if (c >= '(' && c <= '/') | |
989 { | |
990 c1 = *src++; | |
991 if ((c1 < ' ' || c1 >= 0x80) | |
992 || (charset = iso_charset_table[dim][c >= ','][c1]) < 0 | |
993 || ! coding->safe_charsets[charset] | |
994 || (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) | |
995 == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)) | |
996 return -1; | |
997 } | |
984 else | 998 else |
985 { | 999 return -1; |
986 dim = 0; | 1000 } |
987 if (c == '$') | 1001 |
988 { | 1002 /* We have not found the sequence "ESC 1". */ |
989 dim = 1; | 1003 return -1; |
990 c = (*src >= '@' && *src <= 'B') ? '(' : *src++; | |
991 } | |
992 if (c >= '(' && c <= '/') | |
993 { | |
994 c1 = *src++; | |
995 if ((c1 < ' ' || c1 >= 0x80) | |
996 || (charset = iso_charset_table[dim][c >= ','][c1]) < 0 | |
997 || ! coding->safe_charsets[charset] | |
998 || (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) | |
999 == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)) | |
1000 invalid_code_found = 1; | |
1001 } | |
1002 else | |
1003 invalid_code_found = 1; | |
1004 } | |
1005 } | |
1006 return (invalid_code_found ? src - src_start : -1); | |
1007 } | 1004 } |
1008 | 1005 |
1009 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ | 1006 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ |
1010 | 1007 |
1011 int | 1008 int |
1181 if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION)) | 1178 if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION)) |
1182 goto label_invalid_code; | 1179 goto label_invalid_code; |
1183 ONE_MORE_BYTE (c1); | 1180 ONE_MORE_BYTE (c1); |
1184 if (c1 >= '@' && c1 <= 'B') | 1181 if (c1 >= '@' && c1 <= 'B') |
1185 { /* designation of JISX0208.1978, GB2312.1980, | 1182 { /* designation of JISX0208.1978, GB2312.1980, |
1186 or JISX0208.1980 */ | 1183 or JISX0208.1980 */ |
1187 DECODE_DESIGNATION (0, 2, 94, c1); | 1184 DECODE_DESIGNATION (0, 2, 94, c1); |
1188 } | 1185 } |
1189 else if (c1 >= 0x28 && c1 <= 0x2B) | 1186 else if (c1 >= 0x28 && c1 <= 0x2B) |
1190 { /* designation of DIMENSION2_CHARS94 character set */ | 1187 { /* designation of DIMENSION2_CHARS94 character set */ |
1191 ONE_MORE_BYTE (c2); | 1188 ONE_MORE_BYTE (c2); |
1235 break; | 1232 break; |
1236 | 1233 |
1237 case '0': case '2': /* start composing */ | 1234 case '0': case '2': /* start composing */ |
1238 /* Before processing composing, we must be sure that all | 1235 /* Before processing composing, we must be sure that all |
1239 characters being composed are supported by CODING. | 1236 characters being composed are supported by CODING. |
1240 If not, we must give up composing and insert the | 1237 If not, we must give up composing. */ |
1241 bunch of codes for composing as is without decoding. */ | 1238 if (check_composing_code (coding, src, src_end) == 0) |
1242 { | 1239 { |
1243 int result1; | 1240 /* We are looking at a valid composition sequence. */ |
1244 | 1241 coding->composing = (c1 == '0' |
1245 result1 = check_composing_code (coding, src, src_end); | 1242 ? COMPOSING_NO_RULE_HEAD |
1246 if (result1 == 0) | 1243 : COMPOSING_WITH_RULE_HEAD); |
1247 { | 1244 coding->composed_chars = 0; |
1248 coding->composing = (c1 == '0' | 1245 } |
1249 ? COMPOSING_NO_RULE_HEAD | 1246 else |
1250 : COMPOSING_WITH_RULE_HEAD); | 1247 { |
1251 coding->composed_chars = 0; | 1248 *dst++ = ISO_CODE_ESC; |
1252 } | 1249 *dst++ = c1; |
1253 else if (result1 > 0) | 1250 coding->produced_char += 2; |
1254 { | 1251 } |
1255 if (result1 + 2 < (dst_bytes ? dst_end : src_base) - dst) | |
1256 { | |
1257 bcopy (src_base, dst, result1 + 2); | |
1258 src += result1; | |
1259 dst += result1 + 2; | |
1260 coding->produced_char += result1 + 2; | |
1261 coding->fake_multibyte = 1; | |
1262 } | |
1263 else | |
1264 { | |
1265 result = CODING_FINISH_INSUFFICIENT_DST; | |
1266 goto label_end_of_loop_2; | |
1267 } | |
1268 } | |
1269 else | |
1270 goto label_end_of_loop; | |
1271 } | |
1272 break; | 1252 break; |
1273 | 1253 |
1274 case '1': /* end composing */ | 1254 case '1': /* end composing */ |
1255 if (!coding->composing) | |
1256 { | |
1257 *dst++ = ISO_CODE_ESC; | |
1258 *dst++ = c1; | |
1259 coding->produced_char += 2; | |
1260 break; | |
1261 } | |
1262 | |
1275 if (coding->composed_chars > 0) | 1263 if (coding->composed_chars > 0) |
1276 { | 1264 { |
1277 if (coding->composed_chars == 1) | 1265 if (coding->composed_chars == 1) |
1278 { | 1266 { |
1279 unsigned char *this_char_start = dst; | 1267 unsigned char *this_char_start = dst; |
2000 if (coding->mode & CODING_MODE_LAST_BLOCK) | 1988 if (coding->mode & CODING_MODE_LAST_BLOCK) |
2001 { | 1989 { |
2002 ENCODE_RESET_PLANE_AND_REGISTER; | 1990 ENCODE_RESET_PLANE_AND_REGISTER; |
2003 if (COMPOSING_P (coding->composing)) | 1991 if (COMPOSING_P (coding->composing)) |
2004 ENCODE_COMPOSITION_END; | 1992 ENCODE_COMPOSITION_END; |
1993 if (result == CODING_FINISH_INSUFFICIENT_SRC) | |
1994 { | |
1995 while (src < src_end && dst < dst_end) | |
1996 *dst++ = *src++; | |
1997 } | |
2005 } | 1998 } |
2006 coding->consumed = src - source; | 1999 coding->consumed = src - source; |
2007 coding->produced = coding->produced_char = dst - destination; | 2000 coding->produced = coding->produced_char = dst - destination; |
2008 return result; | 2001 return result; |
2009 } | 2002 } |
2874 return 0; | 2867 return 0; |
2875 } | 2868 } |
2876 | 2869 |
2877 /* Initialize remaining fields. */ | 2870 /* Initialize remaining fields. */ |
2878 coding->composing = 0; | 2871 coding->composing = 0; |
2879 coding->translation_table_for_decode = Qnil; | |
2880 coding->translation_table_for_encode = Qnil; | |
2881 | 2872 |
2882 /* Get values of coding system properties: | 2873 /* Get values of coding system properties: |
2883 `post-read-conversion', `pre-write-conversion', | 2874 `post-read-conversion', `pre-write-conversion', |
2884 `translation-table-for-decode', `translation-table-for-encode'. */ | 2875 `translation-table-for-decode', `translation-table-for-encode'. */ |
2885 plist = XVECTOR (coding_spec)->contents[3]; | 2876 plist = XVECTOR (coding_spec)->contents[3]; |
3860 struct coding_system *coding; | 3851 struct coding_system *coding; |
3861 unsigned char *str; | 3852 unsigned char *str; |
3862 { | 3853 { |
3863 unsigned char *begp_orig, *begp, *endp_orig, *endp, c; | 3854 unsigned char *begp_orig, *begp, *endp_orig, *endp, c; |
3864 int eol_conversion; | 3855 int eol_conversion; |
3856 Lisp_Object translation_table; | |
3865 | 3857 |
3866 if (coding->type == coding_type_ccl | 3858 if (coding->type == coding_type_ccl |
3867 || coding->type == coding_type_undecided | 3859 || coding->type == coding_type_undecided |
3868 || !NILP (coding->post_read_conversion)) | 3860 || !NILP (coding->post_read_conversion)) |
3869 { | 3861 { |
3873 else if (coding->type == coding_type_no_conversion) | 3865 else if (coding->type == coding_type_no_conversion) |
3874 { | 3866 { |
3875 /* We need no conversion, but don't have to skip any data here. | 3867 /* We need no conversion, but don't have to skip any data here. |
3876 Decoding routine handles them effectively anyway. */ | 3868 Decoding routine handles them effectively anyway. */ |
3877 return; | 3869 return; |
3870 } | |
3871 | |
3872 translation_table = coding->translation_table_for_decode; | |
3873 if (NILP (translation_table) && !NILP (Venable_character_translation)) | |
3874 translation_table = Vstandard_translation_table_for_decode; | |
3875 if (CHAR_TABLE_P (translation_table)) | |
3876 { | |
3877 int i; | |
3878 for (i = 0; i < 128; i++) | |
3879 if (!NILP (CHAR_TABLE_REF (translation_table, i))) | |
3880 break; | |
3881 if (i < 128) | |
3882 /* Some ASCII character should be tranlsated. We give up | |
3883 shrinking. */ | |
3884 return; | |
3878 } | 3885 } |
3879 | 3886 |
3880 eol_conversion = (coding->eol_type != CODING_EOL_LF); | 3887 eol_conversion = (coding->eol_type != CODING_EOL_LF); |
3881 | 3888 |
3882 if ((! eol_conversion) && (coding->heading_ascii >= 0)) | 3889 if ((! eol_conversion) && (coding->heading_ascii >= 0)) |
4020 struct coding_system *coding; | 4027 struct coding_system *coding; |
4021 unsigned char *str; | 4028 unsigned char *str; |
4022 { | 4029 { |
4023 unsigned char *begp_orig, *begp, *endp_orig, *endp; | 4030 unsigned char *begp_orig, *begp, *endp_orig, *endp; |
4024 int eol_conversion; | 4031 int eol_conversion; |
4032 Lisp_Object translation_table; | |
4025 | 4033 |
4026 if (coding->type == coding_type_ccl) | 4034 if (coding->type == coding_type_ccl) |
4027 /* We can't skip any data. */ | 4035 /* We can't skip any data. */ |
4028 return; | 4036 return; |
4029 else if (coding->type == coding_type_no_conversion) | 4037 else if (coding->type == coding_type_no_conversion) |
4030 { | 4038 { |
4031 /* We need no conversion. */ | 4039 /* We need no conversion. */ |
4032 *beg = *end; | 4040 *beg = *end; |
4033 return; | 4041 return; |
4042 } | |
4043 | |
4044 translation_table = coding->translation_table_for_encode; | |
4045 if (NILP (translation_table) && !NILP (Venable_character_translation)) | |
4046 translation_table = Vstandard_translation_table_for_encode; | |
4047 if (CHAR_TABLE_P (translation_table)) | |
4048 { | |
4049 int i; | |
4050 for (i = 0; i < 128; i++) | |
4051 if (!NILP (CHAR_TABLE_REF (translation_table, i))) | |
4052 break; | |
4053 if (i < 128) | |
4054 /* Some ASCII character should be tranlsated. We give up | |
4055 shrinking. */ | |
4056 return; | |
4034 } | 4057 } |
4035 | 4058 |
4036 if (str) | 4059 if (str) |
4037 { | 4060 { |
4038 begp_orig = begp = str + *beg; | 4061 begp_orig = begp = str + *beg; |
4095 *beg += begp - begp_orig; | 4118 *beg += begp - begp_orig; |
4096 *end += endp - endp_orig; | 4119 *end += endp - endp_orig; |
4097 return; | 4120 return; |
4098 } | 4121 } |
4099 | 4122 |
4123 /* As shrinking conversion region requires some overhead, we don't try | |
4124 shrinking if the length of conversion region is less than this | |
4125 value. */ | |
4126 static int shrink_conversion_region_threshhold = 1024; | |
4127 | |
4128 #define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep) \ | |
4129 do { \ | |
4130 if (*(end) - *(beg) > shrink_conversion_region_threshhold) \ | |
4131 { \ | |
4132 if (encodep) shrink_encoding_region (beg, end, coding, str); \ | |
4133 else shrink_decoding_region (beg, end, coding, str); \ | |
4134 } \ | |
4135 } while (0) | |
4136 | |
4100 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the | 4137 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the |
4101 text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by | 4138 text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by |
4102 coding system CODING, and return the status code of code conversion | 4139 coding system CODING, and return the status code of code conversion |
4103 (currently, this value has no meaning). | 4140 (currently, this value has no meaning). |
4104 | 4141 |
4238 { | 4275 { |
4239 int from_byte_orig = from_byte, to_byte_orig = to_byte; | 4276 int from_byte_orig = from_byte, to_byte_orig = to_byte; |
4240 | 4277 |
4241 if (from < GPT && GPT < to) | 4278 if (from < GPT && GPT < to) |
4242 move_gap_both (from, from_byte); | 4279 move_gap_both (from, from_byte); |
4243 if (encodep) | 4280 SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep); |
4244 shrink_encoding_region (&from_byte, &to_byte, coding, NULL); | |
4245 else | |
4246 shrink_decoding_region (&from_byte, &to_byte, coding, NULL); | |
4247 if (from_byte == to_byte | 4281 if (from_byte == to_byte |
4248 && ! (coding->mode & CODING_MODE_LAST_BLOCK | 4282 && ! (coding->mode & CODING_MODE_LAST_BLOCK |
4249 && CODING_REQUIRE_FLUSHING (coding))) | 4283 && CODING_REQUIRE_FLUSHING (coding))) |
4250 { | 4284 { |
4251 coding->produced = len_byte; | 4285 coding->produced = len_byte; |
4261 total_skip = head_skip + tail_skip; | 4295 total_skip = head_skip + tail_skip; |
4262 from += head_skip; | 4296 from += head_skip; |
4263 to -= tail_skip; | 4297 to -= tail_skip; |
4264 len -= total_skip; len_byte -= total_skip; | 4298 len -= total_skip; len_byte -= total_skip; |
4265 } | 4299 } |
4300 | |
4301 /* The code conversion routine can not preserve text properties for | |
4302 now. So, we must remove all text properties in the region. */ | |
4303 if (replace) | |
4304 Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil); | |
4266 | 4305 |
4267 /* For converion, we must put the gap before the text in addition to | 4306 /* For converion, we must put the gap before the text in addition to |
4268 making the gap larger for efficient decoding. The required gap | 4307 making the gap larger for efficient decoding. The required gap |
4269 size starts from 2000 which is the magic number used in make_gap. | 4308 size starts from 2000 which is the magic number used in make_gap. |
4270 But, after one batch of conversion, it will be incremented if we | 4309 But, after one batch of conversion, it will be incremented if we |
4437 } | 4476 } |
4438 } | 4477 } |
4439 if (src - dst > 0) *dst = 0; /* Put an anchor. */ | 4478 if (src - dst > 0) *dst = 0; /* Put an anchor. */ |
4440 | 4479 |
4441 if (multibyte | 4480 if (multibyte |
4442 && (fake_multibyte | 4481 && (encodep |
4443 || !encodep && (to - from) != (to_byte - from_byte))) | 4482 || fake_multibyte |
4483 || (to - from) != (to_byte - from_byte))) | |
4444 inserted = multibyte_chars_in_text (GPT_ADDR, inserted_byte); | 4484 inserted = multibyte_chars_in_text (GPT_ADDR, inserted_byte); |
4445 | 4485 |
4446 /* If we have shrinked the conversion area, adjust it now. */ | 4486 /* If we have shrinked the conversion area, adjust it now. */ |
4447 if (total_skip > 0) | 4487 if (total_skip > 0) |
4448 { | 4488 { |
4560 : ! CODING_REQUIRE_DECODING (coding)) | 4600 : ! CODING_REQUIRE_DECODING (coding)) |
4561 from = to_byte; | 4601 from = to_byte; |
4562 else | 4602 else |
4563 { | 4603 { |
4564 /* Try to skip the heading and tailing ASCIIs. */ | 4604 /* Try to skip the heading and tailing ASCIIs. */ |
4565 if (encodep) | 4605 SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data, |
4566 shrink_encoding_region (&from, &to_byte, coding, XSTRING (str)->data); | 4606 encodep); |
4567 else | |
4568 shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data); | |
4569 } | 4607 } |
4570 if (from == to_byte) | 4608 if (from == to_byte) |
4571 return (nocopy ? str : Fcopy_sequence (str)); | 4609 return (nocopy ? str : Fcopy_sequence (str)); |
4572 | 4610 |
4573 if (encodep) | 4611 if (encodep) |
4811 if (NILP (coding_system)) | 4849 if (NILP (coding_system)) |
4812 return make_number (to - from); | 4850 return make_number (to - from); |
4813 | 4851 |
4814 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) | 4852 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) |
4815 error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data); | 4853 error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data); |
4816 | |
4817 /* The code conversion routine can not preserve text properties for | |
4818 now. So, we must remove all text properties in the region. */ | |
4819 Fset_text_properties (start, end, Qnil, Qnil); | |
4820 | 4854 |
4821 coding.mode |= CODING_MODE_LAST_BLOCK; | 4855 coding.mode |= CODING_MODE_LAST_BLOCK; |
4822 code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to), | 4856 code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to), |
4823 &coding, encodep, 1); | 4857 &coding, encodep, 1); |
4824 Vlast_coding_system_used = coding.symbol; | 4858 Vlast_coding_system_used = coding.symbol; |