comparison src/coding.c @ 23339:2da87b489590

(check_composing_code): Fix previous change. Now it always returns 0 or -1. (decode_coding_iso2022): Adjusted for the above change. (encode_coding_iso2022): When encoding the last block, flush out trailing garbage bytes. (setup_coding_system): Delete unnecessary code. (shrink_decoding_region): Check translation table. If ASCII should be translated, give up shrinking. (shrink_encoding_region): Likewise. (SHRINK_CONVERSION_REGION_THRESHHOLD): New macro. (SHRINK_CONVERSION_REGION): New macro. (code_convert_region): Call SHRINK_CONVERSION_REGION. Delete text properties here. (code_convert_region): In the case of encoding, always calculate correct character number. (code_convert_string): Call SHRINK_CONVERSION_REGION. (code_convert_region1): Don't delete text properties here. (check_composing_code): Fix previous change. Now it always returns 0 or -1. (decode_coding_iso2022): Adjusted for the above change. (encode_coding_iso2022): When encoding the last block, flush out trailing garbage bytes. (setup_coding_system): Delete unnecessary code. (shrink_decoding_region): Check translation table. If ASCII should be translated, give up shrinking. (shrink_encoding_region): Likewise. (SHRINK_CONVERSION_REGION_THRESHHOLD): New macro. (SHRINK_CONVERSION_REGION): New macro. (code_convert_region): Call SHRINK_CONVERSION_REGION. Delete text properties here. (code_convert_region): In the case of encoding, always calculate correct character number. (code_convert_string): Call SHRINK_CONVERSION_REGION. (code_convert_region1): Don't delete text properties here.
author Kenichi Handa <handa@m17n.org>
date Mon, 28 Sep 1998 11:52:53 +0000
parents bbd06336cd0c
children 6905813a49c6
comparison
equal deleted inserted replaced
23338:0a2b76b09162 23339:2da87b489590
956 coding->spec.iso2022.last_invalid_designation_register = reg; \ 956 coding->spec.iso2022.last_invalid_designation_register = reg; \
957 goto label_invalid_code; \ 957 goto label_invalid_code; \
958 } \ 958 } \
959 } while (0) 959 } while (0)
960 960
961 /* Check if the current composing sequence contains only valid codes. 961 /* Return 0 if there's a valid composing sequence starting at SRC and
962 If the composing sequence doesn't end before SRC_END, return -1. 962 ending before SRC_END, else return -1. */
963 Else, if it contains only valid codes, return 0.
964 Else return the length of the composing sequence. */
965 963
966 int 964 int
967 check_composing_code (coding, src, src_end) 965 check_composing_code (coding, src, src_end)
968 struct coding_system *coding; 966 struct coding_system *coding;
969 unsigned char *src, *src_end; 967 unsigned char *src, *src_end;
970 { 968 {
971 unsigned char *src_start = src;
972 int invalid_code_found = 0;
973 int charset, c, c1, dim; 969 int charset, c, c1, dim;
974 970
975 while (src < src_end) 971 while (src < src_end)
976 { 972 {
977 if (*src++ != ISO_CODE_ESC) continue; 973 c = *src++;
978 if (src >= src_end) break; 974 if (c >= 0x20)
979 if ((c = *src++) == '1') /* end of composition */ 975 continue;
980 return (invalid_code_found ? src - src_start : 0); 976 if (c != ISO_CODE_ESC || src >= src_end)
981 if (src + 2 >= src_end) break; 977 return -1;
982 if (!coding->flags & CODING_FLAG_ISO_DESIGNATION) 978 c = *src++;
983 invalid_code_found = 1; 979 if (c == '1') /* end of composition */
980 return 0;
981 if (src + 2 >= src_end
982 || !coding->flags & CODING_FLAG_ISO_DESIGNATION)
983 return -1;
984
985 dim = (c == '$');
986 if (dim == 1)
987 c = (*src >= '@' && *src <= 'B') ? '(' : *src++;
988 if (c >= '(' && c <= '/')
989 {
990 c1 = *src++;
991 if ((c1 < ' ' || c1 >= 0x80)
992 || (charset = iso_charset_table[dim][c >= ','][c1]) < 0
993 || ! coding->safe_charsets[charset]
994 || (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
995 == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
996 return -1;
997 }
984 else 998 else
985 { 999 return -1;
986 dim = 0; 1000 }
987 if (c == '$') 1001
988 { 1002 /* We have not found the sequence "ESC 1". */
989 dim = 1; 1003 return -1;
990 c = (*src >= '@' && *src <= 'B') ? '(' : *src++;
991 }
992 if (c >= '(' && c <= '/')
993 {
994 c1 = *src++;
995 if ((c1 < ' ' || c1 >= 0x80)
996 || (charset = iso_charset_table[dim][c >= ','][c1]) < 0
997 || ! coding->safe_charsets[charset]
998 || (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
999 == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
1000 invalid_code_found = 1;
1001 }
1002 else
1003 invalid_code_found = 1;
1004 }
1005 }
1006 return (invalid_code_found ? src - src_start : -1);
1007 } 1004 }
1008 1005
1009 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ 1006 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1010 1007
1011 int 1008 int
1181 if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION)) 1178 if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1182 goto label_invalid_code; 1179 goto label_invalid_code;
1183 ONE_MORE_BYTE (c1); 1180 ONE_MORE_BYTE (c1);
1184 if (c1 >= '@' && c1 <= 'B') 1181 if (c1 >= '@' && c1 <= 'B')
1185 { /* designation of JISX0208.1978, GB2312.1980, 1182 { /* designation of JISX0208.1978, GB2312.1980,
1186 or JISX0208.1980 */ 1183 or JISX0208.1980 */
1187 DECODE_DESIGNATION (0, 2, 94, c1); 1184 DECODE_DESIGNATION (0, 2, 94, c1);
1188 } 1185 }
1189 else if (c1 >= 0x28 && c1 <= 0x2B) 1186 else if (c1 >= 0x28 && c1 <= 0x2B)
1190 { /* designation of DIMENSION2_CHARS94 character set */ 1187 { /* designation of DIMENSION2_CHARS94 character set */
1191 ONE_MORE_BYTE (c2); 1188 ONE_MORE_BYTE (c2);
1235 break; 1232 break;
1236 1233
1237 case '0': case '2': /* start composing */ 1234 case '0': case '2': /* start composing */
1238 /* Before processing composing, we must be sure that all 1235 /* Before processing composing, we must be sure that all
1239 characters being composed are supported by CODING. 1236 characters being composed are supported by CODING.
1240 If not, we must give up composing and insert the 1237 If not, we must give up composing. */
1241 bunch of codes for composing as is without decoding. */ 1238 if (check_composing_code (coding, src, src_end) == 0)
1242 { 1239 {
1243 int result1; 1240 /* We are looking at a valid composition sequence. */
1244 1241 coding->composing = (c1 == '0'
1245 result1 = check_composing_code (coding, src, src_end); 1242 ? COMPOSING_NO_RULE_HEAD
1246 if (result1 == 0) 1243 : COMPOSING_WITH_RULE_HEAD);
1247 { 1244 coding->composed_chars = 0;
1248 coding->composing = (c1 == '0' 1245 }
1249 ? COMPOSING_NO_RULE_HEAD 1246 else
1250 : COMPOSING_WITH_RULE_HEAD); 1247 {
1251 coding->composed_chars = 0; 1248 *dst++ = ISO_CODE_ESC;
1252 } 1249 *dst++ = c1;
1253 else if (result1 > 0) 1250 coding->produced_char += 2;
1254 { 1251 }
1255 if (result1 + 2 < (dst_bytes ? dst_end : src_base) - dst)
1256 {
1257 bcopy (src_base, dst, result1 + 2);
1258 src += result1;
1259 dst += result1 + 2;
1260 coding->produced_char += result1 + 2;
1261 coding->fake_multibyte = 1;
1262 }
1263 else
1264 {
1265 result = CODING_FINISH_INSUFFICIENT_DST;
1266 goto label_end_of_loop_2;
1267 }
1268 }
1269 else
1270 goto label_end_of_loop;
1271 }
1272 break; 1252 break;
1273 1253
1274 case '1': /* end composing */ 1254 case '1': /* end composing */
1255 if (!coding->composing)
1256 {
1257 *dst++ = ISO_CODE_ESC;
1258 *dst++ = c1;
1259 coding->produced_char += 2;
1260 break;
1261 }
1262
1275 if (coding->composed_chars > 0) 1263 if (coding->composed_chars > 0)
1276 { 1264 {
1277 if (coding->composed_chars == 1) 1265 if (coding->composed_chars == 1)
1278 { 1266 {
1279 unsigned char *this_char_start = dst; 1267 unsigned char *this_char_start = dst;
2000 if (coding->mode & CODING_MODE_LAST_BLOCK) 1988 if (coding->mode & CODING_MODE_LAST_BLOCK)
2001 { 1989 {
2002 ENCODE_RESET_PLANE_AND_REGISTER; 1990 ENCODE_RESET_PLANE_AND_REGISTER;
2003 if (COMPOSING_P (coding->composing)) 1991 if (COMPOSING_P (coding->composing))
2004 ENCODE_COMPOSITION_END; 1992 ENCODE_COMPOSITION_END;
1993 if (result == CODING_FINISH_INSUFFICIENT_SRC)
1994 {
1995 while (src < src_end && dst < dst_end)
1996 *dst++ = *src++;
1997 }
2005 } 1998 }
2006 coding->consumed = src - source; 1999 coding->consumed = src - source;
2007 coding->produced = coding->produced_char = dst - destination; 2000 coding->produced = coding->produced_char = dst - destination;
2008 return result; 2001 return result;
2009 } 2002 }
2874 return 0; 2867 return 0;
2875 } 2868 }
2876 2869
2877 /* Initialize remaining fields. */ 2870 /* Initialize remaining fields. */
2878 coding->composing = 0; 2871 coding->composing = 0;
2879 coding->translation_table_for_decode = Qnil;
2880 coding->translation_table_for_encode = Qnil;
2881 2872
2882 /* Get values of coding system properties: 2873 /* Get values of coding system properties:
2883 `post-read-conversion', `pre-write-conversion', 2874 `post-read-conversion', `pre-write-conversion',
2884 `translation-table-for-decode', `translation-table-for-encode'. */ 2875 `translation-table-for-decode', `translation-table-for-encode'. */
2885 plist = XVECTOR (coding_spec)->contents[3]; 2876 plist = XVECTOR (coding_spec)->contents[3];
3860 struct coding_system *coding; 3851 struct coding_system *coding;
3861 unsigned char *str; 3852 unsigned char *str;
3862 { 3853 {
3863 unsigned char *begp_orig, *begp, *endp_orig, *endp, c; 3854 unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
3864 int eol_conversion; 3855 int eol_conversion;
3856 Lisp_Object translation_table;
3865 3857
3866 if (coding->type == coding_type_ccl 3858 if (coding->type == coding_type_ccl
3867 || coding->type == coding_type_undecided 3859 || coding->type == coding_type_undecided
3868 || !NILP (coding->post_read_conversion)) 3860 || !NILP (coding->post_read_conversion))
3869 { 3861 {
3873 else if (coding->type == coding_type_no_conversion) 3865 else if (coding->type == coding_type_no_conversion)
3874 { 3866 {
3875 /* We need no conversion, but don't have to skip any data here. 3867 /* We need no conversion, but don't have to skip any data here.
3876 Decoding routine handles them effectively anyway. */ 3868 Decoding routine handles them effectively anyway. */
3877 return; 3869 return;
3870 }
3871
3872 translation_table = coding->translation_table_for_decode;
3873 if (NILP (translation_table) && !NILP (Venable_character_translation))
3874 translation_table = Vstandard_translation_table_for_decode;
3875 if (CHAR_TABLE_P (translation_table))
3876 {
3877 int i;
3878 for (i = 0; i < 128; i++)
3879 if (!NILP (CHAR_TABLE_REF (translation_table, i)))
3880 break;
3881 if (i < 128)
3882 /* Some ASCII character should be translated. We give up
3883 shrinking. */
3884 return;
3878 } 3885 }
3879 3886
3880 eol_conversion = (coding->eol_type != CODING_EOL_LF); 3887 eol_conversion = (coding->eol_type != CODING_EOL_LF);
3881 3888
3882 if ((! eol_conversion) && (coding->heading_ascii >= 0)) 3889 if ((! eol_conversion) && (coding->heading_ascii >= 0))
4020 struct coding_system *coding; 4027 struct coding_system *coding;
4021 unsigned char *str; 4028 unsigned char *str;
4022 { 4029 {
4023 unsigned char *begp_orig, *begp, *endp_orig, *endp; 4030 unsigned char *begp_orig, *begp, *endp_orig, *endp;
4024 int eol_conversion; 4031 int eol_conversion;
4032 Lisp_Object translation_table;
4025 4033
4026 if (coding->type == coding_type_ccl) 4034 if (coding->type == coding_type_ccl)
4027 /* We can't skip any data. */ 4035 /* We can't skip any data. */
4028 return; 4036 return;
4029 else if (coding->type == coding_type_no_conversion) 4037 else if (coding->type == coding_type_no_conversion)
4030 { 4038 {
4031 /* We need no conversion. */ 4039 /* We need no conversion. */
4032 *beg = *end; 4040 *beg = *end;
4033 return; 4041 return;
4042 }
4043
4044 translation_table = coding->translation_table_for_encode;
4045 if (NILP (translation_table) && !NILP (Venable_character_translation))
4046 translation_table = Vstandard_translation_table_for_encode;
4047 if (CHAR_TABLE_P (translation_table))
4048 {
4049 int i;
4050 for (i = 0; i < 128; i++)
4051 if (!NILP (CHAR_TABLE_REF (translation_table, i)))
4052 break;
4053 if (i < 128)
4054 /* Some ASCII character should be translated. We give up
4055 shrinking. */
4056 return;
4034 } 4057 }
4035 4058
4036 if (str) 4059 if (str)
4037 { 4060 {
4038 begp_orig = begp = str + *beg; 4061 begp_orig = begp = str + *beg;
4095 *beg += begp - begp_orig; 4118 *beg += begp - begp_orig;
4096 *end += endp - endp_orig; 4119 *end += endp - endp_orig;
4097 return; 4120 return;
4098 } 4121 }
4099 4122
4123 /* As shrinking conversion region requires some overhead, we don't try
4124 shrinking if the length of conversion region is less than this
4125 value. */
4126 static int shrink_conversion_region_threshhold = 1024;
4127
4128 #define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep) \
4129 do { \
4130 if (*(end) - *(beg) > shrink_conversion_region_threshhold) \
4131 { \
4132 if (encodep) shrink_encoding_region (beg, end, coding, str); \
4133 else shrink_decoding_region (beg, end, coding, str); \
4134 } \
4135 } while (0)
4136
4100 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the 4137 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
4101 text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by 4138 text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
4102 coding system CODING, and return the status code of code conversion 4139 coding system CODING, and return the status code of code conversion
4103 (currently, this value has no meaning). 4140 (currently, this value has no meaning).
4104 4141
4238 { 4275 {
4239 int from_byte_orig = from_byte, to_byte_orig = to_byte; 4276 int from_byte_orig = from_byte, to_byte_orig = to_byte;
4240 4277
4241 if (from < GPT && GPT < to) 4278 if (from < GPT && GPT < to)
4242 move_gap_both (from, from_byte); 4279 move_gap_both (from, from_byte);
4243 if (encodep) 4280 SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
4244 shrink_encoding_region (&from_byte, &to_byte, coding, NULL);
4245 else
4246 shrink_decoding_region (&from_byte, &to_byte, coding, NULL);
4247 if (from_byte == to_byte 4281 if (from_byte == to_byte
4248 && ! (coding->mode & CODING_MODE_LAST_BLOCK 4282 && ! (coding->mode & CODING_MODE_LAST_BLOCK
4249 && CODING_REQUIRE_FLUSHING (coding))) 4283 && CODING_REQUIRE_FLUSHING (coding)))
4250 { 4284 {
4251 coding->produced = len_byte; 4285 coding->produced = len_byte;
4261 total_skip = head_skip + tail_skip; 4295 total_skip = head_skip + tail_skip;
4262 from += head_skip; 4296 from += head_skip;
4263 to -= tail_skip; 4297 to -= tail_skip;
4264 len -= total_skip; len_byte -= total_skip; 4298 len -= total_skip; len_byte -= total_skip;
4265 } 4299 }
4300
4301 /* The code conversion routine can not preserve text properties for
4302 now. So, we must remove all text properties in the region. */
4303 if (replace)
4304 Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
4266 4305
4267 /* For conversion, we must put the gap before the text in addition to 4306 /* For conversion, we must put the gap before the text in addition to
4268 making the gap larger for efficient decoding. The required gap 4307 making the gap larger for efficient decoding. The required gap
4269 size starts from 2000 which is the magic number used in make_gap. 4308 size starts from 2000 which is the magic number used in make_gap.
4270 But, after one batch of conversion, it will be incremented if we 4309 But, after one batch of conversion, it will be incremented if we
4437 } 4476 }
4438 } 4477 }
4439 if (src - dst > 0) *dst = 0; /* Put an anchor. */ 4478 if (src - dst > 0) *dst = 0; /* Put an anchor. */
4440 4479
4441 if (multibyte 4480 if (multibyte
4442 && (fake_multibyte 4481 && (encodep
4443 || !encodep && (to - from) != (to_byte - from_byte))) 4482 || fake_multibyte
4483 || (to - from) != (to_byte - from_byte)))
4444 inserted = multibyte_chars_in_text (GPT_ADDR, inserted_byte); 4484 inserted = multibyte_chars_in_text (GPT_ADDR, inserted_byte);
4445 4485
4446 /* If we have shrunk the conversion area, adjust it now. */ 4486 /* If we have shrunk the conversion area, adjust it now. */
4447 if (total_skip > 0) 4487 if (total_skip > 0)
4448 { 4488 {
4560 : ! CODING_REQUIRE_DECODING (coding)) 4600 : ! CODING_REQUIRE_DECODING (coding))
4561 from = to_byte; 4601 from = to_byte;
4562 else 4602 else
4563 { 4603 {
4564 /* Try to skip the leading and trailing ASCIIs. */ 4604 /* Try to skip the leading and trailing ASCIIs. */
4565 if (encodep) 4605 SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
4566 shrink_encoding_region (&from, &to_byte, coding, XSTRING (str)->data); 4606 encodep);
4567 else
4568 shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data);
4569 } 4607 }
4570 if (from == to_byte) 4608 if (from == to_byte)
4571 return (nocopy ? str : Fcopy_sequence (str)); 4609 return (nocopy ? str : Fcopy_sequence (str));
4572 4610
4573 if (encodep) 4611 if (encodep)
4811 if (NILP (coding_system)) 4849 if (NILP (coding_system))
4812 return make_number (to - from); 4850 return make_number (to - from);
4813 4851
4814 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) 4852 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4815 error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data); 4853 error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4816
4817 /* The code conversion routine can not preserve text properties for
4818 now. So, we must remove all text properties in the region. */
4819 Fset_text_properties (start, end, Qnil, Qnil);
4820 4854
4821 coding.mode |= CODING_MODE_LAST_BLOCK; 4855 coding.mode |= CODING_MODE_LAST_BLOCK;
4822 code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to), 4856 code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
4823 &coding, encodep, 1); 4857 &coding, encodep, 1);
4824 Vlast_coding_system_used = coding.symbol; 4858 Vlast_coding_system_used = coding.symbol;