Mercurial > emacs
comparison src/coding.c @ 89665:9010cefe8d29
(enum iso_code_class_type): Delete ISO_carriage_return.
(CODING_GET_INFO): Delete argument eol_type. Callers changed.
(decode_coding_utf_8): Don't do eol conversion.
(detect_coding_utf_16): Check coding->src_chars, not
coding->src_bytes. Add heuristics for those that have no
signature.
(decode_coding_emacs_mule): Don't do eol conversion.
(decode_coding_iso_2022): Likewise.
(decode_coding_sjis): Likewise.
(decode_coding_big5): Likewise.
(decode_coding_charset): Likewise.
(adjust_coding_eol_type): Return a new coding system.
(detect_coding): Don't detect eol. Fix for utf-16 detection.
(decode_eol): In case of CRLF->LF conversion, use del_range_2 on
each change.
(decode_coding): Pay attention to undo_list. Do eol conversion for
all types of coding-systems (if necessary).
(Vcode_conversion_work_buf_list): Delete it.
(Vcode_conversion_reused_workbuf): Renamed from
Vcode_conversion_reused_work_buf.
(Vcode_conversion_workbuf_name): New variable.
(reused_workbuf_in_use): New variable.
(make_conversion_work_buffer): Delete the arg DEPTH.
(code_conversion_restore): Argument changed to cons.
(code_conversion_save): Delete the argument BUFFER. Callers
changed.
(detect_coding_system): New argument src_chars. Callers changed.
Fix for utf-16 detection.
(init_coding_once): Don't use ISO_carriage_return.
(syms_of_coding): Initialize Vcode_conversion_workbuf_name and
reused_workbuf_in_use.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Tue, 02 Dec 2003 01:40:27 +0000 |
parents | cbaa9fd1aa5c |
children | cf1ff36f92dc |
comparison
equal
deleted
inserted
replaced
89664:5548dd3d1a7c | 89665:9010cefe8d29 |
---|---|
489 enum iso_code_class_type | 489 enum iso_code_class_type |
490 { | 490 { |
491 ISO_control_0, /* Control codes in the range | 491 ISO_control_0, /* Control codes in the range |
492 0x00..0x1F and 0x7F, except for the | 492 0x00..0x1F and 0x7F, except for the |
493 following 5 codes. */ | 493 following 5 codes. */ |
494 ISO_carriage_return, /* ISO_CODE_CR (0x0D) */ | |
495 ISO_shift_out, /* ISO_CODE_SO (0x0E) */ | 494 ISO_shift_out, /* ISO_CODE_SO (0x0E) */ |
496 ISO_shift_in, /* ISO_CODE_SI (0x0F) */ | 495 ISO_shift_in, /* ISO_CODE_SI (0x0F) */ |
497 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */ | 496 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */ |
498 ISO_escape, /* ISO_CODE_ESC (0x1B) */ | 497 ISO_escape, /* ISO_CODE_ESC (0x1B) */ |
499 ISO_control_1, /* Control codes in the range | 498 ISO_control_1, /* Control codes in the range |
708 #endif | 707 #endif |
709 #ifndef max | 708 #ifndef max |
710 #define max(a, b) ((a) > (b) ? (a) : (b)) | 709 #define max(a, b) ((a) > (b) ? (a) : (b)) |
711 #endif | 710 #endif |
712 | 711 |
713 #define CODING_GET_INFO(coding, attrs, eol_type, charset_list) \ | 712 #define CODING_GET_INFO(coding, attrs, charset_list) \ |
714 do { \ | 713 do { \ |
715 attrs = CODING_ID_ATTRS (coding->id); \ | 714 (attrs) = CODING_ID_ATTRS ((coding)->id); \ |
716 eol_type = CODING_ID_EOL_TYPE (coding->id); \ | 715 (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \ |
717 if (VECTORP (eol_type)) \ | |
718 eol_type = Qunix; \ | |
719 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \ | |
720 } while (0) | 716 } while (0) |
721 | 717 |
722 | 718 |
723 /* Safely get one byte from the source text pointed by SRC which ends | 719 /* Safely get one byte from the source text pointed by SRC which ends |
724 at SRC_END, and set C to that byte. If there are not enough bytes | 720 at SRC_END, and set C to that byte. If there are not enough bytes |
1130 const unsigned char *src_base; | 1126 const unsigned char *src_base; |
1131 int *charbuf = coding->charbuf; | 1127 int *charbuf = coding->charbuf; |
1132 int *charbuf_end = charbuf + coding->charbuf_size; | 1128 int *charbuf_end = charbuf + coding->charbuf_size; |
1133 int consumed_chars = 0, consumed_chars_base; | 1129 int consumed_chars = 0, consumed_chars_base; |
1134 int multibytep = coding->src_multibyte; | 1130 int multibytep = coding->src_multibyte; |
1135 Lisp_Object attr, eol_type, charset_list; | 1131 Lisp_Object attr, charset_list; |
1136 | 1132 |
1137 CODING_GET_INFO (coding, attr, eol_type, charset_list); | 1133 CODING_GET_INFO (coding, attr, charset_list); |
1138 | 1134 |
1139 while (1) | 1135 while (1) |
1140 { | 1136 { |
1141 int c, c1, c2, c3, c4, c5; | 1137 int c, c1, c2, c3, c4, c5; |
1142 | 1138 |
1148 | 1144 |
1149 ONE_MORE_BYTE (c1); | 1145 ONE_MORE_BYTE (c1); |
1150 if (UTF_8_1_OCTET_P(c1)) | 1146 if (UTF_8_1_OCTET_P(c1)) |
1151 { | 1147 { |
1152 c = c1; | 1148 c = c1; |
1153 if (c == '\r') | |
1154 { | |
1155 if (EQ (eol_type, Qdos)) | |
1156 { | |
1157 if (src == src_end) | |
1158 { | |
1159 coding->result = CODING_RESULT_INSUFFICIENT_SRC; | |
1160 goto no_more_source; | |
1161 } | |
1162 if (*src == '\n') | |
1163 ONE_MORE_BYTE (c); | |
1164 } | |
1165 else if (EQ (eol_type, Qmac)) | |
1166 c = '\n'; | |
1167 } | |
1168 } | 1149 } |
1169 else | 1150 else |
1170 { | 1151 { |
1171 ONE_MORE_BYTE (c2); | 1152 ONE_MORE_BYTE (c2); |
1172 if (! UTF_8_EXTRA_OCTET_P (c2)) | 1153 if (! UTF_8_EXTRA_OCTET_P (c2)) |
1323 int multibytep = coding->src_multibyte; | 1304 int multibytep = coding->src_multibyte; |
1324 int consumed_chars = 0; | 1305 int consumed_chars = 0; |
1325 int c1, c2; | 1306 int c1, c2; |
1326 | 1307 |
1327 detect_info->checked |= CATEGORY_MASK_UTF_16; | 1308 detect_info->checked |= CATEGORY_MASK_UTF_16; |
1328 | |
1329 if (coding->mode & CODING_MODE_LAST_BLOCK | 1309 if (coding->mode & CODING_MODE_LAST_BLOCK |
1330 && (coding->src_bytes & 1)) | 1310 && (coding->src_chars & 1)) |
1331 { | 1311 { |
1332 detect_info->rejected |= CATEGORY_MASK_UTF_16; | 1312 detect_info->rejected |= CATEGORY_MASK_UTF_16; |
1333 return 0; | 1313 return 0; |
1334 } | 1314 } |
1315 | |
1335 ONE_MORE_BYTE (c1); | 1316 ONE_MORE_BYTE (c1); |
1336 ONE_MORE_BYTE (c2); | 1317 ONE_MORE_BYTE (c2); |
1337 | |
1338 if ((c1 == 0xFF) && (c2 == 0xFE)) | 1318 if ((c1 == 0xFF) && (c2 == 0xFE)) |
1339 { | 1319 { |
1340 detect_info->found |= (CATEGORY_MASK_UTF_16_LE | 1320 detect_info->found |= (CATEGORY_MASK_UTF_16_LE |
1341 | CATEGORY_MASK_UTF_16_AUTO); | 1321 | CATEGORY_MASK_UTF_16_AUTO); |
1342 detect_info->rejected |= CATEGORY_MASK_UTF_16_BE; | 1322 detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE |
1323 | CATEGORY_MASK_UTF_16_BE_NOSIG | |
1324 | CATEGORY_MASK_UTF_16_LE_NOSIG); | |
1343 } | 1325 } |
1344 else if ((c1 == 0xFE) && (c2 == 0xFF)) | 1326 else if ((c1 == 0xFE) && (c2 == 0xFF)) |
1345 { | 1327 { |
1346 detect_info->found |= (CATEGORY_MASK_UTF_16_BE | 1328 detect_info->found |= (CATEGORY_MASK_UTF_16_BE |
1347 | CATEGORY_MASK_UTF_16_AUTO); | 1329 | CATEGORY_MASK_UTF_16_AUTO); |
1348 detect_info->rejected |= CATEGORY_MASK_UTF_16_LE; | 1330 detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE |
1331 | CATEGORY_MASK_UTF_16_BE_NOSIG | |
1332 | CATEGORY_MASK_UTF_16_LE_NOSIG); | |
1333 } | |
1334 else | |
1335 { | |
1336 unsigned char b1[256], b2[256]; | |
1337 int b1_variants = 1, b2_variants = 1; | |
1338 int n; | |
1339 | |
1340 bzero (b1, 256), bzero (b2, 256); | |
1341 b1[c1]++, b2[c2]++; | |
1342 for (n = 0; n < 256 && src < src_end; n++) | |
1343 { | |
1344 ONE_MORE_BYTE (c1); | |
1345 ONE_MORE_BYTE (c2); | |
1346 if (! b1[c1++]) b1_variants++; | |
1347 if (! b2[c2++]) b2_variants++; | |
1348 } | |
1349 if (b1_variants < b2_variants) | |
1350 detect_info->found |= CATEGORY_MASK_UTF_16_BE_NOSIG; | |
1351 else | |
1352 detect_info->found |= CATEGORY_MASK_UTF_16_LE_NOSIG; | |
1353 detect_info->rejected | |
1354 |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE); | |
1349 } | 1355 } |
1350 no_more_source: | 1356 no_more_source: |
1351 return 1; | 1357 return 1; |
1352 } | 1358 } |
1353 | 1359 |
1363 int consumed_chars = 0, consumed_chars_base; | 1369 int consumed_chars = 0, consumed_chars_base; |
1364 int multibytep = coding->src_multibyte; | 1370 int multibytep = coding->src_multibyte; |
1365 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding); | 1371 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding); |
1366 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding); | 1372 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding); |
1367 int surrogate = CODING_UTF_16_SURROGATE (coding); | 1373 int surrogate = CODING_UTF_16_SURROGATE (coding); |
1368 Lisp_Object attr, eol_type, charset_list; | 1374 Lisp_Object attr, charset_list; |
1369 | 1375 |
1370 CODING_GET_INFO (coding, attr, eol_type, charset_list); | 1376 CODING_GET_INFO (coding, attr, charset_list); |
1371 | 1377 |
1372 if (bom == utf_16_with_bom) | 1378 if (bom == utf_16_with_bom) |
1373 { | 1379 { |
1374 int c, c1, c2; | 1380 int c, c1, c2; |
1375 | 1381 |
1458 unsigned char *dst_end = coding->destination + coding->dst_bytes; | 1464 unsigned char *dst_end = coding->destination + coding->dst_bytes; |
1459 int safe_room = 8; | 1465 int safe_room = 8; |
1460 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding); | 1466 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding); |
1461 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian; | 1467 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian; |
1462 int produced_chars = 0; | 1468 int produced_chars = 0; |
1463 Lisp_Object attrs, eol_type, charset_list; | 1469 Lisp_Object attrs, charset_list; |
1464 int c; | 1470 int c; |
1465 | 1471 |
1466 CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 1472 CODING_GET_INFO (coding, attrs, charset_list); |
1467 | 1473 |
1468 if (bom != utf_16_without_bom) | 1474 if (bom != utf_16_without_bom) |
1469 { | 1475 { |
1470 ASSURE_DESTINATION (safe_room); | 1476 ASSURE_DESTINATION (safe_room); |
1471 if (big_endian) | 1477 if (big_endian) |
1926 const unsigned char *src_base; | 1932 const unsigned char *src_base; |
1927 int *charbuf = coding->charbuf; | 1933 int *charbuf = coding->charbuf; |
1928 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; | 1934 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; |
1929 int consumed_chars = 0, consumed_chars_base; | 1935 int consumed_chars = 0, consumed_chars_base; |
1930 int multibytep = coding->src_multibyte; | 1936 int multibytep = coding->src_multibyte; |
1931 Lisp_Object attrs, eol_type, charset_list; | 1937 Lisp_Object attrs, charset_list; |
1932 int char_offset = coding->produced_char; | 1938 int char_offset = coding->produced_char; |
1933 int last_offset = char_offset; | 1939 int last_offset = char_offset; |
1934 int last_id = charset_ascii; | 1940 int last_id = charset_ascii; |
1935 | 1941 |
1936 CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 1942 CODING_GET_INFO (coding, attrs, charset_list); |
1937 | 1943 |
1938 while (1) | 1944 while (1) |
1939 { | 1945 { |
1940 int c; | 1946 int c; |
1941 | 1947 |
1947 | 1953 |
1948 ONE_MORE_BYTE (c); | 1954 ONE_MORE_BYTE (c); |
1949 | 1955 |
1950 if (c < 0x80) | 1956 if (c < 0x80) |
1951 { | 1957 { |
1952 if (c == '\r') | |
1953 { | |
1954 if (EQ (eol_type, Qdos)) | |
1955 { | |
1956 if (src == src_end) | |
1957 { | |
1958 coding->result = CODING_RESULT_INSUFFICIENT_SRC; | |
1959 goto no_more_source; | |
1960 } | |
1961 if (*src == '\n') | |
1962 ONE_MORE_BYTE (c); | |
1963 } | |
1964 else if (EQ (eol_type, Qmac)) | |
1965 c = '\n'; | |
1966 } | |
1967 *charbuf++ = c; | 1958 *charbuf++ = c; |
1968 char_offset++; | 1959 char_offset++; |
1969 } | 1960 } |
1970 else if (c == 0x80) | 1961 else if (c == 0x80) |
1971 { | 1962 { |
2050 int *charbuf_end = charbuf + coding->charbuf_used; | 2041 int *charbuf_end = charbuf + coding->charbuf_used; |
2051 unsigned char *dst = coding->destination + coding->produced; | 2042 unsigned char *dst = coding->destination + coding->produced; |
2052 unsigned char *dst_end = coding->destination + coding->dst_bytes; | 2043 unsigned char *dst_end = coding->destination + coding->dst_bytes; |
2053 int safe_room = 8; | 2044 int safe_room = 8; |
2054 int produced_chars = 0; | 2045 int produced_chars = 0; |
2055 Lisp_Object attrs, eol_type, charset_list; | 2046 Lisp_Object attrs, charset_list; |
2056 int c; | 2047 int c; |
2057 int preferred_charset_id = -1; | 2048 int preferred_charset_id = -1; |
2058 | 2049 |
2059 CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 2050 CODING_GET_INFO (coding, attrs, charset_list); |
2060 if (! EQ (charset_list, Vemacs_mule_charset_list)) | 2051 if (! EQ (charset_list, Vemacs_mule_charset_list)) |
2061 { | 2052 { |
2062 CODING_ATTR_CHARSET_LIST (attrs) | 2053 CODING_ATTR_CHARSET_LIST (attrs) |
2063 = charset_list = Vemacs_mule_charset_list; | 2054 = charset_list = Vemacs_mule_charset_list; |
2064 } | 2055 } |
2804 int composition_state = COMPOSING_NO; | 2795 int composition_state = COMPOSING_NO; |
2805 enum composition_method method; | 2796 enum composition_method method; |
2806 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1]; | 2797 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1]; |
2807 int component_idx; | 2798 int component_idx; |
2808 int component_len; | 2799 int component_len; |
2809 Lisp_Object attrs, eol_type, charset_list; | 2800 Lisp_Object attrs, charset_list; |
2810 int char_offset = coding->produced_char; | 2801 int char_offset = coding->produced_char; |
2811 int last_offset = char_offset; | 2802 int last_offset = char_offset; |
2812 int last_id = charset_ascii; | 2803 int last_id = charset_ascii; |
2813 | 2804 |
2814 CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 2805 CODING_GET_INFO (coding, attrs, charset_list); |
2815 setup_iso_safe_charsets (attrs); | 2806 setup_iso_safe_charsets (attrs); |
2816 | 2807 |
2817 while (1) | 2808 while (1) |
2818 { | 2809 { |
2819 int c1, c2; | 2810 int c1, c2; |
2874 case ISO_graphic_plane_1: | 2865 case ISO_graphic_plane_1: |
2875 if (charset_id_1 < 0) | 2866 if (charset_id_1 < 0) |
2876 goto invalid_code; | 2867 goto invalid_code; |
2877 charset = CHARSET_FROM_ID (charset_id_1); | 2868 charset = CHARSET_FROM_ID (charset_id_1); |
2878 break; | 2869 break; |
2879 | |
2880 case ISO_carriage_return: | |
2881 if (c1 == '\r') | |
2882 { | |
2883 if (EQ (eol_type, Qdos)) | |
2884 { | |
2885 if (src == src_end) | |
2886 { | |
2887 coding->result = CODING_RESULT_INSUFFICIENT_SRC; | |
2888 goto no_more_source; | |
2889 } | |
2890 if (*src == '\n') | |
2891 ONE_MORE_BYTE (c1); | |
2892 } | |
2893 else if (EQ (eol_type, Qmac)) | |
2894 c1 = '\n'; | |
2895 } | |
2896 /* fall through */ | |
2897 | 2870 |
2898 case ISO_control_0: | 2871 case ISO_control_0: |
2899 MAYBE_FINISH_COMPOSITION (); | 2872 MAYBE_FINISH_COMPOSITION (); |
2900 charset = CHARSET_FROM_ID (charset_ascii); | 2873 charset = CHARSET_FROM_ID (charset_ascii); |
2901 break; | 2874 break; |
3646 Lisp_Object attrs, eol_type, charset_list; | 3619 Lisp_Object attrs, eol_type, charset_list; |
3647 int ascii_compatible; | 3620 int ascii_compatible; |
3648 int c; | 3621 int c; |
3649 int preferred_charset_id = -1; | 3622 int preferred_charset_id = -1; |
3650 | 3623 |
3651 CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 3624 CODING_GET_INFO (coding, attrs, charset_list); |
3625 eol_type = CODING_ID_EOL_TYPE (coding->id); | |
3626 if (VECTORP (eol_type)) | |
3627 eol_type = Qunix; | |
3628 | |
3652 setup_iso_safe_charsets (attrs); | 3629 setup_iso_safe_charsets (attrs); |
3653 /* Charset list may have been changed. */ | 3630 /* Charset list may have been changed. */ |
3654 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \ | 3631 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \ |
3655 coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs)); | 3632 coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs)); |
3656 | 3633 |
3929 int *charbuf = coding->charbuf; | 3906 int *charbuf = coding->charbuf; |
3930 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; | 3907 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; |
3931 int consumed_chars = 0, consumed_chars_base; | 3908 int consumed_chars = 0, consumed_chars_base; |
3932 int multibytep = coding->src_multibyte; | 3909 int multibytep = coding->src_multibyte; |
3933 struct charset *charset_roman, *charset_kanji, *charset_kana; | 3910 struct charset *charset_roman, *charset_kanji, *charset_kana; |
3934 Lisp_Object attrs, eol_type, charset_list, val; | 3911 Lisp_Object attrs, charset_list, val; |
3935 int char_offset = coding->produced_char; | 3912 int char_offset = coding->produced_char; |
3936 int last_offset = char_offset; | 3913 int last_offset = char_offset; |
3937 int last_id = charset_ascii; | 3914 int last_id = charset_ascii; |
3938 | 3915 |
3939 CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 3916 CODING_GET_INFO (coding, attrs, charset_list); |
3940 | 3917 |
3941 val = charset_list; | 3918 val = charset_list; |
3942 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); | 3919 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); |
3943 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); | 3920 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); |
3944 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))); | 3921 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))); |
3945 | 3922 |
3946 while (1) | 3923 while (1) |
3947 { | 3924 { |
3948 int c, c1; | 3925 int c, c1; |
3926 struct charset *charset; | |
3949 | 3927 |
3950 src_base = src; | 3928 src_base = src; |
3951 consumed_chars_base = consumed_chars; | 3929 consumed_chars_base = consumed_chars; |
3952 | 3930 |
3953 if (charbuf >= charbuf_end) | 3931 if (charbuf >= charbuf_end) |
3954 break; | 3932 break; |
3955 | 3933 |
3956 ONE_MORE_BYTE (c); | 3934 ONE_MORE_BYTE (c); |
3957 | 3935 |
3958 if (c == '\r') | 3936 if (c < 0x80) |
3959 { | 3937 charset = charset_roman; |
3960 if (EQ (eol_type, Qdos)) | 3938 else |
3939 { | |
3940 if (c >= 0xF0) | |
3941 goto invalid_code; | |
3942 if (c < 0xA0 || c >= 0xE0) | |
3961 { | 3943 { |
3962 if (src == src_end) | 3944 /* SJIS -> JISX0208 */ |
3963 { | 3945 ONE_MORE_BYTE (c1); |
3964 coding->result = CODING_RESULT_INSUFFICIENT_SRC; | 3946 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC) |
3965 goto no_more_source; | 3947 goto invalid_code; |
3966 } | 3948 c = (c << 8) | c1; |
3967 if (*src == '\n') | 3949 SJIS_TO_JIS (c); |
3968 ONE_MORE_BYTE (c); | 3950 charset = charset_kanji; |
3969 } | 3951 } |
3970 else if (EQ (eol_type, Qmac)) | 3952 else if (c > 0xA0) |
3971 c = '\n'; | 3953 { |
3972 } | 3954 /* SJIS -> JISX0201-Kana */ |
3973 else | 3955 c &= 0x7F; |
3974 { | 3956 charset = charset_kana; |
3975 struct charset *charset; | 3957 } |
3976 | |
3977 if (c < 0x80) | |
3978 charset = charset_roman; | |
3979 else | 3958 else |
3980 { | 3959 goto invalid_code; |
3981 if (c >= 0xF0) | 3960 } |
3982 goto invalid_code; | 3961 if (charset->id != charset_ascii |
3983 if (c < 0xA0 || c >= 0xE0) | 3962 && last_id != charset->id) |
3984 { | 3963 { |
3985 /* SJIS -> JISX0208 */ | 3964 if (last_id != charset_ascii) |
3986 ONE_MORE_BYTE (c1); | 3965 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); |
3987 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC) | 3966 last_id = charset->id; |
3988 goto invalid_code; | 3967 last_offset = char_offset; |
3989 c = (c << 8) | c1; | 3968 } |
3990 SJIS_TO_JIS (c); | 3969 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c); |
3991 charset = charset_kanji; | |
3992 } | |
3993 else if (c > 0xA0) | |
3994 { | |
3995 /* SJIS -> JISX0201-Kana */ | |
3996 c &= 0x7F; | |
3997 charset = charset_kana; | |
3998 } | |
3999 else | |
4000 goto invalid_code; | |
4001 } | |
4002 if (charset->id != charset_ascii | |
4003 && last_id != charset->id) | |
4004 { | |
4005 if (last_id != charset_ascii) | |
4006 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); | |
4007 last_id = charset->id; | |
4008 last_offset = char_offset; | |
4009 } | |
4010 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c); | |
4011 } | |
4012 *charbuf++ = c; | 3970 *charbuf++ = c; |
4013 char_offset++; | 3971 char_offset++; |
4014 continue; | 3972 continue; |
4015 | 3973 |
4016 invalid_code: | 3974 invalid_code: |
4040 int *charbuf = coding->charbuf; | 3998 int *charbuf = coding->charbuf; |
4041 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; | 3999 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; |
4042 int consumed_chars = 0, consumed_chars_base; | 4000 int consumed_chars = 0, consumed_chars_base; |
4043 int multibytep = coding->src_multibyte; | 4001 int multibytep = coding->src_multibyte; |
4044 struct charset *charset_roman, *charset_big5; | 4002 struct charset *charset_roman, *charset_big5; |
4045 Lisp_Object attrs, eol_type, charset_list, val; | 4003 Lisp_Object attrs, charset_list, val; |
4046 int char_offset = coding->produced_char; | 4004 int char_offset = coding->produced_char; |
4047 int last_offset = char_offset; | 4005 int last_offset = char_offset; |
4048 int last_id = charset_ascii; | 4006 int last_id = charset_ascii; |
4049 | 4007 |
4050 CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 4008 CODING_GET_INFO (coding, attrs, charset_list); |
4051 val = charset_list; | 4009 val = charset_list; |
4052 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); | 4010 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); |
4053 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val))); | 4011 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val))); |
4054 | 4012 |
4055 while (1) | 4013 while (1) |
4056 { | 4014 { |
4057 int c, c1; | 4015 int c, c1; |
4016 struct charset *charset; | |
4058 | 4017 |
4059 src_base = src; | 4018 src_base = src; |
4060 consumed_chars_base = consumed_chars; | 4019 consumed_chars_base = consumed_chars; |
4061 | 4020 |
4062 if (charbuf >= charbuf_end) | 4021 if (charbuf >= charbuf_end) |
4063 break; | 4022 break; |
4064 | 4023 |
4065 ONE_MORE_BYTE (c); | 4024 ONE_MORE_BYTE (c); |
4066 | 4025 |
4067 if (c == '\r') | 4026 if (c < 0x80) |
4068 { | 4027 charset = charset_roman; |
4069 if (EQ (eol_type, Qdos)) | |
4070 { | |
4071 if (src == src_end) | |
4072 { | |
4073 coding->result = CODING_RESULT_INSUFFICIENT_SRC; | |
4074 goto no_more_source; | |
4075 } | |
4076 if (*src == '\n') | |
4077 ONE_MORE_BYTE (c); | |
4078 } | |
4079 else if (EQ (eol_type, Qmac)) | |
4080 c = '\n'; | |
4081 } | |
4082 else | 4028 else |
4083 { | 4029 { |
4084 struct charset *charset; | 4030 /* BIG5 -> Big5 */ |
4085 if (c < 0x80) | 4031 if (c < 0xA1 || c > 0xFE) |
4086 charset = charset_roman; | 4032 goto invalid_code; |
4087 else | 4033 ONE_MORE_BYTE (c1); |
4088 { | 4034 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE) |
4089 /* BIG5 -> Big5 */ | 4035 goto invalid_code; |
4090 if (c < 0xA1 || c > 0xFE) | 4036 c = c << 8 | c1; |
4091 goto invalid_code; | 4037 charset = charset_big5; |
4092 ONE_MORE_BYTE (c1); | 4038 } |
4093 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE) | 4039 if (charset->id != charset_ascii |
4094 goto invalid_code; | 4040 && last_id != charset->id) |
4095 c = c << 8 | c1; | 4041 { |
4096 charset = charset_big5; | 4042 if (last_id != charset_ascii) |
4097 } | 4043 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); |
4098 if (charset->id != charset_ascii | 4044 last_id = charset->id; |
4099 && last_id != charset->id) | 4045 last_offset = char_offset; |
4100 { | 4046 } |
4101 if (last_id != charset_ascii) | 4047 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c); |
4102 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); | |
4103 last_id = charset->id; | |
4104 last_offset = char_offset; | |
4105 } | |
4106 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c); | |
4107 } | |
4108 | |
4109 *charbuf++ = c; | 4048 *charbuf++ = c; |
4110 char_offset++; | 4049 char_offset++; |
4111 continue; | 4050 continue; |
4112 | 4051 |
4113 invalid_code: | 4052 invalid_code: |
4144 int *charbuf_end = charbuf + coding->charbuf_used; | 4083 int *charbuf_end = charbuf + coding->charbuf_used; |
4145 unsigned char *dst = coding->destination + coding->produced; | 4084 unsigned char *dst = coding->destination + coding->produced; |
4146 unsigned char *dst_end = coding->destination + coding->dst_bytes; | 4085 unsigned char *dst_end = coding->destination + coding->dst_bytes; |
4147 int safe_room = 4; | 4086 int safe_room = 4; |
4148 int produced_chars = 0; | 4087 int produced_chars = 0; |
4149 Lisp_Object attrs, eol_type, charset_list, val; | 4088 Lisp_Object attrs, charset_list, val; |
4150 int ascii_compatible; | 4089 int ascii_compatible; |
4151 struct charset *charset_roman, *charset_kanji, *charset_kana; | 4090 struct charset *charset_roman, *charset_kanji, *charset_kana; |
4152 int c; | 4091 int c; |
4153 | 4092 |
4154 CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 4093 CODING_GET_INFO (coding, attrs, charset_list); |
4155 val = charset_list; | 4094 val = charset_list; |
4156 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); | 4095 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); |
4157 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); | 4096 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); |
4158 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))); | 4097 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))); |
4159 | 4098 |
4219 int *charbuf_end = charbuf + coding->charbuf_used; | 4158 int *charbuf_end = charbuf + coding->charbuf_used; |
4220 unsigned char *dst = coding->destination + coding->produced; | 4159 unsigned char *dst = coding->destination + coding->produced; |
4221 unsigned char *dst_end = coding->destination + coding->dst_bytes; | 4160 unsigned char *dst_end = coding->destination + coding->dst_bytes; |
4222 int safe_room = 4; | 4161 int safe_room = 4; |
4223 int produced_chars = 0; | 4162 int produced_chars = 0; |
4224 Lisp_Object attrs, eol_type, charset_list, val; | 4163 Lisp_Object attrs, charset_list, val; |
4225 int ascii_compatible; | 4164 int ascii_compatible; |
4226 struct charset *charset_roman, *charset_big5; | 4165 struct charset *charset_roman, *charset_big5; |
4227 int c; | 4166 int c; |
4228 | 4167 |
4229 CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 4168 CODING_GET_INFO (coding, attrs, charset_list); |
4230 val = charset_list; | 4169 val = charset_list; |
4231 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); | 4170 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); |
4232 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val))); | 4171 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val))); |
4233 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)); | 4172 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)); |
4234 | 4173 |
4338 int consumed_chars = 0; | 4277 int consumed_chars = 0; |
4339 int multibytep = coding->src_multibyte; | 4278 int multibytep = coding->src_multibyte; |
4340 struct ccl_program ccl; | 4279 struct ccl_program ccl; |
4341 int source_charbuf[1024]; | 4280 int source_charbuf[1024]; |
4342 int source_byteidx[1024]; | 4281 int source_byteidx[1024]; |
4343 Lisp_Object attrs, eol_type, charset_list; | 4282 Lisp_Object attrs, charset_list; |
4344 | 4283 |
4345 CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 4284 CODING_GET_INFO (coding, attrs, charset_list); |
4346 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding)); | 4285 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding)); |
4347 | 4286 |
4348 while (src < src_end) | 4287 while (src < src_end) |
4349 { | 4288 { |
4350 const unsigned char *p = src; | 4289 const unsigned char *p = src; |
4418 unsigned char *dst = coding->destination + coding->produced; | 4357 unsigned char *dst = coding->destination + coding->produced; |
4419 unsigned char *dst_end = coding->destination + coding->dst_bytes; | 4358 unsigned char *dst_end = coding->destination + coding->dst_bytes; |
4420 unsigned char *adjusted_dst_end = dst_end - 1; | 4359 unsigned char *adjusted_dst_end = dst_end - 1; |
4421 int destination_charbuf[1024]; | 4360 int destination_charbuf[1024]; |
4422 int i, produced_chars = 0; | 4361 int i, produced_chars = 0; |
4423 Lisp_Object attrs, eol_type, charset_list; | 4362 Lisp_Object attrs, charset_list; |
4424 | 4363 |
4425 CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 4364 CODING_GET_INFO (coding, attrs, charset_list); |
4426 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding)); | 4365 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding)); |
4427 | 4366 |
4428 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK; | 4367 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK; |
4429 ccl.dst_multibyte = coding->dst_multibyte; | 4368 ccl.dst_multibyte = coding->dst_multibyte; |
4430 | 4369 |
4619 const unsigned char *src_base; | 4558 const unsigned char *src_base; |
4620 int *charbuf = coding->charbuf; | 4559 int *charbuf = coding->charbuf; |
4621 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; | 4560 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; |
4622 int consumed_chars = 0, consumed_chars_base; | 4561 int consumed_chars = 0, consumed_chars_base; |
4623 int multibytep = coding->src_multibyte; | 4562 int multibytep = coding->src_multibyte; |
4624 Lisp_Object attrs, eol_type, charset_list, valids; | 4563 Lisp_Object attrs, charset_list, valids; |
4625 int char_offset = coding->produced_char; | 4564 int char_offset = coding->produced_char; |
4626 int last_offset = char_offset; | 4565 int last_offset = char_offset; |
4627 int last_id = charset_ascii; | 4566 int last_id = charset_ascii; |
4628 | 4567 |
4629 CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 4568 CODING_GET_INFO (coding, attrs, charset_list); |
4630 valids = AREF (attrs, coding_attr_charset_valids); | 4569 valids = AREF (attrs, coding_attr_charset_valids); |
4631 | 4570 |
4632 while (1) | 4571 while (1) |
4633 { | 4572 { |
4634 int c; | 4573 int c; |
4574 Lisp_Object val; | |
4575 struct charset *charset; | |
4576 int dim; | |
4577 int len = 1; | |
4578 unsigned code; | |
4635 | 4579 |
4636 src_base = src; | 4580 src_base = src; |
4637 consumed_chars_base = consumed_chars; | 4581 consumed_chars_base = consumed_chars; |
4638 | 4582 |
4639 if (charbuf >= charbuf_end) | 4583 if (charbuf >= charbuf_end) |
4640 break; | 4584 break; |
4641 | 4585 |
4642 ONE_MORE_BYTE (c); | 4586 ONE_MORE_BYTE (c); |
4643 if (c == '\r') | 4587 code = c; |
4644 { | 4588 |
4645 /* Here we assume that no charset maps '\r' to something | 4589 val = AREF (valids, c); |
4646 else. */ | 4590 if (NILP (val)) |
4647 if (EQ (eol_type, Qdos)) | 4591 goto invalid_code; |
4592 if (INTEGERP (val)) | |
4593 { | |
4594 charset = CHARSET_FROM_ID (XFASTINT (val)); | |
4595 dim = CHARSET_DIMENSION (charset); | |
4596 while (len < dim) | |
4648 { | 4597 { |
4649 if (src == src_end) | 4598 ONE_MORE_BYTE (c); |
4650 { | 4599 code = (code << 8) | c; |
4651 coding->result = CODING_RESULT_INSUFFICIENT_SRC; | 4600 len++; |
4652 goto no_more_source; | |
4653 } | |
4654 if (*src == '\n') | |
4655 ONE_MORE_BYTE (c); | |
4656 } | 4601 } |
4657 else if (EQ (eol_type, Qmac)) | 4602 CODING_DECODE_CHAR (coding, src, src_base, src_end, |
4658 c = '\n'; | 4603 charset, code, c); |
4659 } | 4604 } |
4660 else | 4605 else |
4661 { | 4606 { |
4662 Lisp_Object val; | 4607 /* VAL is a list of charset IDs. It is assured that the |
4663 struct charset *charset; | 4608 list is sorted by charset dimensions (smaller one |
4664 int dim; | 4609 comes first). */ |
4665 int len = 1; | 4610 while (CONSP (val)) |
4666 unsigned code = c; | |
4667 | |
4668 val = AREF (valids, c); | |
4669 if (NILP (val)) | |
4670 goto invalid_code; | |
4671 if (INTEGERP (val)) | |
4672 { | 4611 { |
4673 charset = CHARSET_FROM_ID (XFASTINT (val)); | 4612 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val))); |
4674 dim = CHARSET_DIMENSION (charset); | 4613 dim = CHARSET_DIMENSION (charset); |
4675 while (len < dim) | 4614 while (len < dim) |
4676 { | 4615 { |
4677 ONE_MORE_BYTE (c); | 4616 ONE_MORE_BYTE (c); |
4678 code = (code << 8) | c; | 4617 code = (code << 8) | c; |
4679 len++; | 4618 len++; |
4680 } | 4619 } |
4681 CODING_DECODE_CHAR (coding, src, src_base, src_end, | 4620 CODING_DECODE_CHAR (coding, src, src_base, |
4682 charset, code, c); | 4621 src_end, charset, code, c); |
4622 if (c >= 0) | |
4623 break; | |
4624 val = XCDR (val); | |
4683 } | 4625 } |
4684 else | 4626 } |
4685 { | 4627 if (c < 0) |
4686 /* VAL is a list of charset IDs. It is assured that the | 4628 goto invalid_code; |
4687 list is sorted by charset dimensions (smaller one | 4629 if (charset->id != charset_ascii |
4688 comes first). */ | 4630 && last_id != charset->id) |
4689 while (CONSP (val)) | 4631 { |
4690 { | 4632 if (last_id != charset_ascii) |
4691 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val))); | 4633 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); |
4692 dim = CHARSET_DIMENSION (charset); | 4634 last_id = charset->id; |
4693 while (len < dim) | 4635 last_offset = char_offset; |
4694 { | 4636 } |
4695 ONE_MORE_BYTE (c); | 4637 |
4696 code = (code << 8) | c; | |
4697 len++; | |
4698 } | |
4699 CODING_DECODE_CHAR (coding, src, src_base, | |
4700 src_end, charset, code, c); | |
4701 if (c >= 0) | |
4702 break; | |
4703 val = XCDR (val); | |
4704 } | |
4705 } | |
4706 if (c < 0) | |
4707 goto invalid_code; | |
4708 if (charset->id != charset_ascii | |
4709 && last_id != charset->id) | |
4710 { | |
4711 if (last_id != charset_ascii) | |
4712 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); | |
4713 last_id = charset->id; | |
4714 last_offset = char_offset; | |
4715 } | |
4716 } | |
4717 *charbuf++ = c; | 4638 *charbuf++ = c; |
4718 char_offset++; | 4639 char_offset++; |
4719 continue; | 4640 continue; |
4720 | 4641 |
4721 invalid_code: | 4642 invalid_code: |
4744 int *charbuf_end = charbuf + coding->charbuf_used; | 4665 int *charbuf_end = charbuf + coding->charbuf_used; |
4745 unsigned char *dst = coding->destination + coding->produced; | 4666 unsigned char *dst = coding->destination + coding->produced; |
4746 unsigned char *dst_end = coding->destination + coding->dst_bytes; | 4667 unsigned char *dst_end = coding->destination + coding->dst_bytes; |
4747 int safe_room = MAX_MULTIBYTE_LENGTH; | 4668 int safe_room = MAX_MULTIBYTE_LENGTH; |
4748 int produced_chars = 0; | 4669 int produced_chars = 0; |
4749 Lisp_Object attrs, eol_type, charset_list; | 4670 Lisp_Object attrs, charset_list; |
4750 int ascii_compatible; | 4671 int ascii_compatible; |
4751 int c; | 4672 int c; |
4752 | 4673 |
4753 CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 4674 CODING_GET_INFO (coding, attrs, charset_list); |
4754 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)); | 4675 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)); |
4755 | 4676 |
4756 while (charbuf < charbuf_end) | 4677 while (charbuf < charbuf_end) |
4757 { | 4678 { |
4758 struct charset *charset; | 4679 struct charset *charset; |
5248 } | 5169 } |
5249 return eol_seen; | 5170 return eol_seen; |
5250 } | 5171 } |
5251 | 5172 |
5252 | 5173 |
5253 static void | 5174 static Lisp_Object |
5254 adjust_coding_eol_type (coding, eol_seen) | 5175 adjust_coding_eol_type (coding, eol_seen) |
5255 struct coding_system *coding; | 5176 struct coding_system *coding; |
5256 int eol_seen; | 5177 int eol_seen; |
5257 { | 5178 { |
5258 Lisp_Object eol_type; | 5179 Lisp_Object eol_type; |
5259 | 5180 |
5260 eol_type = CODING_ID_EOL_TYPE (coding->id); | 5181 eol_type = CODING_ID_EOL_TYPE (coding->id); |
5261 if (eol_seen & EOL_SEEN_LF) | 5182 if (eol_seen & EOL_SEEN_LF) |
5262 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0)); | 5183 { |
5184 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0)); | |
5185 eol_type = Qunix; | |
5186 } | |
5263 else if (eol_seen & EOL_SEEN_CRLF) | 5187 else if (eol_seen & EOL_SEEN_CRLF) |
5264 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1)); | 5188 { |
5189 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1)); | |
5190 eol_type = Qdos; | |
5191 } | |
5265 else if (eol_seen & EOL_SEEN_CR) | 5192 else if (eol_seen & EOL_SEEN_CR) |
5266 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2)); | 5193 { |
5194 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2)); | |
5195 eol_type = Qmac; | |
5196 } | |
5197 return eol_type; | |
5267 } | 5198 } |
5268 | 5199 |
5269 /* Detect how a text specified in CODING is encoded. If a coding | 5200 /* Detect how a text specified in CODING is encoded. If a coding |
5270 system is detected, update fields of CODING by the detected coding | 5201 system is detected, update fields of CODING by the detected coding |
5271 system. */ | 5202 system. */ |
5287 now. */ | 5218 now. */ |
5288 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided)) | 5219 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided)) |
5289 { | 5220 { |
5290 int c, i; | 5221 int c, i; |
5291 | 5222 |
5292 for (src = coding->source; src < src_end; src++) | 5223 for (i = 0, src = coding->source; src < src_end; i++, src++) |
5293 { | 5224 { |
5294 c = *src; | 5225 c = *src; |
5295 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC | 5226 if (c & 0x80 || (c < 0x20 && (c == 0 |
5227 || c == ISO_CODE_ESC | |
5296 || c == ISO_CODE_SI | 5228 || c == ISO_CODE_SI |
5297 || c == ISO_CODE_SO))) | 5229 || c == ISO_CODE_SO))) |
5298 break; | 5230 break; |
5299 } | 5231 } |
5232 /* Skipped bytes must be even for utf-16 detector. */ | |
5233 if (i % 2) | |
5234 src--; | |
5300 coding->head_ascii = src - (coding->source + coding->consumed); | 5235 coding->head_ascii = src - (coding->source + coding->consumed); |
5301 | 5236 |
5302 if (coding->head_ascii < coding->src_bytes) | 5237 if (coding->head_ascii < coding->src_bytes) |
5303 { | 5238 { |
5304 struct coding_detection_info detect_info; | 5239 struct coding_detection_info detect_info; |
5322 if (detect_info.found & (1 << category)) | 5257 if (detect_info.found & (1 << category)) |
5323 break; | 5258 break; |
5324 } | 5259 } |
5325 else if ((*(this->detector)) (coding, &detect_info) | 5260 else if ((*(this->detector)) (coding, &detect_info) |
5326 && detect_info.found & (1 << category)) | 5261 && detect_info.found & (1 << category)) |
5327 break; | 5262 { |
5263 if (category == coding_category_utf_16_auto) | |
5264 { | |
5265 if (detect_info.found & CATEGORY_MASK_UTF_16_LE) | |
5266 category = coding_category_utf_16_le; | |
5267 else | |
5268 category = coding_category_utf_16_be; | |
5269 } | |
5270 break; | |
5271 } | |
5328 } | 5272 } |
5329 if (i < coding_category_raw_text) | 5273 if (i < coding_category_raw_text) |
5330 setup_coding_system (CODING_ID_NAME (this->id), coding); | 5274 setup_coding_system (CODING_ID_NAME (this->id), coding); |
5331 else if (detect_info.rejected == CATEGORY_MASK_ANY) | 5275 else if (detect_info.rejected == CATEGORY_MASK_ANY) |
5332 setup_coding_system (Qraw_text, coding); | 5276 setup_coding_system (Qraw_text, coding); |
5338 setup_coding_system (CODING_ID_NAME (this->id), coding); | 5282 setup_coding_system (CODING_ID_NAME (this->id), coding); |
5339 break; | 5283 break; |
5340 } | 5284 } |
5341 } | 5285 } |
5342 } | 5286 } |
5343 else if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qutf_16)) | 5287 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id))) |
5288 == coding_category_utf_16_auto) | |
5344 { | 5289 { |
5345 Lisp_Object coding_systems; | 5290 Lisp_Object coding_systems; |
5346 struct coding_detection_info detect_info; | 5291 struct coding_detection_info detect_info; |
5347 | 5292 |
5348 coding_systems | 5293 coding_systems |
5349 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom); | 5294 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom); |
5350 detect_info.found = detect_info.rejected = 0; | 5295 detect_info.found = detect_info.rejected = 0; |
5351 if (CONSP (coding_systems) | 5296 if (CONSP (coding_systems) |
5352 && detect_coding_utf_16 (coding, &detect_info) | 5297 && detect_coding_utf_16 (coding, &detect_info)) |
5353 && (detect_info.found & (CATEGORY_MASK_UTF_16_LE | |
5354 | CATEGORY_MASK_UTF_16_BE))) | |
5355 { | 5298 { |
5356 if (detect_info.found & CATEGORY_MASK_UTF_16_LE) | 5299 if (detect_info.found & CATEGORY_MASK_UTF_16_LE) |
5357 setup_coding_system (XCAR (coding_systems), coding); | 5300 setup_coding_system (XCAR (coding_systems), coding); |
5358 else | 5301 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE) |
5359 setup_coding_system (XCDR (coding_systems), coding); | 5302 setup_coding_system (XCDR (coding_systems), coding); |
5360 } | 5303 } |
5361 } | |
5362 | |
5363 attrs = CODING_ID_ATTRS (coding->id); | |
5364 coding_type = CODING_ATTR_TYPE (attrs); | |
5365 | |
5366 /* If we have not yet decided the EOL type, detect it now. But, the | |
5367 detection is impossible for a CCL based coding system, in which | |
5368 case, we detct the EOL type after decoding. */ | |
5369 if (VECTORP (CODING_ID_EOL_TYPE (coding->id)) | |
5370 && ! EQ (coding_type, Qccl)) | |
5371 { | |
5372 int eol_seen = detect_eol (coding->source, coding->src_bytes, | |
5373 (enum coding_category) XINT (CODING_ATTR_CATEGORY (attrs))); | |
5374 | |
5375 if (eol_seen != EOL_SEEN_NONE) | |
5376 adjust_coding_eol_type (coding, eol_seen); | |
5377 } | 5304 } |
5378 } | 5305 } |
5379 | 5306 |
5380 | 5307 |
5381 static void | 5308 static void |
5382 decode_eol (coding) | 5309 decode_eol (coding) |
5383 struct coding_system *coding; | 5310 struct coding_system *coding; |
5384 { | 5311 { |
5385 if (VECTORP (CODING_ID_EOL_TYPE (coding->id))) | 5312 Lisp_Object eol_type; |
5386 { | 5313 unsigned char *p, *pbeg, *pend; |
5387 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos); | 5314 |
5388 unsigned char *pend = p + coding->produced; | 5315 eol_type = CODING_ID_EOL_TYPE (coding->id); |
5316 if (EQ (eol_type, Qunix)) | |
5317 return; | |
5318 | |
5319 if (NILP (coding->dst_object)) | |
5320 pbeg = coding->destination; | |
5321 else | |
5322 pbeg = BYTE_POS_ADDR (coding->dst_pos_byte); | |
5323 pend = pbeg + coding->produced; | |
5324 | |
5325 if (VECTORP (eol_type)) | |
5326 { | |
5389 int eol_seen = EOL_SEEN_NONE; | 5327 int eol_seen = EOL_SEEN_NONE; |
5390 | 5328 |
5391 for (; p < pend; p++) | 5329 for (p = pbeg; p < pend; p++) |
5392 { | 5330 { |
5393 if (*p == '\n') | 5331 if (*p == '\n') |
5394 eol_seen |= EOL_SEEN_LF; | 5332 eol_seen |= EOL_SEEN_LF; |
5395 else if (*p == '\r') | 5333 else if (*p == '\r') |
5396 { | 5334 { |
5401 } | 5339 } |
5402 else | 5340 else |
5403 eol_seen |= EOL_SEEN_CR; | 5341 eol_seen |= EOL_SEEN_CR; |
5404 } | 5342 } |
5405 } | 5343 } |
5344 if (eol_seen != EOL_SEEN_NONE | |
5345 && eol_seen != EOL_SEEN_LF | |
5346 && eol_seen != EOL_SEEN_CRLF | |
5347 && eol_seen != EOL_SEEN_CR) | |
5348 eol_seen = EOL_SEEN_LF; | |
5406 if (eol_seen != EOL_SEEN_NONE) | 5349 if (eol_seen != EOL_SEEN_NONE) |
5407 adjust_coding_eol_type (coding, eol_seen); | 5350 eol_type = adjust_coding_eol_type (coding, eol_seen); |
5408 } | 5351 } |
5409 | 5352 |
5410 if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac)) | 5353 if (EQ (eol_type, Qmac)) |
5411 { | 5354 { |
5412 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos); | 5355 for (p = pbeg; p < pend; p++) |
5413 unsigned char *pend = p + coding->produced; | |
5414 | |
5415 for (; p < pend; p++) | |
5416 if (*p == '\r') | 5356 if (*p == '\r') |
5417 *p = '\n'; | 5357 *p = '\n'; |
5418 } | 5358 } |
5419 else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos)) | 5359 else if (EQ (eol_type, Qdos)) |
5420 { | 5360 { |
5421 unsigned char *p, *pbeg, *pend; | 5361 int n = 0; |
5422 Lisp_Object undo_list; | 5362 |
5423 | 5363 if (NILP (coding->dst_object)) |
5424 move_gap_both (coding->dst_pos + coding->produced_char, | 5364 { |
5425 coding->dst_pos_byte + coding->produced); | 5365 for (p = pend - 2; p >= pbeg; p--) |
5426 undo_list = current_buffer->undo_list; | 5366 if (*p == '\r') |
5427 current_buffer->undo_list = Qt; | 5367 { |
5428 del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, 0); | 5368 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1); |
5429 current_buffer->undo_list = undo_list; | 5369 n++; |
5430 pbeg = GPT_ADDR; | 5370 } |
5431 pend = pbeg + coding->produced; | 5371 } |
5432 | 5372 else |
5433 for (p = pend - 1; p >= pbeg; p--) | 5373 { |
5434 if (*p == '\r') | 5374 for (p = pend - 2; p >= pbeg; p--) |
5435 { | 5375 if (*p == '\r') |
5436 safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1); | 5376 { |
5437 pend--; | 5377 int pos_byte = coding->dst_pos_byte + (p - pbeg); |
5438 } | 5378 int pos = BYTE_TO_CHAR (pos_byte); |
5439 coding->produced_char -= coding->produced - (pend - pbeg); | 5379 |
5440 coding->produced = pend - pbeg; | 5380 del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0); |
5441 insert_from_gap (coding->produced_char, coding->produced); | 5381 n++; |
5382 } | |
5383 } | |
5384 coding->produced -= n; | |
5385 coding->produced_char -= n; | |
5442 } | 5386 } |
5443 } | 5387 } |
5444 | 5388 |
5445 static void | 5389 static void |
5446 translate_chars (coding, table) | 5390 translate_chars (coding, table) |
5794 static int | 5738 static int |
5795 decode_coding (coding) | 5739 decode_coding (coding) |
5796 struct coding_system *coding; | 5740 struct coding_system *coding; |
5797 { | 5741 { |
5798 Lisp_Object attrs; | 5742 Lisp_Object attrs; |
5743 Lisp_Object undo_list; | |
5799 | 5744 |
5800 if (BUFFERP (coding->src_object) | 5745 if (BUFFERP (coding->src_object) |
5801 && coding->src_pos > 0 | 5746 && coding->src_pos > 0 |
5802 && coding->src_pos < GPT | 5747 && coding->src_pos < GPT |
5803 && coding->src_pos + coding->src_chars > GPT) | 5748 && coding->src_pos + coding->src_chars > GPT) |
5804 move_gap_both (coding->src_pos, coding->src_pos_byte); | 5749 move_gap_both (coding->src_pos, coding->src_pos_byte); |
5805 | 5750 |
5751 undo_list = Qt; | |
5806 if (BUFFERP (coding->dst_object)) | 5752 if (BUFFERP (coding->dst_object)) |
5807 { | 5753 { |
5808 if (current_buffer != XBUFFER (coding->dst_object)) | 5754 if (current_buffer != XBUFFER (coding->dst_object)) |
5809 set_buffer_internal (XBUFFER (coding->dst_object)); | 5755 set_buffer_internal (XBUFFER (coding->dst_object)); |
5810 if (GPT != PT) | 5756 if (GPT != PT) |
5811 move_gap_both (PT, PT_BYTE); | 5757 move_gap_both (PT, PT_BYTE); |
5758 undo_list = current_buffer->undo_list; | |
5759 current_buffer->undo_list = Qt; | |
5812 } | 5760 } |
5813 | 5761 |
5814 coding->consumed = coding->consumed_char = 0; | 5762 coding->consumed = coding->consumed_char = 0; |
5815 coding->produced = coding->produced_char = 0; | 5763 coding->produced = coding->produced_char = 0; |
5816 coding->chars_at_source = 0; | 5764 coding->chars_at_source = 0; |
5836 produce_annotation (coding); | 5784 produce_annotation (coding); |
5837 } | 5785 } |
5838 while (coding->consumed < coding->src_bytes | 5786 while (coding->consumed < coding->src_bytes |
5839 && ! coding->result); | 5787 && ! coding->result); |
5840 | 5788 |
5841 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl) | |
5842 && SYMBOLP (CODING_ID_EOL_TYPE (coding->id)) | |
5843 && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix)) | |
5844 decode_eol (coding); | |
5845 | |
5846 coding->carryover_bytes = 0; | 5789 coding->carryover_bytes = 0; |
5847 if (coding->consumed < coding->src_bytes) | 5790 if (coding->consumed < coding->src_bytes) |
5848 { | 5791 { |
5849 int nbytes = coding->src_bytes - coding->consumed; | 5792 int nbytes = coding->src_bytes - coding->consumed; |
5850 const unsigned char *src; | 5793 const unsigned char *src; |
5878 *p++ = *src++; | 5821 *p++ = *src++; |
5879 } | 5822 } |
5880 coding->consumed = coding->src_bytes; | 5823 coding->consumed = coding->src_bytes; |
5881 } | 5824 } |
5882 | 5825 |
5826 if (BUFFERP (coding->dst_object)) | |
5827 { | |
5828 current_buffer->undo_list = undo_list; | |
5829 record_insert (coding->dst_pos, coding->produced_char); | |
5830 } | |
5831 if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix)) | |
5832 decode_eol (coding); | |
5883 return coding->result; | 5833 return coding->result; |
5884 } | 5834 } |
5885 | 5835 |
5886 | 5836 |
5887 /* Extract an annotation datum from a composition starting at POS and | 5837 /* Extract an annotation datum from a composition starting at POS and |
6037 stop = stop_charset = pos; | 5987 stop = stop_charset = pos; |
6038 else | 5988 else |
6039 stop_charset = end_pos; | 5989 stop_charset = end_pos; |
6040 } | 5990 } |
6041 | 5991 |
6042 /* Compensate for CRLF and annotation. */ | 5992 /* Compensate for CRLF and conversion. */ |
6043 buf_end -= 1 + MAX_ANNOTATION_LENGTH; | 5993 buf_end -= 1 + MAX_ANNOTATION_LENGTH; |
6044 while (buf < buf_end) | 5994 while (buf < buf_end) |
6045 { | 5995 { |
6046 if (pos == stop) | 5996 if (pos == stop) |
6047 { | 5997 { |
6152 | 6102 |
6153 return (coding->result); | 6103 return (coding->result); |
6154 } | 6104 } |
6155 | 6105 |
6156 | 6106 |
6157 /* Stack of working buffers used in code conversion. An nil element | 6107 /* Name (or base name) of work buffer for code conversion. */ |
6158 means that the code conversion of that level is not using a working | 6108 static Lisp_Object Vcode_conversion_workbuf_name; |
6159 buffer. */ | 6109 |
6160 Lisp_Object Vcode_conversion_work_buf_list; | 6110 /* A working buffer used by the top level conversion. Once it is |
6161 | 6111 created, it is never destroyed. It has the name |
6162 /* A working buffer used by the top level conversion. */ | 6112 Vcode_conversion_workbuf_name. The other working buffers are |
6163 Lisp_Object Vcode_conversion_reused_work_buf; | 6113 destroyed after the use is finished, and their names are modified |
6164 | 6114 versions of Vcode_conversion_workbuf_name. */ |
6165 | 6115 static Lisp_Object Vcode_conversion_reused_workbuf; |
6166 /* Return a working buffer that can be freely used by the following | 6116 |
6167 code conversion. MULTIBYTEP specifies the multibyteness of the | 6117 /* 1 iff Vcode_conversion_reused_workbuf is already in use. */ |
6168 buffer. */ | 6118 static int reused_workbuf_in_use; |
6119 | |
6120 | |
6121 /* Return a working buffer of code convesion. MULTIBYTE specifies the | |
6122 multibyteness of returning buffer. */ | |
6169 | 6123 |
6170 Lisp_Object | 6124 Lisp_Object |
6171 make_conversion_work_buffer (multibytep, depth) | 6125 make_conversion_work_buffer (multibyte) |
6172 int multibytep, depth; | 6126 { |
6173 { | 6127 Lisp_Object name, workbuf; |
6174 struct buffer *current = current_buffer; | 6128 struct buffer *current; |
6175 Lisp_Object buf, name; | 6129 |
6176 | 6130 if (reused_workbuf_in_use++) |
6177 if (depth == 0) | 6131 name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil); |
6178 { | |
6179 if (NILP (Vcode_conversion_reused_work_buf)) | |
6180 Vcode_conversion_reused_work_buf | |
6181 = Fget_buffer_create (build_string (" *code-converting-work<0>*")); | |
6182 buf = Vcode_conversion_reused_work_buf; | |
6183 } | |
6184 else | 6132 else |
6185 { | 6133 name = Vcode_conversion_workbuf_name; |
6186 if (depth < 0) | 6134 workbuf = Fget_buffer_create (name); |
6187 { | 6135 current = current_buffer; |
6188 name = build_string (" *code-converting-work*"); | 6136 set_buffer_internal (XBUFFER (workbuf)); |
6189 name = Fgenerate_new_buffer_name (name, Qnil); | 6137 Ferase_buffer (); |
6190 } | |
6191 else | |
6192 { | |
6193 char str[128]; | |
6194 | |
6195 sprintf (str, " *code-converting-work*<%d>", depth); | |
6196 name = build_string (str); | |
6197 } | |
6198 buf = Fget_buffer_create (name); | |
6199 } | |
6200 set_buffer_internal (XBUFFER (buf)); | |
6201 current_buffer->undo_list = Qt; | 6138 current_buffer->undo_list = Qt; |
6202 Ferase_buffer (); | 6139 current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil; |
6203 Fset_buffer_multibyte (multibytep ? Qt : Qnil); | |
6204 set_buffer_internal (current); | 6140 set_buffer_internal (current); |
6205 return buf; | 6141 return workbuf; |
6206 } | 6142 } |
6143 | |
6207 | 6144 |
6208 static Lisp_Object | 6145 static Lisp_Object |
6209 code_conversion_restore (buffer) | 6146 code_conversion_restore (arg) |
6210 Lisp_Object buffer; | 6147 Lisp_Object arg; |
6211 { | 6148 { |
6212 Lisp_Object workbuf; | 6149 Lisp_Object current, workbuf; |
6213 | 6150 |
6214 workbuf = XCAR (Vcode_conversion_work_buf_list); | 6151 current = XCAR (arg); |
6215 if (! NILP (workbuf) | 6152 workbuf = XCDR (arg); |
6216 && ! EQ (workbuf, Vcode_conversion_reused_work_buf) | 6153 if (! NILP (workbuf)) |
6217 && ! NILP (Fbuffer_live_p (workbuf))) | 6154 { |
6218 Fkill_buffer (workbuf); | 6155 if (EQ (workbuf, Vcode_conversion_reused_workbuf)) |
6219 Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list); | 6156 reused_workbuf_in_use = 0; |
6220 set_buffer_internal (XBUFFER (buffer)); | 6157 else if (! NILP (Fbuffer_live_p (workbuf))) |
6158 Fkill_buffer (workbuf); | |
6159 } | |
6160 set_buffer_internal (XBUFFER (current)); | |
6221 return Qnil; | 6161 return Qnil; |
6222 } | 6162 } |
6223 | 6163 |
6224 static Lisp_Object | 6164 Lisp_Object |
6225 code_conversion_save (buffer, with_work_buf, multibyte) | 6165 code_conversion_save (with_work_buf, multibyte) |
6226 Lisp_Object buffer; | |
6227 int with_work_buf, multibyte; | 6166 int with_work_buf, multibyte; |
6228 { | 6167 { |
6229 Lisp_Object workbuf; | 6168 Lisp_Object workbuf = Qnil; |
6230 | 6169 |
6231 if (with_work_buf) | 6170 if (with_work_buf) |
6232 { | 6171 workbuf = make_conversion_work_buffer (multibyte); |
6233 int depth = XINT (Flength (Vcode_conversion_work_buf_list)); | 6172 record_unwind_protect (code_conversion_restore, |
6234 | 6173 Fcons (Fcurrent_buffer (), workbuf)); |
6235 workbuf = make_conversion_work_buffer (multibyte, depth); | |
6236 } | |
6237 else | |
6238 workbuf = Qnil; | |
6239 Vcode_conversion_work_buf_list | |
6240 = Fcons (workbuf, Vcode_conversion_work_buf_list); | |
6241 record_unwind_protect (code_conversion_restore, buffer); | |
6242 return workbuf; | 6174 return workbuf; |
6243 } | 6175 } |
6244 | 6176 |
6245 int | 6177 int |
6246 decode_coding_gap (coding, chars, bytes) | 6178 decode_coding_gap (coding, chars, bytes) |
6247 struct coding_system *coding; | 6179 struct coding_system *coding; |
6248 EMACS_INT chars, bytes; | 6180 EMACS_INT chars, bytes; |
6249 { | 6181 { |
6250 int count = specpdl_ptr - specpdl; | 6182 int count = specpdl_ptr - specpdl; |
6251 Lisp_Object attrs; | 6183 Lisp_Object attrs; |
6252 Lisp_Object buffer; | 6184 |
6253 | 6185 code_conversion_save (0, 0); |
6254 buffer = Fcurrent_buffer (); | 6186 |
6255 code_conversion_save (buffer, 0, 0); | 6187 coding->src_object = Fcurrent_buffer (); |
6256 | |
6257 coding->src_object = buffer; | |
6258 coding->src_chars = chars; | |
6259 coding->src_bytes = bytes; | |
6260 coding->src_pos = -chars; | |
6261 coding->src_pos_byte = -bytes; | |
6262 coding->src_multibyte = chars < bytes; | |
6263 coding->dst_object = buffer; | |
6264 coding->dst_pos = PT; | |
6265 coding->dst_pos_byte = PT_BYTE; | |
6266 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters); | |
6267 coding->mode |= CODING_MODE_LAST_BLOCK; | |
6268 | |
6269 if (CODING_REQUIRE_DETECTION (coding)) | |
6270 detect_coding (coding); | |
6271 | |
6272 decode_coding (coding); | |
6273 | |
6274 attrs = CODING_ID_ATTRS (coding->id); | |
6275 if (! NILP (CODING_ATTR_POST_READ (attrs))) | |
6276 { | |
6277 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE; | |
6278 Lisp_Object val; | |
6279 | |
6280 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte); | |
6281 val = call1 (CODING_ATTR_POST_READ (attrs), | |
6282 make_number (coding->produced_char)); | |
6283 CHECK_NATNUM (val); | |
6284 coding->produced_char += Z - prev_Z; | |
6285 coding->produced += Z_BYTE - prev_Z_BYTE; | |
6286 } | |
6287 | |
6288 unbind_to (count, Qnil); | |
6289 return coding->result; | |
6290 } | |
6291 | |
6292 int | |
6293 encode_coding_gap (coding, chars, bytes) | |
6294 struct coding_system *coding; | |
6295 EMACS_INT chars, bytes; | |
6296 { | |
6297 int count = specpdl_ptr - specpdl; | |
6298 Lisp_Object buffer; | |
6299 | |
6300 buffer = Fcurrent_buffer (); | |
6301 code_conversion_save (buffer, 0, 0); | |
6302 | |
6303 coding->src_object = buffer; | |
6304 coding->src_chars = chars; | 6188 coding->src_chars = chars; |
6305 coding->src_bytes = bytes; | 6189 coding->src_bytes = bytes; |
6306 coding->src_pos = -chars; | 6190 coding->src_pos = -chars; |
6307 coding->src_pos_byte = -bytes; | 6191 coding->src_pos_byte = -bytes; |
6308 coding->src_multibyte = chars < bytes; | 6192 coding->src_multibyte = chars < bytes; |
6309 coding->dst_object = coding->src_object; | 6193 coding->dst_object = coding->src_object; |
6310 coding->dst_pos = PT; | 6194 coding->dst_pos = PT; |
6311 coding->dst_pos_byte = PT_BYTE; | 6195 coding->dst_pos_byte = PT_BYTE; |
6196 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters); | |
6197 coding->mode |= CODING_MODE_LAST_BLOCK; | |
6198 | |
6199 if (CODING_REQUIRE_DETECTION (coding)) | |
6200 detect_coding (coding); | |
6201 | |
6202 decode_coding (coding); | |
6203 | |
6204 attrs = CODING_ID_ATTRS (coding->id); | |
6205 if (! NILP (CODING_ATTR_POST_READ (attrs))) | |
6206 { | |
6207 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE; | |
6208 Lisp_Object val; | |
6209 | |
6210 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte); | |
6211 val = call1 (CODING_ATTR_POST_READ (attrs), | |
6212 make_number (coding->produced_char)); | |
6213 CHECK_NATNUM (val); | |
6214 coding->produced_char += Z - prev_Z; | |
6215 coding->produced += Z_BYTE - prev_Z_BYTE; | |
6216 } | |
6217 | |
6218 unbind_to (count, Qnil); | |
6219 return coding->result; | |
6220 } | |
6221 | |
6222 int | |
6223 encode_coding_gap (coding, chars, bytes) | |
6224 struct coding_system *coding; | |
6225 EMACS_INT chars, bytes; | |
6226 { | |
6227 int count = specpdl_ptr - specpdl; | |
6228 | |
6229 code_conversion_save (0, 0); | |
6230 | |
6231 coding->src_object = Fcurrent_buffer (); | |
6232 coding->src_chars = chars; | |
6233 coding->src_bytes = bytes; | |
6234 coding->src_pos = -chars; | |
6235 coding->src_pos_byte = -bytes; | |
6236 coding->src_multibyte = chars < bytes; | |
6237 coding->dst_object = coding->src_object; | |
6238 coding->dst_pos = PT; | |
6239 coding->dst_pos_byte = PT_BYTE; | |
6312 | 6240 |
6313 encode_coding (coding); | 6241 encode_coding (coding); |
6314 | 6242 |
6315 unbind_to (count, Qnil); | 6243 unbind_to (count, Qnil); |
6316 return coding->result; | 6244 return coding->result; |
6407 | 6335 |
6408 if (EQ (dst_object, Qt) | 6336 if (EQ (dst_object, Qt) |
6409 || (! NILP (CODING_ATTR_POST_READ (attrs)) | 6337 || (! NILP (CODING_ATTR_POST_READ (attrs)) |
6410 && NILP (dst_object))) | 6338 && NILP (dst_object))) |
6411 { | 6339 { |
6412 coding->dst_object = code_conversion_save (buffer, 1, 1); | 6340 coding->dst_object = code_conversion_save (1, 1); |
6413 coding->dst_pos = BEG; | 6341 coding->dst_pos = BEG; |
6414 coding->dst_pos_byte = BEG_BYTE; | 6342 coding->dst_pos_byte = BEG_BYTE; |
6415 coding->dst_multibyte = 1; | 6343 coding->dst_multibyte = 1; |
6416 } | 6344 } |
6417 else if (BUFFERP (dst_object)) | 6345 else if (BUFFERP (dst_object)) |
6418 { | 6346 { |
6419 code_conversion_save (buffer, 0, 0); | 6347 code_conversion_save (0, 0); |
6420 coding->dst_object = dst_object; | 6348 coding->dst_object = dst_object; |
6421 coding->dst_pos = BUF_PT (XBUFFER (dst_object)); | 6349 coding->dst_pos = BUF_PT (XBUFFER (dst_object)); |
6422 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object)); | 6350 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object)); |
6423 coding->dst_multibyte | 6351 coding->dst_multibyte |
6424 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters); | 6352 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters); |
6425 } | 6353 } |
6426 else | 6354 else |
6427 { | 6355 { |
6428 code_conversion_save (buffer, 0, 0); | 6356 code_conversion_save (0, 0); |
6429 coding->dst_object = Qnil; | 6357 coding->dst_object = Qnil; |
6430 coding->dst_multibyte = 1; | 6358 coding->dst_multibyte = 1; |
6431 } | 6359 } |
6432 | 6360 |
6433 decode_coding (coding); | 6361 decode_coding (coding); |
6522 | 6450 |
6523 attrs = CODING_ID_ATTRS (coding->id); | 6451 attrs = CODING_ID_ATTRS (coding->id); |
6524 | 6452 |
6525 if (! NILP (CODING_ATTR_PRE_WRITE (attrs))) | 6453 if (! NILP (CODING_ATTR_PRE_WRITE (attrs))) |
6526 { | 6454 { |
6527 coding->src_object = code_conversion_save (buffer, 1, | 6455 coding->src_object = code_conversion_save (1, coding->src_multibyte); |
6528 coding->src_multibyte); | |
6529 set_buffer_internal (XBUFFER (coding->src_object)); | 6456 set_buffer_internal (XBUFFER (coding->src_object)); |
6530 if (STRINGP (src_object)) | 6457 if (STRINGP (src_object)) |
6531 insert_from_string (src_object, from, from_byte, chars, bytes, 0); | 6458 insert_from_string (src_object, from, from_byte, chars, bytes, 0); |
6532 else if (BUFFERP (src_object)) | 6459 else if (BUFFERP (src_object)) |
6533 insert_from_buffer (XBUFFER (src_object), from, chars, 0); | 6460 insert_from_buffer (XBUFFER (src_object), from, chars, 0); |
6553 coding->src_pos_byte = BEG_BYTE; | 6480 coding->src_pos_byte = BEG_BYTE; |
6554 coding->src_multibyte = Z < Z_BYTE; | 6481 coding->src_multibyte = Z < Z_BYTE; |
6555 } | 6482 } |
6556 else if (STRINGP (src_object)) | 6483 else if (STRINGP (src_object)) |
6557 { | 6484 { |
6558 code_conversion_save (buffer, 0, 0); | 6485 code_conversion_save (0, 0); |
6559 coding->src_pos = from; | 6486 coding->src_pos = from; |
6560 coding->src_pos_byte = from_byte; | 6487 coding->src_pos_byte = from_byte; |
6561 } | 6488 } |
6562 else if (BUFFERP (src_object)) | 6489 else if (BUFFERP (src_object)) |
6563 { | 6490 { |
6564 code_conversion_save (buffer, 0, 0); | 6491 code_conversion_save (0, 0); |
6565 set_buffer_internal (XBUFFER (src_object)); | 6492 set_buffer_internal (XBUFFER (src_object)); |
6566 if (EQ (src_object, dst_object)) | 6493 if (EQ (src_object, dst_object)) |
6567 { | 6494 { |
6568 saved_pt = PT, saved_pt_byte = PT_BYTE; | 6495 saved_pt = PT, saved_pt_byte = PT_BYTE; |
6569 coding->src_object = del_range_1 (from, to, 1, 1); | 6496 coding->src_object = del_range_1 (from, to, 1, 1); |
6577 coding->src_pos = from; | 6504 coding->src_pos = from; |
6578 coding->src_pos_byte = from_byte; | 6505 coding->src_pos_byte = from_byte; |
6579 } | 6506 } |
6580 } | 6507 } |
6581 else | 6508 else |
6582 code_conversion_save (buffer, 0, 0); | 6509 code_conversion_save (0, 0); |
6583 | 6510 |
6584 if (BUFFERP (dst_object)) | 6511 if (BUFFERP (dst_object)) |
6585 { | 6512 { |
6586 coding->dst_object = dst_object; | 6513 coding->dst_object = dst_object; |
6587 if (EQ (src_object, dst_object)) | 6514 if (EQ (src_object, dst_object)) |
6733 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format | 6660 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format |
6734 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'), | 6661 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'), |
6735 detect only text-format. */ | 6662 detect only text-format. */ |
6736 | 6663 |
6737 Lisp_Object | 6664 Lisp_Object |
6738 detect_coding_system (src, src_bytes, highest, multibytep, coding_system) | 6665 detect_coding_system (src, src_chars, src_bytes, highest, multibytep, |
6666 coding_system) | |
6739 const unsigned char *src; | 6667 const unsigned char *src; |
6740 int src_bytes, highest; | 6668 int src_chars, src_bytes, highest; |
6741 int multibytep; | 6669 int multibytep; |
6742 Lisp_Object coding_system; | 6670 Lisp_Object coding_system; |
6743 { | 6671 { |
6744 const unsigned char *src_end = src + src_bytes; | 6672 const unsigned char *src_end = src + src_bytes; |
6745 Lisp_Object attrs, eol_type; | 6673 Lisp_Object attrs, eol_type; |
6746 Lisp_Object val; | 6674 Lisp_Object val; |
6747 struct coding_system coding; | 6675 struct coding_system coding; |
6748 int id; | 6676 int id; |
6749 struct coding_detection_info detect_info; | 6677 struct coding_detection_info detect_info; |
6678 enum coding_category base_category; | |
6750 | 6679 |
6751 if (NILP (coding_system)) | 6680 if (NILP (coding_system)) |
6752 coding_system = Qundecided; | 6681 coding_system = Qundecided; |
6753 setup_coding_system (coding_system, &coding); | 6682 setup_coding_system (coding_system, &coding); |
6754 attrs = CODING_ID_ATTRS (coding.id); | 6683 attrs = CODING_ID_ATTRS (coding.id); |
6755 eol_type = CODING_ID_EOL_TYPE (coding.id); | 6684 eol_type = CODING_ID_EOL_TYPE (coding.id); |
6756 coding_system = CODING_ATTR_BASE_NAME (attrs); | 6685 coding_system = CODING_ATTR_BASE_NAME (attrs); |
6757 | 6686 |
6758 coding.source = src; | 6687 coding.source = src; |
6688 coding.src_chars = src_chars; | |
6759 coding.src_bytes = src_bytes; | 6689 coding.src_bytes = src_bytes; |
6760 coding.src_multibyte = multibytep; | 6690 coding.src_multibyte = multibytep; |
6761 coding.consumed = 0; | 6691 coding.consumed = 0; |
6762 coding.mode |= CODING_MODE_LAST_BLOCK; | 6692 coding.mode |= CODING_MODE_LAST_BLOCK; |
6763 | 6693 |
6764 detect_info.checked = detect_info.found = detect_info.rejected = 0; | 6694 detect_info.checked = detect_info.found = detect_info.rejected = 0; |
6765 | 6695 |
6766 /* At first, detect text-format if necessary. */ | 6696 /* At first, detect text-format if necessary. */ |
6767 if (XINT (CODING_ATTR_CATEGORY (attrs)) == coding_category_undecided) | 6697 base_category = XINT (CODING_ATTR_CATEGORY (attrs)); |
6698 if (base_category == coding_category_undecided) | |
6768 { | 6699 { |
6769 enum coding_category category; | 6700 enum coding_category category; |
6770 struct coding_system *this; | 6701 struct coding_system *this; |
6771 int c, i; | 6702 int c, i; |
6772 | 6703 |
6773 for (; src < src_end; src++) | 6704 /* Skip all ASCII bytes except for a few ISO2022 controls. */ |
6705 for (i = 0; src < src_end; i++, src++) | |
6774 { | 6706 { |
6775 c = *src; | 6707 c = *src; |
6776 if (c & 0x80 | 6708 if (c & 0x80 || (c < 0x20 && (c == 0 |
6777 || (c < 0x20 && (c == ISO_CODE_ESC | 6709 || c == ISO_CODE_ESC |
6778 || c == ISO_CODE_SI | 6710 || c == ISO_CODE_SI |
6779 || c == ISO_CODE_SO))) | 6711 || c == ISO_CODE_SO))) |
6780 break; | 6712 break; |
6781 } | 6713 } |
6714 /* Skipped bytes must be even for utf-16 detector. */ | |
6715 if (i % 2) | |
6716 src--; | |
6782 coding.head_ascii = src - coding.source; | 6717 coding.head_ascii = src - coding.source; |
6783 | 6718 |
6784 if (src < src_end) | 6719 if (src < src_end) |
6785 for (i = 0; i < coding_category_raw_text; i++) | 6720 for (i = 0; i < coding_category_raw_text; i++) |
6786 { | 6721 { |
6803 else | 6738 else |
6804 { | 6739 { |
6805 if ((*(this->detector)) (&coding, &detect_info) | 6740 if ((*(this->detector)) (&coding, &detect_info) |
6806 && highest | 6741 && highest |
6807 && (detect_info.found & (1 << category))) | 6742 && (detect_info.found & (1 << category))) |
6808 break; | 6743 { |
6744 if (category == coding_category_utf_16_auto) | |
6745 { | |
6746 if (detect_info.found & CATEGORY_MASK_UTF_16_LE) | |
6747 category = coding_category_utf_16_le; | |
6748 else | |
6749 category = coding_category_utf_16_be; | |
6750 } | |
6751 break; | |
6752 } | |
6809 } | 6753 } |
6810 } | 6754 } |
6811 | |
6812 | 6755 |
6813 if (detect_info.rejected == CATEGORY_MASK_ANY) | 6756 if (detect_info.rejected == CATEGORY_MASK_ANY) |
6814 { | 6757 { |
6815 detect_info.found = CATEGORY_MASK_RAW_TEXT; | 6758 detect_info.found = CATEGORY_MASK_RAW_TEXT; |
6816 id = coding_categories[coding_category_raw_text].id; | 6759 id = coding_categories[coding_category_raw_text].id; |
6863 id = coding_categories[category].id; | 6806 id = coding_categories[category].id; |
6864 val = Fcons (make_number (id), val); | 6807 val = Fcons (make_number (id), val); |
6865 } | 6808 } |
6866 } | 6809 } |
6867 detect_info.found |= found; | 6810 detect_info.found |= found; |
6811 } | |
6812 } | |
6813 else if (base_category == coding_category_utf_16_auto) | |
6814 { | |
6815 if (detect_coding_utf_16 (&coding, &detect_info)) | |
6816 { | |
6817 enum coding_category category; | |
6818 struct coding_system *this; | |
6819 | |
6820 if (detect_info.found & CATEGORY_MASK_UTF_16_LE) | |
6821 this = coding_categories + coding_category_utf_16_le; | |
6822 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE) | |
6823 this = coding_categories + coding_category_utf_16_be; | |
6824 else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG) | |
6825 this = coding_categories + coding_category_utf_16_be_nosig; | |
6826 else | |
6827 this = coding_categories + coding_category_utf_16_le_nosig; | |
6828 val = Fcons (make_number (this->id), Qnil); | |
6868 } | 6829 } |
6869 } | 6830 } |
6870 else | 6831 else |
6871 { | 6832 { |
6872 detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs)); | 6833 detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs)); |
6967 | 6928 |
6968 if (from < GPT && to >= GPT) | 6929 if (from < GPT && to >= GPT) |
6969 move_gap_both (to, to_byte); | 6930 move_gap_both (to, to_byte); |
6970 | 6931 |
6971 return detect_coding_system (BYTE_POS_ADDR (from_byte), | 6932 return detect_coding_system (BYTE_POS_ADDR (from_byte), |
6972 to_byte - from_byte, | 6933 to - from, to_byte - from_byte, |
6973 !NILP (highest), | 6934 !NILP (highest), |
6974 !NILP (current_buffer | 6935 !NILP (current_buffer |
6975 ->enable_multibyte_characters), | 6936 ->enable_multibyte_characters), |
6976 Qnil); | 6937 Qnil); |
6977 } | 6938 } |
6990 (string, highest) | 6951 (string, highest) |
6991 Lisp_Object string, highest; | 6952 Lisp_Object string, highest; |
6992 { | 6953 { |
6993 CHECK_STRING (string); | 6954 CHECK_STRING (string); |
6994 | 6955 |
6995 return detect_coding_system (SDATA (string), SBYTES (string), | 6956 return detect_coding_system (SDATA (string), |
6957 SCHARS (string), SBYTES (string), | |
6996 !NILP (highest), STRING_MULTIBYTE (string), | 6958 !NILP (highest), STRING_MULTIBYTE (string), |
6997 Qnil); | 6959 Qnil); |
6998 } | 6960 } |
6999 | 6961 |
7000 | 6962 |
8615 iso_code_class[i] = ISO_control_1; | 8577 iso_code_class[i] = ISO_control_1; |
8616 for (i = 0xA1; i < 0xFF; i++) | 8578 for (i = 0xA1; i < 0xFF; i++) |
8617 iso_code_class[i] = ISO_graphic_plane_1; | 8579 iso_code_class[i] = ISO_graphic_plane_1; |
8618 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F; | 8580 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F; |
8619 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF; | 8581 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF; |
8620 iso_code_class[ISO_CODE_CR] = ISO_carriage_return; | |
8621 iso_code_class[ISO_CODE_SO] = ISO_shift_out; | 8582 iso_code_class[ISO_CODE_SO] = ISO_shift_out; |
8622 iso_code_class[ISO_CODE_SI] = ISO_shift_in; | 8583 iso_code_class[ISO_CODE_SI] = ISO_shift_in; |
8623 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7; | 8584 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7; |
8624 iso_code_class[ISO_CODE_ESC] = ISO_escape; | 8585 iso_code_class[ISO_CODE_ESC] = ISO_escape; |
8625 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2; | 8586 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2; |
8653 Vsjis_coding_system = Qnil; | 8614 Vsjis_coding_system = Qnil; |
8654 | 8615 |
8655 staticpro (&Vbig5_coding_system); | 8616 staticpro (&Vbig5_coding_system); |
8656 Vbig5_coding_system = Qnil; | 8617 Vbig5_coding_system = Qnil; |
8657 | 8618 |
8658 staticpro (&Vcode_conversion_work_buf_list); | 8619 staticpro (&Vcode_conversion_reused_workbuf); |
8659 Vcode_conversion_work_buf_list = Qnil; | 8620 Vcode_conversion_reused_workbuf = Qnil; |
8660 | 8621 |
8661 staticpro (&Vcode_conversion_reused_work_buf); | 8622 staticpro (&Vcode_conversion_workbuf_name); |
8662 Vcode_conversion_reused_work_buf = Qnil; | 8623 Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*"); |
8624 | |
8625 reused_workbuf_in_use = 0; | |
8663 | 8626 |
8664 DEFSYM (Qcharset, "charset"); | 8627 DEFSYM (Qcharset, "charset"); |
8665 DEFSYM (Qtarget_idx, "target-idx"); | 8628 DEFSYM (Qtarget_idx, "target-idx"); |
8666 DEFSYM (Qcoding_system_history, "coding-system-history"); | 8629 DEFSYM (Qcoding_system_history, "coding-system-history"); |
8667 Fset (Qcoding_system_history, Qnil); | 8630 Fset (Qcoding_system_history, Qnil); |