comparison src/coding.c @ 89665:9010cefe8d29

(enum iso_code_class_type): Delete ISO_carriage_return. (CODING_GET_INFO): Delete argument eol_type. Callers changed. (decode_coding_utf_8): Don't do eol conversion. (detect_coding_utf_16): Check coding->src_chars, not coding->src_bytes. Add heuristics for those that have no signature. (decode_coding_emacs_mule): Don't do eol conversion. (decode_coding_iso_2022): Likewise. (decode_coding_sjis): Likewise. (decode_coding_big5): Likewise. (decode_coding_charset): Likewise. (adjust_coding_eol_type): Return a new coding system. (detect_coding): Don't detect eol. Fix for utf-16 detection. (decode_eol): In case of CRLF->LF conversion, use del_range_2 on each change. (decode_coding): Pay attention to undo_list. Do eol conversion for all types of coding-systems (if necessary). (Vcode_conversion_work_buf_list): Delete it. (Vcode_conversion_reused_workbuf): Renamed from Vcode_conversion_reused_work_buf. (Vcode_conversion_workbuf_name): New variable. (reused_workbuf_in_use): New variable. (make_conversion_work_buffer): Delete the arg DEPTH. (code_conversion_restore): Argument changed to cons. (code_conversion_save): Delete the argument BUFFER. Callers changed. (detect_coding_system): New argument src_chars. Callers changed. Fix for utf-16 detection. (init_coding_once): Don't use ISO_carriage_return. (syms_of_coding): Initialized Vcode_conversion_workbuf_name and reused_workbuf_in_use.
author Kenichi Handa <handa@m17n.org>
date Tue, 02 Dec 2003 01:40:27 +0000
parents cbaa9fd1aa5c
children cf1ff36f92dc
comparison
equal deleted inserted replaced
89664:5548dd3d1a7c 89665:9010cefe8d29
489 enum iso_code_class_type 489 enum iso_code_class_type
490 { 490 {
491 ISO_control_0, /* Control codes in the range 491 ISO_control_0, /* Control codes in the range
492 0x00..0x1F and 0x7F, except for the 492 0x00..0x1F and 0x7F, except for the
493 following 5 codes. */ 493 following 5 codes. */
494 ISO_carriage_return, /* ISO_CODE_CR (0x0D) */
495 ISO_shift_out, /* ISO_CODE_SO (0x0E) */ 494 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
496 ISO_shift_in, /* ISO_CODE_SI (0x0F) */ 495 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
497 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */ 496 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
498 ISO_escape, /* ISO_CODE_SO (0x1B) */ 497 ISO_escape, /* ISO_CODE_SO (0x1B) */
499 ISO_control_1, /* Control codes in the range 498 ISO_control_1, /* Control codes in the range
708 #endif 707 #endif
709 #ifndef max 708 #ifndef max
710 #define max(a, b) ((a) > (b) ? (a) : (b)) 709 #define max(a, b) ((a) > (b) ? (a) : (b))
711 #endif 710 #endif
712 711
713 #define CODING_GET_INFO(coding, attrs, eol_type, charset_list) \ 712 #define CODING_GET_INFO(coding, attrs, charset_list) \
714 do { \ 713 do { \
715 attrs = CODING_ID_ATTRS (coding->id); \ 714 (attrs) = CODING_ID_ATTRS ((coding)->id); \
716 eol_type = CODING_ID_EOL_TYPE (coding->id); \ 715 (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \
717 if (VECTORP (eol_type)) \
718 eol_type = Qunix; \
719 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
720 } while (0) 716 } while (0)
721 717
722 718
723 /* Safely get one byte from the source text pointed by SRC which ends 719 /* Safely get one byte from the source text pointed by SRC which ends
724 at SRC_END, and set C to that byte. If there are not enough bytes 720 at SRC_END, and set C to that byte. If there are not enough bytes
1130 const unsigned char *src_base; 1126 const unsigned char *src_base;
1131 int *charbuf = coding->charbuf; 1127 int *charbuf = coding->charbuf;
1132 int *charbuf_end = charbuf + coding->charbuf_size; 1128 int *charbuf_end = charbuf + coding->charbuf_size;
1133 int consumed_chars = 0, consumed_chars_base; 1129 int consumed_chars = 0, consumed_chars_base;
1134 int multibytep = coding->src_multibyte; 1130 int multibytep = coding->src_multibyte;
1135 Lisp_Object attr, eol_type, charset_list; 1131 Lisp_Object attr, charset_list;
1136 1132
1137 CODING_GET_INFO (coding, attr, eol_type, charset_list); 1133 CODING_GET_INFO (coding, attr, charset_list);
1138 1134
1139 while (1) 1135 while (1)
1140 { 1136 {
1141 int c, c1, c2, c3, c4, c5; 1137 int c, c1, c2, c3, c4, c5;
1142 1138
1148 1144
1149 ONE_MORE_BYTE (c1); 1145 ONE_MORE_BYTE (c1);
1150 if (UTF_8_1_OCTET_P(c1)) 1146 if (UTF_8_1_OCTET_P(c1))
1151 { 1147 {
1152 c = c1; 1148 c = c1;
1153 if (c == '\r')
1154 {
1155 if (EQ (eol_type, Qdos))
1156 {
1157 if (src == src_end)
1158 {
1159 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
1160 goto no_more_source;
1161 }
1162 if (*src == '\n')
1163 ONE_MORE_BYTE (c);
1164 }
1165 else if (EQ (eol_type, Qmac))
1166 c = '\n';
1167 }
1168 } 1149 }
1169 else 1150 else
1170 { 1151 {
1171 ONE_MORE_BYTE (c2); 1152 ONE_MORE_BYTE (c2);
1172 if (! UTF_8_EXTRA_OCTET_P (c2)) 1153 if (! UTF_8_EXTRA_OCTET_P (c2))
1323 int multibytep = coding->src_multibyte; 1304 int multibytep = coding->src_multibyte;
1324 int consumed_chars = 0; 1305 int consumed_chars = 0;
1325 int c1, c2; 1306 int c1, c2;
1326 1307
1327 detect_info->checked |= CATEGORY_MASK_UTF_16; 1308 detect_info->checked |= CATEGORY_MASK_UTF_16;
1328
1329 if (coding->mode & CODING_MODE_LAST_BLOCK 1309 if (coding->mode & CODING_MODE_LAST_BLOCK
1330 && (coding->src_bytes & 1)) 1310 && (coding->src_chars & 1))
1331 { 1311 {
1332 detect_info->rejected |= CATEGORY_MASK_UTF_16; 1312 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1333 return 0; 1313 return 0;
1334 } 1314 }
1315
1335 ONE_MORE_BYTE (c1); 1316 ONE_MORE_BYTE (c1);
1336 ONE_MORE_BYTE (c2); 1317 ONE_MORE_BYTE (c2);
1337
1338 if ((c1 == 0xFF) && (c2 == 0xFE)) 1318 if ((c1 == 0xFF) && (c2 == 0xFE))
1339 { 1319 {
1340 detect_info->found |= (CATEGORY_MASK_UTF_16_LE 1320 detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1341 | CATEGORY_MASK_UTF_16_AUTO); 1321 | CATEGORY_MASK_UTF_16_AUTO);
1342 detect_info->rejected |= CATEGORY_MASK_UTF_16_BE; 1322 detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1323 | CATEGORY_MASK_UTF_16_BE_NOSIG
1324 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1343 } 1325 }
1344 else if ((c1 == 0xFE) && (c2 == 0xFF)) 1326 else if ((c1 == 0xFE) && (c2 == 0xFF))
1345 { 1327 {
1346 detect_info->found |= (CATEGORY_MASK_UTF_16_BE 1328 detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1347 | CATEGORY_MASK_UTF_16_AUTO); 1329 | CATEGORY_MASK_UTF_16_AUTO);
1348 detect_info->rejected |= CATEGORY_MASK_UTF_16_LE; 1330 detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1331 | CATEGORY_MASK_UTF_16_BE_NOSIG
1332 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1333 }
1334 else
1335 {
1336 unsigned char b1[256], b2[256];
1337 int b1_variants = 1, b2_variants = 1;
1338 int n;
1339
1340 bzero (b1, 256), bzero (b2, 256);
1341 b1[c1]++, b2[c2]++;
1342 for (n = 0; n < 256 && src < src_end; n++)
1343 {
1344 ONE_MORE_BYTE (c1);
1345 ONE_MORE_BYTE (c2);
1346 if (! b1[c1++]) b1_variants++;
1347 if (! b2[c2++]) b2_variants++;
1348 }
1349 if (b1_variants < b2_variants)
1350 detect_info->found |= CATEGORY_MASK_UTF_16_BE_NOSIG;
1351 else
1352 detect_info->found |= CATEGORY_MASK_UTF_16_LE_NOSIG;
1353 detect_info->rejected
1354 |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
1349 } 1355 }
1350 no_more_source: 1356 no_more_source:
1351 return 1; 1357 return 1;
1352 } 1358 }
1353 1359
1363 int consumed_chars = 0, consumed_chars_base; 1369 int consumed_chars = 0, consumed_chars_base;
1364 int multibytep = coding->src_multibyte; 1370 int multibytep = coding->src_multibyte;
1365 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding); 1371 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1366 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding); 1372 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1367 int surrogate = CODING_UTF_16_SURROGATE (coding); 1373 int surrogate = CODING_UTF_16_SURROGATE (coding);
1368 Lisp_Object attr, eol_type, charset_list; 1374 Lisp_Object attr, charset_list;
1369 1375
1370 CODING_GET_INFO (coding, attr, eol_type, charset_list); 1376 CODING_GET_INFO (coding, attr, charset_list);
1371 1377
1372 if (bom == utf_16_with_bom) 1378 if (bom == utf_16_with_bom)
1373 { 1379 {
1374 int c, c1, c2; 1380 int c, c1, c2;
1375 1381
1458 unsigned char *dst_end = coding->destination + coding->dst_bytes; 1464 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1459 int safe_room = 8; 1465 int safe_room = 8;
1460 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding); 1466 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1461 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian; 1467 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1462 int produced_chars = 0; 1468 int produced_chars = 0;
1463 Lisp_Object attrs, eol_type, charset_list; 1469 Lisp_Object attrs, charset_list;
1464 int c; 1470 int c;
1465 1471
1466 CODING_GET_INFO (coding, attrs, eol_type, charset_list); 1472 CODING_GET_INFO (coding, attrs, charset_list);
1467 1473
1468 if (bom != utf_16_without_bom) 1474 if (bom != utf_16_without_bom)
1469 { 1475 {
1470 ASSURE_DESTINATION (safe_room); 1476 ASSURE_DESTINATION (safe_room);
1471 if (big_endian) 1477 if (big_endian)
1926 const unsigned char *src_base; 1932 const unsigned char *src_base;
1927 int *charbuf = coding->charbuf; 1933 int *charbuf = coding->charbuf;
1928 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; 1934 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
1929 int consumed_chars = 0, consumed_chars_base; 1935 int consumed_chars = 0, consumed_chars_base;
1930 int multibytep = coding->src_multibyte; 1936 int multibytep = coding->src_multibyte;
1931 Lisp_Object attrs, eol_type, charset_list; 1937 Lisp_Object attrs, charset_list;
1932 int char_offset = coding->produced_char; 1938 int char_offset = coding->produced_char;
1933 int last_offset = char_offset; 1939 int last_offset = char_offset;
1934 int last_id = charset_ascii; 1940 int last_id = charset_ascii;
1935 1941
1936 CODING_GET_INFO (coding, attrs, eol_type, charset_list); 1942 CODING_GET_INFO (coding, attrs, charset_list);
1937 1943
1938 while (1) 1944 while (1)
1939 { 1945 {
1940 int c; 1946 int c;
1941 1947
1947 1953
1948 ONE_MORE_BYTE (c); 1954 ONE_MORE_BYTE (c);
1949 1955
1950 if (c < 0x80) 1956 if (c < 0x80)
1951 { 1957 {
1952 if (c == '\r')
1953 {
1954 if (EQ (eol_type, Qdos))
1955 {
1956 if (src == src_end)
1957 {
1958 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
1959 goto no_more_source;
1960 }
1961 if (*src == '\n')
1962 ONE_MORE_BYTE (c);
1963 }
1964 else if (EQ (eol_type, Qmac))
1965 c = '\n';
1966 }
1967 *charbuf++ = c; 1958 *charbuf++ = c;
1968 char_offset++; 1959 char_offset++;
1969 } 1960 }
1970 else if (c == 0x80) 1961 else if (c == 0x80)
1971 { 1962 {
2050 int *charbuf_end = charbuf + coding->charbuf_used; 2041 int *charbuf_end = charbuf + coding->charbuf_used;
2051 unsigned char *dst = coding->destination + coding->produced; 2042 unsigned char *dst = coding->destination + coding->produced;
2052 unsigned char *dst_end = coding->destination + coding->dst_bytes; 2043 unsigned char *dst_end = coding->destination + coding->dst_bytes;
2053 int safe_room = 8; 2044 int safe_room = 8;
2054 int produced_chars = 0; 2045 int produced_chars = 0;
2055 Lisp_Object attrs, eol_type, charset_list; 2046 Lisp_Object attrs, charset_list;
2056 int c; 2047 int c;
2057 int preferred_charset_id = -1; 2048 int preferred_charset_id = -1;
2058 2049
2059 CODING_GET_INFO (coding, attrs, eol_type, charset_list); 2050 CODING_GET_INFO (coding, attrs, charset_list);
2060 if (! EQ (charset_list, Vemacs_mule_charset_list)) 2051 if (! EQ (charset_list, Vemacs_mule_charset_list))
2061 { 2052 {
2062 CODING_ATTR_CHARSET_LIST (attrs) 2053 CODING_ATTR_CHARSET_LIST (attrs)
2063 = charset_list = Vemacs_mule_charset_list; 2054 = charset_list = Vemacs_mule_charset_list;
2064 } 2055 }
2804 int composition_state = COMPOSING_NO; 2795 int composition_state = COMPOSING_NO;
2805 enum composition_method method; 2796 enum composition_method method;
2806 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1]; 2797 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2807 int component_idx; 2798 int component_idx;
2808 int component_len; 2799 int component_len;
2809 Lisp_Object attrs, eol_type, charset_list; 2800 Lisp_Object attrs, charset_list;
2810 int char_offset = coding->produced_char; 2801 int char_offset = coding->produced_char;
2811 int last_offset = char_offset; 2802 int last_offset = char_offset;
2812 int last_id = charset_ascii; 2803 int last_id = charset_ascii;
2813 2804
2814 CODING_GET_INFO (coding, attrs, eol_type, charset_list); 2805 CODING_GET_INFO (coding, attrs, charset_list);
2815 setup_iso_safe_charsets (attrs); 2806 setup_iso_safe_charsets (attrs);
2816 2807
2817 while (1) 2808 while (1)
2818 { 2809 {
2819 int c1, c2; 2810 int c1, c2;
2874 case ISO_graphic_plane_1: 2865 case ISO_graphic_plane_1:
2875 if (charset_id_1 < 0) 2866 if (charset_id_1 < 0)
2876 goto invalid_code; 2867 goto invalid_code;
2877 charset = CHARSET_FROM_ID (charset_id_1); 2868 charset = CHARSET_FROM_ID (charset_id_1);
2878 break; 2869 break;
2879
2880 case ISO_carriage_return:
2881 if (c1 == '\r')
2882 {
2883 if (EQ (eol_type, Qdos))
2884 {
2885 if (src == src_end)
2886 {
2887 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
2888 goto no_more_source;
2889 }
2890 if (*src == '\n')
2891 ONE_MORE_BYTE (c1);
2892 }
2893 else if (EQ (eol_type, Qmac))
2894 c1 = '\n';
2895 }
2896 /* fall through */
2897 2870
2898 case ISO_control_0: 2871 case ISO_control_0:
2899 MAYBE_FINISH_COMPOSITION (); 2872 MAYBE_FINISH_COMPOSITION ();
2900 charset = CHARSET_FROM_ID (charset_ascii); 2873 charset = CHARSET_FROM_ID (charset_ascii);
2901 break; 2874 break;
3646 Lisp_Object attrs, eol_type, charset_list; 3619 Lisp_Object attrs, eol_type, charset_list;
3647 int ascii_compatible; 3620 int ascii_compatible;
3648 int c; 3621 int c;
3649 int preferred_charset_id = -1; 3622 int preferred_charset_id = -1;
3650 3623
3651 CODING_GET_INFO (coding, attrs, eol_type, charset_list); 3624 CODING_GET_INFO (coding, attrs, charset_list);
3625 eol_type = CODING_ID_EOL_TYPE (coding->id);
3626 if (VECTORP (eol_type))
3627 eol_type = Qunix;
3628
3652 setup_iso_safe_charsets (attrs); 3629 setup_iso_safe_charsets (attrs);
3653 /* Charset list may have been changed. */ 3630 /* Charset list may have been changed. */
3654 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \ 3631 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
3655 coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs)); 3632 coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
3656 3633
3929 int *charbuf = coding->charbuf; 3906 int *charbuf = coding->charbuf;
3930 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; 3907 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
3931 int consumed_chars = 0, consumed_chars_base; 3908 int consumed_chars = 0, consumed_chars_base;
3932 int multibytep = coding->src_multibyte; 3909 int multibytep = coding->src_multibyte;
3933 struct charset *charset_roman, *charset_kanji, *charset_kana; 3910 struct charset *charset_roman, *charset_kanji, *charset_kana;
3934 Lisp_Object attrs, eol_type, charset_list, val; 3911 Lisp_Object attrs, charset_list, val;
3935 int char_offset = coding->produced_char; 3912 int char_offset = coding->produced_char;
3936 int last_offset = char_offset; 3913 int last_offset = char_offset;
3937 int last_id = charset_ascii; 3914 int last_id = charset_ascii;
3938 3915
3939 CODING_GET_INFO (coding, attrs, eol_type, charset_list); 3916 CODING_GET_INFO (coding, attrs, charset_list);
3940 3917
3941 val = charset_list; 3918 val = charset_list;
3942 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); 3919 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3943 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); 3920 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3944 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))); 3921 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
3945 3922
3946 while (1) 3923 while (1)
3947 { 3924 {
3948 int c, c1; 3925 int c, c1;
3926 struct charset *charset;
3949 3927
3950 src_base = src; 3928 src_base = src;
3951 consumed_chars_base = consumed_chars; 3929 consumed_chars_base = consumed_chars;
3952 3930
3953 if (charbuf >= charbuf_end) 3931 if (charbuf >= charbuf_end)
3954 break; 3932 break;
3955 3933
3956 ONE_MORE_BYTE (c); 3934 ONE_MORE_BYTE (c);
3957 3935
3958 if (c == '\r') 3936 if (c < 0x80)
3959 { 3937 charset = charset_roman;
3960 if (EQ (eol_type, Qdos)) 3938 else
3939 {
3940 if (c >= 0xF0)
3941 goto invalid_code;
3942 if (c < 0xA0 || c >= 0xE0)
3961 { 3943 {
3962 if (src == src_end) 3944 /* SJIS -> JISX0208 */
3963 { 3945 ONE_MORE_BYTE (c1);
3964 coding->result = CODING_RESULT_INSUFFICIENT_SRC; 3946 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
3965 goto no_more_source; 3947 goto invalid_code;
3966 } 3948 c = (c << 8) | c1;
3967 if (*src == '\n') 3949 SJIS_TO_JIS (c);
3968 ONE_MORE_BYTE (c); 3950 charset = charset_kanji;
3969 } 3951 }
3970 else if (EQ (eol_type, Qmac)) 3952 else if (c > 0xA0)
3971 c = '\n'; 3953 {
3972 } 3954 /* SJIS -> JISX0201-Kana */
3973 else 3955 c &= 0x7F;
3974 { 3956 charset = charset_kana;
3975 struct charset *charset; 3957 }
3976
3977 if (c < 0x80)
3978 charset = charset_roman;
3979 else 3958 else
3980 { 3959 goto invalid_code;
3981 if (c >= 0xF0) 3960 }
3982 goto invalid_code; 3961 if (charset->id != charset_ascii
3983 if (c < 0xA0 || c >= 0xE0) 3962 && last_id != charset->id)
3984 { 3963 {
3985 /* SJIS -> JISX0208 */ 3964 if (last_id != charset_ascii)
3986 ONE_MORE_BYTE (c1); 3965 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3987 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC) 3966 last_id = charset->id;
3988 goto invalid_code; 3967 last_offset = char_offset;
3989 c = (c << 8) | c1; 3968 }
3990 SJIS_TO_JIS (c); 3969 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
3991 charset = charset_kanji;
3992 }
3993 else if (c > 0xA0)
3994 {
3995 /* SJIS -> JISX0201-Kana */
3996 c &= 0x7F;
3997 charset = charset_kana;
3998 }
3999 else
4000 goto invalid_code;
4001 }
4002 if (charset->id != charset_ascii
4003 && last_id != charset->id)
4004 {
4005 if (last_id != charset_ascii)
4006 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4007 last_id = charset->id;
4008 last_offset = char_offset;
4009 }
4010 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4011 }
4012 *charbuf++ = c; 3970 *charbuf++ = c;
4013 char_offset++; 3971 char_offset++;
4014 continue; 3972 continue;
4015 3973
4016 invalid_code: 3974 invalid_code:
4040 int *charbuf = coding->charbuf; 3998 int *charbuf = coding->charbuf;
4041 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; 3999 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4042 int consumed_chars = 0, consumed_chars_base; 4000 int consumed_chars = 0, consumed_chars_base;
4043 int multibytep = coding->src_multibyte; 4001 int multibytep = coding->src_multibyte;
4044 struct charset *charset_roman, *charset_big5; 4002 struct charset *charset_roman, *charset_big5;
4045 Lisp_Object attrs, eol_type, charset_list, val; 4003 Lisp_Object attrs, charset_list, val;
4046 int char_offset = coding->produced_char; 4004 int char_offset = coding->produced_char;
4047 int last_offset = char_offset; 4005 int last_offset = char_offset;
4048 int last_id = charset_ascii; 4006 int last_id = charset_ascii;
4049 4007
4050 CODING_GET_INFO (coding, attrs, eol_type, charset_list); 4008 CODING_GET_INFO (coding, attrs, charset_list);
4051 val = charset_list; 4009 val = charset_list;
4052 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); 4010 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4053 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val))); 4011 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4054 4012
4055 while (1) 4013 while (1)
4056 { 4014 {
4057 int c, c1; 4015 int c, c1;
4016 struct charset *charset;
4058 4017
4059 src_base = src; 4018 src_base = src;
4060 consumed_chars_base = consumed_chars; 4019 consumed_chars_base = consumed_chars;
4061 4020
4062 if (charbuf >= charbuf_end) 4021 if (charbuf >= charbuf_end)
4063 break; 4022 break;
4064 4023
4065 ONE_MORE_BYTE (c); 4024 ONE_MORE_BYTE (c);
4066 4025
4067 if (c == '\r') 4026 if (c < 0x80)
4068 { 4027 charset = charset_roman;
4069 if (EQ (eol_type, Qdos))
4070 {
4071 if (src == src_end)
4072 {
4073 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4074 goto no_more_source;
4075 }
4076 if (*src == '\n')
4077 ONE_MORE_BYTE (c);
4078 }
4079 else if (EQ (eol_type, Qmac))
4080 c = '\n';
4081 }
4082 else 4028 else
4083 { 4029 {
4084 struct charset *charset; 4030 /* BIG5 -> Big5 */
4085 if (c < 0x80) 4031 if (c < 0xA1 || c > 0xFE)
4086 charset = charset_roman; 4032 goto invalid_code;
4087 else 4033 ONE_MORE_BYTE (c1);
4088 { 4034 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4089 /* BIG5 -> Big5 */ 4035 goto invalid_code;
4090 if (c < 0xA1 || c > 0xFE) 4036 c = c << 8 | c1;
4091 goto invalid_code; 4037 charset = charset_big5;
4092 ONE_MORE_BYTE (c1); 4038 }
4093 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE) 4039 if (charset->id != charset_ascii
4094 goto invalid_code; 4040 && last_id != charset->id)
4095 c = c << 8 | c1; 4041 {
4096 charset = charset_big5; 4042 if (last_id != charset_ascii)
4097 } 4043 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4098 if (charset->id != charset_ascii 4044 last_id = charset->id;
4099 && last_id != charset->id) 4045 last_offset = char_offset;
4100 { 4046 }
4101 if (last_id != charset_ascii) 4047 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4102 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4103 last_id = charset->id;
4104 last_offset = char_offset;
4105 }
4106 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4107 }
4108
4109 *charbuf++ = c; 4048 *charbuf++ = c;
4110 char_offset++; 4049 char_offset++;
4111 continue; 4050 continue;
4112 4051
4113 invalid_code: 4052 invalid_code:
4144 int *charbuf_end = charbuf + coding->charbuf_used; 4083 int *charbuf_end = charbuf + coding->charbuf_used;
4145 unsigned char *dst = coding->destination + coding->produced; 4084 unsigned char *dst = coding->destination + coding->produced;
4146 unsigned char *dst_end = coding->destination + coding->dst_bytes; 4085 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4147 int safe_room = 4; 4086 int safe_room = 4;
4148 int produced_chars = 0; 4087 int produced_chars = 0;
4149 Lisp_Object attrs, eol_type, charset_list, val; 4088 Lisp_Object attrs, charset_list, val;
4150 int ascii_compatible; 4089 int ascii_compatible;
4151 struct charset *charset_roman, *charset_kanji, *charset_kana; 4090 struct charset *charset_roman, *charset_kanji, *charset_kana;
4152 int c; 4091 int c;
4153 4092
4154 CODING_GET_INFO (coding, attrs, eol_type, charset_list); 4093 CODING_GET_INFO (coding, attrs, charset_list);
4155 val = charset_list; 4094 val = charset_list;
4156 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); 4095 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4157 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); 4096 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4158 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))); 4097 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
4159 4098
4219 int *charbuf_end = charbuf + coding->charbuf_used; 4158 int *charbuf_end = charbuf + coding->charbuf_used;
4220 unsigned char *dst = coding->destination + coding->produced; 4159 unsigned char *dst = coding->destination + coding->produced;
4221 unsigned char *dst_end = coding->destination + coding->dst_bytes; 4160 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4222 int safe_room = 4; 4161 int safe_room = 4;
4223 int produced_chars = 0; 4162 int produced_chars = 0;
4224 Lisp_Object attrs, eol_type, charset_list, val; 4163 Lisp_Object attrs, charset_list, val;
4225 int ascii_compatible; 4164 int ascii_compatible;
4226 struct charset *charset_roman, *charset_big5; 4165 struct charset *charset_roman, *charset_big5;
4227 int c; 4166 int c;
4228 4167
4229 CODING_GET_INFO (coding, attrs, eol_type, charset_list); 4168 CODING_GET_INFO (coding, attrs, charset_list);
4230 val = charset_list; 4169 val = charset_list;
4231 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); 4170 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4232 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val))); 4171 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4233 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)); 4172 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4234 4173
4338 int consumed_chars = 0; 4277 int consumed_chars = 0;
4339 int multibytep = coding->src_multibyte; 4278 int multibytep = coding->src_multibyte;
4340 struct ccl_program ccl; 4279 struct ccl_program ccl;
4341 int source_charbuf[1024]; 4280 int source_charbuf[1024];
4342 int source_byteidx[1024]; 4281 int source_byteidx[1024];
4343 Lisp_Object attrs, eol_type, charset_list; 4282 Lisp_Object attrs, charset_list;
4344 4283
4345 CODING_GET_INFO (coding, attrs, eol_type, charset_list); 4284 CODING_GET_INFO (coding, attrs, charset_list);
4346 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding)); 4285 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4347 4286
4348 while (src < src_end) 4287 while (src < src_end)
4349 { 4288 {
4350 const unsigned char *p = src; 4289 const unsigned char *p = src;
4418 unsigned char *dst = coding->destination + coding->produced; 4357 unsigned char *dst = coding->destination + coding->produced;
4419 unsigned char *dst_end = coding->destination + coding->dst_bytes; 4358 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4420 unsigned char *adjusted_dst_end = dst_end - 1; 4359 unsigned char *adjusted_dst_end = dst_end - 1;
4421 int destination_charbuf[1024]; 4360 int destination_charbuf[1024];
4422 int i, produced_chars = 0; 4361 int i, produced_chars = 0;
4423 Lisp_Object attrs, eol_type, charset_list; 4362 Lisp_Object attrs, charset_list;
4424 4363
4425 CODING_GET_INFO (coding, attrs, eol_type, charset_list); 4364 CODING_GET_INFO (coding, attrs, charset_list);
4426 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding)); 4365 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4427 4366
4428 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK; 4367 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4429 ccl.dst_multibyte = coding->dst_multibyte; 4368 ccl.dst_multibyte = coding->dst_multibyte;
4430 4369
4619 const unsigned char *src_base; 4558 const unsigned char *src_base;
4620 int *charbuf = coding->charbuf; 4559 int *charbuf = coding->charbuf;
4621 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; 4560 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4622 int consumed_chars = 0, consumed_chars_base; 4561 int consumed_chars = 0, consumed_chars_base;
4623 int multibytep = coding->src_multibyte; 4562 int multibytep = coding->src_multibyte;
4624 Lisp_Object attrs, eol_type, charset_list, valids; 4563 Lisp_Object attrs, charset_list, valids;
4625 int char_offset = coding->produced_char; 4564 int char_offset = coding->produced_char;
4626 int last_offset = char_offset; 4565 int last_offset = char_offset;
4627 int last_id = charset_ascii; 4566 int last_id = charset_ascii;
4628 4567
4629 CODING_GET_INFO (coding, attrs, eol_type, charset_list); 4568 CODING_GET_INFO (coding, attrs, charset_list);
4630 valids = AREF (attrs, coding_attr_charset_valids); 4569 valids = AREF (attrs, coding_attr_charset_valids);
4631 4570
4632 while (1) 4571 while (1)
4633 { 4572 {
4634 int c; 4573 int c;
4574 Lisp_Object val;
4575 struct charset *charset;
4576 int dim;
4577 int len = 1;
4578 unsigned code;
4635 4579
4636 src_base = src; 4580 src_base = src;
4637 consumed_chars_base = consumed_chars; 4581 consumed_chars_base = consumed_chars;
4638 4582
4639 if (charbuf >= charbuf_end) 4583 if (charbuf >= charbuf_end)
4640 break; 4584 break;
4641 4585
4642 ONE_MORE_BYTE (c); 4586 ONE_MORE_BYTE (c);
4643 if (c == '\r') 4587 code = c;
4644 { 4588
4645 /* Here we assume that no charset maps '\r' to something 4589 val = AREF (valids, c);
4646 else. */ 4590 if (NILP (val))
4647 if (EQ (eol_type, Qdos)) 4591 goto invalid_code;
4592 if (INTEGERP (val))
4593 {
4594 charset = CHARSET_FROM_ID (XFASTINT (val));
4595 dim = CHARSET_DIMENSION (charset);
4596 while (len < dim)
4648 { 4597 {
4649 if (src == src_end) 4598 ONE_MORE_BYTE (c);
4650 { 4599 code = (code << 8) | c;
4651 coding->result = CODING_RESULT_INSUFFICIENT_SRC; 4600 len++;
4652 goto no_more_source;
4653 }
4654 if (*src == '\n')
4655 ONE_MORE_BYTE (c);
4656 } 4601 }
4657 else if (EQ (eol_type, Qmac)) 4602 CODING_DECODE_CHAR (coding, src, src_base, src_end,
4658 c = '\n'; 4603 charset, code, c);
4659 } 4604 }
4660 else 4605 else
4661 { 4606 {
4662 Lisp_Object val; 4607 /* VAL is a list of charset IDs. It is assured that the
4663 struct charset *charset; 4608 list is sorted by charset dimensions (smaller one
4664 int dim; 4609 comes first). */
4665 int len = 1; 4610 while (CONSP (val))
4666 unsigned code = c;
4667
4668 val = AREF (valids, c);
4669 if (NILP (val))
4670 goto invalid_code;
4671 if (INTEGERP (val))
4672 { 4611 {
4673 charset = CHARSET_FROM_ID (XFASTINT (val)); 4612 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4674 dim = CHARSET_DIMENSION (charset); 4613 dim = CHARSET_DIMENSION (charset);
4675 while (len < dim) 4614 while (len < dim)
4676 { 4615 {
4677 ONE_MORE_BYTE (c); 4616 ONE_MORE_BYTE (c);
4678 code = (code << 8) | c; 4617 code = (code << 8) | c;
4679 len++; 4618 len++;
4680 } 4619 }
4681 CODING_DECODE_CHAR (coding, src, src_base, src_end, 4620 CODING_DECODE_CHAR (coding, src, src_base,
4682 charset, code, c); 4621 src_end, charset, code, c);
4622 if (c >= 0)
4623 break;
4624 val = XCDR (val);
4683 } 4625 }
4684 else 4626 }
4685 { 4627 if (c < 0)
4686 /* VAL is a list of charset IDs. It is assured that the 4628 goto invalid_code;
4687 list is sorted by charset dimensions (smaller one 4629 if (charset->id != charset_ascii
4688 comes first). */ 4630 && last_id != charset->id)
4689 while (CONSP (val)) 4631 {
4690 { 4632 if (last_id != charset_ascii)
4691 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val))); 4633 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4692 dim = CHARSET_DIMENSION (charset); 4634 last_id = charset->id;
4693 while (len < dim) 4635 last_offset = char_offset;
4694 { 4636 }
4695 ONE_MORE_BYTE (c); 4637
4696 code = (code << 8) | c;
4697 len++;
4698 }
4699 CODING_DECODE_CHAR (coding, src, src_base,
4700 src_end, charset, code, c);
4701 if (c >= 0)
4702 break;
4703 val = XCDR (val);
4704 }
4705 }
4706 if (c < 0)
4707 goto invalid_code;
4708 if (charset->id != charset_ascii
4709 && last_id != charset->id)
4710 {
4711 if (last_id != charset_ascii)
4712 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4713 last_id = charset->id;
4714 last_offset = char_offset;
4715 }
4716 }
4717 *charbuf++ = c; 4638 *charbuf++ = c;
4718 char_offset++; 4639 char_offset++;
4719 continue; 4640 continue;
4720 4641
4721 invalid_code: 4642 invalid_code:
4744 int *charbuf_end = charbuf + coding->charbuf_used; 4665 int *charbuf_end = charbuf + coding->charbuf_used;
4745 unsigned char *dst = coding->destination + coding->produced; 4666 unsigned char *dst = coding->destination + coding->produced;
4746 unsigned char *dst_end = coding->destination + coding->dst_bytes; 4667 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4747 int safe_room = MAX_MULTIBYTE_LENGTH; 4668 int safe_room = MAX_MULTIBYTE_LENGTH;
4748 int produced_chars = 0; 4669 int produced_chars = 0;
4749 Lisp_Object attrs, eol_type, charset_list; 4670 Lisp_Object attrs, charset_list;
4750 int ascii_compatible; 4671 int ascii_compatible;
4751 int c; 4672 int c;
4752 4673
4753 CODING_GET_INFO (coding, attrs, eol_type, charset_list); 4674 CODING_GET_INFO (coding, attrs, charset_list);
4754 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)); 4675 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4755 4676
4756 while (charbuf < charbuf_end) 4677 while (charbuf < charbuf_end)
4757 { 4678 {
4758 struct charset *charset; 4679 struct charset *charset;
5248 } 5169 }
5249 return eol_seen; 5170 return eol_seen;
5250 } 5171 }
5251 5172
5252 5173
5253 static void 5174 static Lisp_Object
5254 adjust_coding_eol_type (coding, eol_seen) 5175 adjust_coding_eol_type (coding, eol_seen)
5255 struct coding_system *coding; 5176 struct coding_system *coding;
5256 int eol_seen; 5177 int eol_seen;
5257 { 5178 {
5258 Lisp_Object eol_type; 5179 Lisp_Object eol_type;
5259 5180
5260 eol_type = CODING_ID_EOL_TYPE (coding->id); 5181 eol_type = CODING_ID_EOL_TYPE (coding->id);
5261 if (eol_seen & EOL_SEEN_LF) 5182 if (eol_seen & EOL_SEEN_LF)
5262 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0)); 5183 {
5184 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5185 eol_type = Qunix;
5186 }
5263 else if (eol_seen & EOL_SEEN_CRLF) 5187 else if (eol_seen & EOL_SEEN_CRLF)
5264 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1)); 5188 {
5189 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5190 eol_type = Qdos;
5191 }
5265 else if (eol_seen & EOL_SEEN_CR) 5192 else if (eol_seen & EOL_SEEN_CR)
5266 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2)); 5193 {
5194 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5195 eol_type = Qmac;
5196 }
5197 return eol_type;
5267 } 5198 }
5268 5199
5269 /* Detect how a text specified in CODING is encoded. If a coding 5200 /* Detect how a text specified in CODING is encoded. If a coding
5270 system is detected, update fields of CODING by the detected coding 5201 system is detected, update fields of CODING by the detected coding
5271 system. */ 5202 system. */
5287 now. */ 5218 now. */
5288 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided)) 5219 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
5289 { 5220 {
5290 int c, i; 5221 int c, i;
5291 5222
5292 for (src = coding->source; src < src_end; src++) 5223 for (i = 0, src = coding->source; src < src_end; i++, src++)
5293 { 5224 {
5294 c = *src; 5225 c = *src;
5295 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC 5226 if (c & 0x80 || (c < 0x20 && (c == 0
5227 || c == ISO_CODE_ESC
5296 || c == ISO_CODE_SI 5228 || c == ISO_CODE_SI
5297 || c == ISO_CODE_SO))) 5229 || c == ISO_CODE_SO)))
5298 break; 5230 break;
5299 } 5231 }
5232 /* Skipped bytes must be even for utf-16 detector. */
5233 if (i % 2)
5234 src--;
5300 coding->head_ascii = src - (coding->source + coding->consumed); 5235 coding->head_ascii = src - (coding->source + coding->consumed);
5301 5236
5302 if (coding->head_ascii < coding->src_bytes) 5237 if (coding->head_ascii < coding->src_bytes)
5303 { 5238 {
5304 struct coding_detection_info detect_info; 5239 struct coding_detection_info detect_info;
5322 if (detect_info.found & (1 << category)) 5257 if (detect_info.found & (1 << category))
5323 break; 5258 break;
5324 } 5259 }
5325 else if ((*(this->detector)) (coding, &detect_info) 5260 else if ((*(this->detector)) (coding, &detect_info)
5326 && detect_info.found & (1 << category)) 5261 && detect_info.found & (1 << category))
5327 break; 5262 {
5263 if (category == coding_category_utf_16_auto)
5264 {
5265 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5266 category = coding_category_utf_16_le;
5267 else
5268 category = coding_category_utf_16_be;
5269 }
5270 break;
5271 }
5328 } 5272 }
5329 if (i < coding_category_raw_text) 5273 if (i < coding_category_raw_text)
5330 setup_coding_system (CODING_ID_NAME (this->id), coding); 5274 setup_coding_system (CODING_ID_NAME (this->id), coding);
5331 else if (detect_info.rejected == CATEGORY_MASK_ANY) 5275 else if (detect_info.rejected == CATEGORY_MASK_ANY)
5332 setup_coding_system (Qraw_text, coding); 5276 setup_coding_system (Qraw_text, coding);
5338 setup_coding_system (CODING_ID_NAME (this->id), coding); 5282 setup_coding_system (CODING_ID_NAME (this->id), coding);
5339 break; 5283 break;
5340 } 5284 }
5341 } 5285 }
5342 } 5286 }
5343 else if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qutf_16)) 5287 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
5288 == coding_category_utf_16_auto)
5344 { 5289 {
5345 Lisp_Object coding_systems; 5290 Lisp_Object coding_systems;
5346 struct coding_detection_info detect_info; 5291 struct coding_detection_info detect_info;
5347 5292
5348 coding_systems 5293 coding_systems
5349 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom); 5294 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
5350 detect_info.found = detect_info.rejected = 0; 5295 detect_info.found = detect_info.rejected = 0;
5351 if (CONSP (coding_systems) 5296 if (CONSP (coding_systems)
5352 && detect_coding_utf_16 (coding, &detect_info) 5297 && detect_coding_utf_16 (coding, &detect_info))
5353 && (detect_info.found & (CATEGORY_MASK_UTF_16_LE
5354 | CATEGORY_MASK_UTF_16_BE)))
5355 { 5298 {
5356 if (detect_info.found & CATEGORY_MASK_UTF_16_LE) 5299 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5357 setup_coding_system (XCAR (coding_systems), coding); 5300 setup_coding_system (XCAR (coding_systems), coding);
5358 else 5301 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
5359 setup_coding_system (XCDR (coding_systems), coding); 5302 setup_coding_system (XCDR (coding_systems), coding);
5360 } 5303 }
5361 }
5362
5363 attrs = CODING_ID_ATTRS (coding->id);
5364 coding_type = CODING_ATTR_TYPE (attrs);
5365
5366 /* If we have not yet decided the EOL type, detect it now. But, the
5367 detection is impossible for a CCL based coding system, in which
5368 case, we detect the EOL type after decoding. */
5369 if (VECTORP (CODING_ID_EOL_TYPE (coding->id))
5370 && ! EQ (coding_type, Qccl))
5371 {
5372 int eol_seen = detect_eol (coding->source, coding->src_bytes,
5373 (enum coding_category) XINT (CODING_ATTR_CATEGORY (attrs)));
5374
5375 if (eol_seen != EOL_SEEN_NONE)
5376 adjust_coding_eol_type (coding, eol_seen);
5377 } 5304 }
5378 } 5305 }
5379 5306
5380 5307
5381 static void 5308 static void
5382 decode_eol (coding) 5309 decode_eol (coding)
5383 struct coding_system *coding; 5310 struct coding_system *coding;
5384 { 5311 {
5385 if (VECTORP (CODING_ID_EOL_TYPE (coding->id))) 5312 Lisp_Object eol_type;
5386 { 5313 unsigned char *p, *pbeg, *pend;
5387 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos); 5314
5388 unsigned char *pend = p + coding->produced; 5315 eol_type = CODING_ID_EOL_TYPE (coding->id);
5316 if (EQ (eol_type, Qunix))
5317 return;
5318
5319 if (NILP (coding->dst_object))
5320 pbeg = coding->destination;
5321 else
5322 pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
5323 pend = pbeg + coding->produced;
5324
5325 if (VECTORP (eol_type))
5326 {
5389 int eol_seen = EOL_SEEN_NONE; 5327 int eol_seen = EOL_SEEN_NONE;
5390 5328
5391 for (; p < pend; p++) 5329 for (p = pbeg; p < pend; p++)
5392 { 5330 {
5393 if (*p == '\n') 5331 if (*p == '\n')
5394 eol_seen |= EOL_SEEN_LF; 5332 eol_seen |= EOL_SEEN_LF;
5395 else if (*p == '\r') 5333 else if (*p == '\r')
5396 { 5334 {
5401 } 5339 }
5402 else 5340 else
5403 eol_seen |= EOL_SEEN_CR; 5341 eol_seen |= EOL_SEEN_CR;
5404 } 5342 }
5405 } 5343 }
5344 if (eol_seen != EOL_SEEN_NONE
5345 && eol_seen != EOL_SEEN_LF
5346 && eol_seen != EOL_SEEN_CRLF
5347 && eol_seen != EOL_SEEN_CR)
5348 eol_seen = EOL_SEEN_LF;
5406 if (eol_seen != EOL_SEEN_NONE) 5349 if (eol_seen != EOL_SEEN_NONE)
5407 adjust_coding_eol_type (coding, eol_seen); 5350 eol_type = adjust_coding_eol_type (coding, eol_seen);
5408 } 5351 }
5409 5352
5410 if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac)) 5353 if (EQ (eol_type, Qmac))
5411 { 5354 {
5412 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos); 5355 for (p = pbeg; p < pend; p++)
5413 unsigned char *pend = p + coding->produced;
5414
5415 for (; p < pend; p++)
5416 if (*p == '\r') 5356 if (*p == '\r')
5417 *p = '\n'; 5357 *p = '\n';
5418 } 5358 }
5419 else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos)) 5359 else if (EQ (eol_type, Qdos))
5420 { 5360 {
5421 unsigned char *p, *pbeg, *pend; 5361 int n = 0;
5422 Lisp_Object undo_list; 5362
5423 5363 if (NILP (coding->dst_object))
5424 move_gap_both (coding->dst_pos + coding->produced_char, 5364 {
5425 coding->dst_pos_byte + coding->produced); 5365 for (p = pend - 2; p >= pbeg; p--)
5426 undo_list = current_buffer->undo_list; 5366 if (*p == '\r')
5427 current_buffer->undo_list = Qt; 5367 {
5428 del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, 0); 5368 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
5429 current_buffer->undo_list = undo_list; 5369 n++;
5430 pbeg = GPT_ADDR; 5370 }
5431 pend = pbeg + coding->produced; 5371 }
5432 5372 else
5433 for (p = pend - 1; p >= pbeg; p--) 5373 {
5434 if (*p == '\r') 5374 for (p = pend - 2; p >= pbeg; p--)
5435 { 5375 if (*p == '\r')
5436 safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1); 5376 {
5437 pend--; 5377 int pos_byte = coding->dst_pos_byte + (p - pbeg);
5438 } 5378 int pos = BYTE_TO_CHAR (pos_byte);
5439 coding->produced_char -= coding->produced - (pend - pbeg); 5379
5440 coding->produced = pend - pbeg; 5380 del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
5441 insert_from_gap (coding->produced_char, coding->produced); 5381 n++;
5382 }
5383 }
5384 coding->produced -= n;
5385 coding->produced_char -= n;
5442 } 5386 }
5443 } 5387 }
5444 5388
5445 static void 5389 static void
5446 translate_chars (coding, table) 5390 translate_chars (coding, table)
5794 static int 5738 static int
5795 decode_coding (coding) 5739 decode_coding (coding)
5796 struct coding_system *coding; 5740 struct coding_system *coding;
5797 { 5741 {
5798 Lisp_Object attrs; 5742 Lisp_Object attrs;
5743 Lisp_Object undo_list;
5799 5744
5800 if (BUFFERP (coding->src_object) 5745 if (BUFFERP (coding->src_object)
5801 && coding->src_pos > 0 5746 && coding->src_pos > 0
5802 && coding->src_pos < GPT 5747 && coding->src_pos < GPT
5803 && coding->src_pos + coding->src_chars > GPT) 5748 && coding->src_pos + coding->src_chars > GPT)
5804 move_gap_both (coding->src_pos, coding->src_pos_byte); 5749 move_gap_both (coding->src_pos, coding->src_pos_byte);
5805 5750
5751 undo_list = Qt;
5806 if (BUFFERP (coding->dst_object)) 5752 if (BUFFERP (coding->dst_object))
5807 { 5753 {
5808 if (current_buffer != XBUFFER (coding->dst_object)) 5754 if (current_buffer != XBUFFER (coding->dst_object))
5809 set_buffer_internal (XBUFFER (coding->dst_object)); 5755 set_buffer_internal (XBUFFER (coding->dst_object));
5810 if (GPT != PT) 5756 if (GPT != PT)
5811 move_gap_both (PT, PT_BYTE); 5757 move_gap_both (PT, PT_BYTE);
5758 undo_list = current_buffer->undo_list;
5759 current_buffer->undo_list = Qt;
5812 } 5760 }
5813 5761
5814 coding->consumed = coding->consumed_char = 0; 5762 coding->consumed = coding->consumed_char = 0;
5815 coding->produced = coding->produced_char = 0; 5763 coding->produced = coding->produced_char = 0;
5816 coding->chars_at_source = 0; 5764 coding->chars_at_source = 0;
5836 produce_annotation (coding); 5784 produce_annotation (coding);
5837 } 5785 }
5838 while (coding->consumed < coding->src_bytes 5786 while (coding->consumed < coding->src_bytes
5839 && ! coding->result); 5787 && ! coding->result);
5840 5788
5841 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl)
5842 && SYMBOLP (CODING_ID_EOL_TYPE (coding->id))
5843 && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5844 decode_eol (coding);
5845
5846 coding->carryover_bytes = 0; 5789 coding->carryover_bytes = 0;
5847 if (coding->consumed < coding->src_bytes) 5790 if (coding->consumed < coding->src_bytes)
5848 { 5791 {
5849 int nbytes = coding->src_bytes - coding->consumed; 5792 int nbytes = coding->src_bytes - coding->consumed;
5850 const unsigned char *src; 5793 const unsigned char *src;
5878 *p++ = *src++; 5821 *p++ = *src++;
5879 } 5822 }
5880 coding->consumed = coding->src_bytes; 5823 coding->consumed = coding->src_bytes;
5881 } 5824 }
5882 5825
5826 if (BUFFERP (coding->dst_object))
5827 {
5828 current_buffer->undo_list = undo_list;
5829 record_insert (coding->dst_pos, coding->produced_char);
5830 }
5831 if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5832 decode_eol (coding);
5883 return coding->result; 5833 return coding->result;
5884 } 5834 }
5885 5835
5886 5836
5887 /* Extract an annotation datum from a composition starting at POS and 5837 /* Extract an annotation datum from a composition starting at POS and
6037 stop = stop_charset = pos; 5987 stop = stop_charset = pos;
6038 else 5988 else
6039 stop_charset = end_pos; 5989 stop_charset = end_pos;
6040 } 5990 }
6041 5991
6042 /* Compensate for CRLF and annotation. */ 5992 /* Compensate for CRLF and conversion. */
6043 buf_end -= 1 + MAX_ANNOTATION_LENGTH; 5993 buf_end -= 1 + MAX_ANNOTATION_LENGTH;
6044 while (buf < buf_end) 5994 while (buf < buf_end)
6045 { 5995 {
6046 if (pos == stop) 5996 if (pos == stop)
6047 { 5997 {
6152 6102
6153 return (coding->result); 6103 return (coding->result);
6154 } 6104 }
6155 6105
6156 6106
6157 /* Stack of working buffers used in code conversion. An nil element 6107 /* Name (or base name) of work buffer for code conversion. */
6158 means that the code conversion of that level is not using a working 6108 static Lisp_Object Vcode_conversion_workbuf_name;
6159 buffer. */ 6109
6160 Lisp_Object Vcode_conversion_work_buf_list; 6110 /* A working buffer used by the top level conversion. Once it is
6161 6111 created, it is never destroyed. It has the name
6162 /* A working buffer used by the top level conversion. */ 6112 Vcode_conversion_workbuf_name. The other working buffers are
6163 Lisp_Object Vcode_conversion_reused_work_buf; 6113 destroyed after the use is finished, and their names are modified
6164 6114 versions of Vcode_conversion_workbuf_name. */
6165 6115 static Lisp_Object Vcode_conversion_reused_workbuf;
6166 /* Return a working buffer that can be freely used by the following 6116
6167 code conversion. MULTIBYTEP specifies the multibyteness of the 6117 /* 1 iff Vcode_conversion_reused_workbuf is already in use. */
6168 buffer. */ 6118 static int reused_workbuf_in_use;
6119
6120
6121 /* Return a working buffer of code conversion. MULTIBYTE specifies the
6122 multibyteness of the returned buffer. */
6169 6123
6170 Lisp_Object 6124 Lisp_Object
6171 make_conversion_work_buffer (multibytep, depth) 6125 make_conversion_work_buffer (multibyte)
6172 int multibytep, depth; 6126 {
6173 { 6127 Lisp_Object name, workbuf;
6174 struct buffer *current = current_buffer; 6128 struct buffer *current;
6175 Lisp_Object buf, name; 6129
6176 6130 if (reused_workbuf_in_use++)
6177 if (depth == 0) 6131 name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
6178 {
6179 if (NILP (Vcode_conversion_reused_work_buf))
6180 Vcode_conversion_reused_work_buf
6181 = Fget_buffer_create (build_string (" *code-converting-work<0>*"));
6182 buf = Vcode_conversion_reused_work_buf;
6183 }
6184 else 6132 else
6185 { 6133 name = Vcode_conversion_workbuf_name;
6186 if (depth < 0) 6134 workbuf = Fget_buffer_create (name);
6187 { 6135 current = current_buffer;
6188 name = build_string (" *code-converting-work*"); 6136 set_buffer_internal (XBUFFER (workbuf));
6189 name = Fgenerate_new_buffer_name (name, Qnil); 6137 Ferase_buffer ();
6190 }
6191 else
6192 {
6193 char str[128];
6194
6195 sprintf (str, " *code-converting-work*<%d>", depth);
6196 name = build_string (str);
6197 }
6198 buf = Fget_buffer_create (name);
6199 }
6200 set_buffer_internal (XBUFFER (buf));
6201 current_buffer->undo_list = Qt; 6138 current_buffer->undo_list = Qt;
6202 Ferase_buffer (); 6139 current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
6203 Fset_buffer_multibyte (multibytep ? Qt : Qnil);
6204 set_buffer_internal (current); 6140 set_buffer_internal (current);
6205 return buf; 6141 return workbuf;
6206 } 6142 }
6143
6207 6144
6208 static Lisp_Object 6145 static Lisp_Object
6209 code_conversion_restore (buffer) 6146 code_conversion_restore (arg)
6210 Lisp_Object buffer; 6147 Lisp_Object arg;
6211 { 6148 {
6212 Lisp_Object workbuf; 6149 Lisp_Object current, workbuf;
6213 6150
6214 workbuf = XCAR (Vcode_conversion_work_buf_list); 6151 current = XCAR (arg);
6215 if (! NILP (workbuf) 6152 workbuf = XCDR (arg);
6216 && ! EQ (workbuf, Vcode_conversion_reused_work_buf) 6153 if (! NILP (workbuf))
6217 && ! NILP (Fbuffer_live_p (workbuf))) 6154 {
6218 Fkill_buffer (workbuf); 6155 if (EQ (workbuf, Vcode_conversion_reused_workbuf))
6219 Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list); 6156 reused_workbuf_in_use = 0;
6220 set_buffer_internal (XBUFFER (buffer)); 6157 else if (! NILP (Fbuffer_live_p (workbuf)))
6158 Fkill_buffer (workbuf);
6159 }
6160 set_buffer_internal (XBUFFER (current));
6221 return Qnil; 6161 return Qnil;
6222 } 6162 }
6223 6163
6224 static Lisp_Object 6164 Lisp_Object
6225 code_conversion_save (buffer, with_work_buf, multibyte) 6165 code_conversion_save (with_work_buf, multibyte)
6226 Lisp_Object buffer;
6227 int with_work_buf, multibyte; 6166 int with_work_buf, multibyte;
6228 { 6167 {
6229 Lisp_Object workbuf; 6168 Lisp_Object workbuf = Qnil;
6230 6169
6231 if (with_work_buf) 6170 if (with_work_buf)
6232 { 6171 workbuf = make_conversion_work_buffer (multibyte);
6233 int depth = XINT (Flength (Vcode_conversion_work_buf_list)); 6172 record_unwind_protect (code_conversion_restore,
6234 6173 Fcons (Fcurrent_buffer (), workbuf));
6235 workbuf = make_conversion_work_buffer (multibyte, depth);
6236 }
6237 else
6238 workbuf = Qnil;
6239 Vcode_conversion_work_buf_list
6240 = Fcons (workbuf, Vcode_conversion_work_buf_list);
6241 record_unwind_protect (code_conversion_restore, buffer);
6242 return workbuf; 6174 return workbuf;
6243 } 6175 }
6244 6176
6245 int 6177 int
6246 decode_coding_gap (coding, chars, bytes) 6178 decode_coding_gap (coding, chars, bytes)
6247 struct coding_system *coding; 6179 struct coding_system *coding;
6248 EMACS_INT chars, bytes; 6180 EMACS_INT chars, bytes;
6249 { 6181 {
6250 int count = specpdl_ptr - specpdl; 6182 int count = specpdl_ptr - specpdl;
6251 Lisp_Object attrs; 6183 Lisp_Object attrs;
6252 Lisp_Object buffer; 6184
6253 6185 code_conversion_save (0, 0);
6254 buffer = Fcurrent_buffer (); 6186
6255 code_conversion_save (buffer, 0, 0); 6187 coding->src_object = Fcurrent_buffer ();
6256
6257 coding->src_object = buffer;
6258 coding->src_chars = chars;
6259 coding->src_bytes = bytes;
6260 coding->src_pos = -chars;
6261 coding->src_pos_byte = -bytes;
6262 coding->src_multibyte = chars < bytes;
6263 coding->dst_object = buffer;
6264 coding->dst_pos = PT;
6265 coding->dst_pos_byte = PT_BYTE;
6266 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
6267 coding->mode |= CODING_MODE_LAST_BLOCK;
6268
6269 if (CODING_REQUIRE_DETECTION (coding))
6270 detect_coding (coding);
6271
6272 decode_coding (coding);
6273
6274 attrs = CODING_ID_ATTRS (coding->id);
6275 if (! NILP (CODING_ATTR_POST_READ (attrs)))
6276 {
6277 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6278 Lisp_Object val;
6279
6280 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6281 val = call1 (CODING_ATTR_POST_READ (attrs),
6282 make_number (coding->produced_char));
6283 CHECK_NATNUM (val);
6284 coding->produced_char += Z - prev_Z;
6285 coding->produced += Z_BYTE - prev_Z_BYTE;
6286 }
6287
6288 unbind_to (count, Qnil);
6289 return coding->result;
6290 }
6291
6292 int
6293 encode_coding_gap (coding, chars, bytes)
6294 struct coding_system *coding;
6295 EMACS_INT chars, bytes;
6296 {
6297 int count = specpdl_ptr - specpdl;
6298 Lisp_Object buffer;
6299
6300 buffer = Fcurrent_buffer ();
6301 code_conversion_save (buffer, 0, 0);
6302
6303 coding->src_object = buffer;
6304 coding->src_chars = chars; 6188 coding->src_chars = chars;
6305 coding->src_bytes = bytes; 6189 coding->src_bytes = bytes;
6306 coding->src_pos = -chars; 6190 coding->src_pos = -chars;
6307 coding->src_pos_byte = -bytes; 6191 coding->src_pos_byte = -bytes;
6308 coding->src_multibyte = chars < bytes; 6192 coding->src_multibyte = chars < bytes;
6309 coding->dst_object = coding->src_object; 6193 coding->dst_object = coding->src_object;
6310 coding->dst_pos = PT; 6194 coding->dst_pos = PT;
6311 coding->dst_pos_byte = PT_BYTE; 6195 coding->dst_pos_byte = PT_BYTE;
6196 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
6197 coding->mode |= CODING_MODE_LAST_BLOCK;
6198
6199 if (CODING_REQUIRE_DETECTION (coding))
6200 detect_coding (coding);
6201
6202 decode_coding (coding);
6203
6204 attrs = CODING_ID_ATTRS (coding->id);
6205 if (! NILP (CODING_ATTR_POST_READ (attrs)))
6206 {
6207 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6208 Lisp_Object val;
6209
6210 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6211 val = call1 (CODING_ATTR_POST_READ (attrs),
6212 make_number (coding->produced_char));
6213 CHECK_NATNUM (val);
6214 coding->produced_char += Z - prev_Z;
6215 coding->produced += Z_BYTE - prev_Z_BYTE;
6216 }
6217
6218 unbind_to (count, Qnil);
6219 return coding->result;
6220 }
6221
6222 int
6223 encode_coding_gap (coding, chars, bytes)
6224 struct coding_system *coding;
6225 EMACS_INT chars, bytes;
6226 {
6227 int count = specpdl_ptr - specpdl;
6228
6229 code_conversion_save (0, 0);
6230
6231 coding->src_object = Fcurrent_buffer ();
6232 coding->src_chars = chars;
6233 coding->src_bytes = bytes;
6234 coding->src_pos = -chars;
6235 coding->src_pos_byte = -bytes;
6236 coding->src_multibyte = chars < bytes;
6237 coding->dst_object = coding->src_object;
6238 coding->dst_pos = PT;
6239 coding->dst_pos_byte = PT_BYTE;
6312 6240
6313 encode_coding (coding); 6241 encode_coding (coding);
6314 6242
6315 unbind_to (count, Qnil); 6243 unbind_to (count, Qnil);
6316 return coding->result; 6244 return coding->result;
6407 6335
6408 if (EQ (dst_object, Qt) 6336 if (EQ (dst_object, Qt)
6409 || (! NILP (CODING_ATTR_POST_READ (attrs)) 6337 || (! NILP (CODING_ATTR_POST_READ (attrs))
6410 && NILP (dst_object))) 6338 && NILP (dst_object)))
6411 { 6339 {
6412 coding->dst_object = code_conversion_save (buffer, 1, 1); 6340 coding->dst_object = code_conversion_save (1, 1);
6413 coding->dst_pos = BEG; 6341 coding->dst_pos = BEG;
6414 coding->dst_pos_byte = BEG_BYTE; 6342 coding->dst_pos_byte = BEG_BYTE;
6415 coding->dst_multibyte = 1; 6343 coding->dst_multibyte = 1;
6416 } 6344 }
6417 else if (BUFFERP (dst_object)) 6345 else if (BUFFERP (dst_object))
6418 { 6346 {
6419 code_conversion_save (buffer, 0, 0); 6347 code_conversion_save (0, 0);
6420 coding->dst_object = dst_object; 6348 coding->dst_object = dst_object;
6421 coding->dst_pos = BUF_PT (XBUFFER (dst_object)); 6349 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6422 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object)); 6350 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6423 coding->dst_multibyte 6351 coding->dst_multibyte
6424 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters); 6352 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6425 } 6353 }
6426 else 6354 else
6427 { 6355 {
6428 code_conversion_save (buffer, 0, 0); 6356 code_conversion_save (0, 0);
6429 coding->dst_object = Qnil; 6357 coding->dst_object = Qnil;
6430 coding->dst_multibyte = 1; 6358 coding->dst_multibyte = 1;
6431 } 6359 }
6432 6360
6433 decode_coding (coding); 6361 decode_coding (coding);
6522 6450
6523 attrs = CODING_ID_ATTRS (coding->id); 6451 attrs = CODING_ID_ATTRS (coding->id);
6524 6452
6525 if (! NILP (CODING_ATTR_PRE_WRITE (attrs))) 6453 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6526 { 6454 {
6527 coding->src_object = code_conversion_save (buffer, 1, 6455 coding->src_object = code_conversion_save (1, coding->src_multibyte);
6528 coding->src_multibyte);
6529 set_buffer_internal (XBUFFER (coding->src_object)); 6456 set_buffer_internal (XBUFFER (coding->src_object));
6530 if (STRINGP (src_object)) 6457 if (STRINGP (src_object))
6531 insert_from_string (src_object, from, from_byte, chars, bytes, 0); 6458 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6532 else if (BUFFERP (src_object)) 6459 else if (BUFFERP (src_object))
6533 insert_from_buffer (XBUFFER (src_object), from, chars, 0); 6460 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6553 coding->src_pos_byte = BEG_BYTE; 6480 coding->src_pos_byte = BEG_BYTE;
6554 coding->src_multibyte = Z < Z_BYTE; 6481 coding->src_multibyte = Z < Z_BYTE;
6555 } 6482 }
6556 else if (STRINGP (src_object)) 6483 else if (STRINGP (src_object))
6557 { 6484 {
6558 code_conversion_save (buffer, 0, 0); 6485 code_conversion_save (0, 0);
6559 coding->src_pos = from; 6486 coding->src_pos = from;
6560 coding->src_pos_byte = from_byte; 6487 coding->src_pos_byte = from_byte;
6561 } 6488 }
6562 else if (BUFFERP (src_object)) 6489 else if (BUFFERP (src_object))
6563 { 6490 {
6564 code_conversion_save (buffer, 0, 0); 6491 code_conversion_save (0, 0);
6565 set_buffer_internal (XBUFFER (src_object)); 6492 set_buffer_internal (XBUFFER (src_object));
6566 if (EQ (src_object, dst_object)) 6493 if (EQ (src_object, dst_object))
6567 { 6494 {
6568 saved_pt = PT, saved_pt_byte = PT_BYTE; 6495 saved_pt = PT, saved_pt_byte = PT_BYTE;
6569 coding->src_object = del_range_1 (from, to, 1, 1); 6496 coding->src_object = del_range_1 (from, to, 1, 1);
6577 coding->src_pos = from; 6504 coding->src_pos = from;
6578 coding->src_pos_byte = from_byte; 6505 coding->src_pos_byte = from_byte;
6579 } 6506 }
6580 } 6507 }
6581 else 6508 else
6582 code_conversion_save (buffer, 0, 0); 6509 code_conversion_save (0, 0);
6583 6510
6584 if (BUFFERP (dst_object)) 6511 if (BUFFERP (dst_object))
6585 { 6512 {
6586 coding->dst_object = dst_object; 6513 coding->dst_object = dst_object;
6587 if (EQ (src_object, dst_object)) 6514 if (EQ (src_object, dst_object))
6733 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format 6660 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format
6734 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'), 6661 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
6735 detect only text-format. */ 6662 detect only text-format. */
6736 6663
6737 Lisp_Object 6664 Lisp_Object
6738 detect_coding_system (src, src_bytes, highest, multibytep, coding_system) 6665 detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
6666 coding_system)
6739 const unsigned char *src; 6667 const unsigned char *src;
6740 int src_bytes, highest; 6668 int src_chars, src_bytes, highest;
6741 int multibytep; 6669 int multibytep;
6742 Lisp_Object coding_system; 6670 Lisp_Object coding_system;
6743 { 6671 {
6744 const unsigned char *src_end = src + src_bytes; 6672 const unsigned char *src_end = src + src_bytes;
6745 Lisp_Object attrs, eol_type; 6673 Lisp_Object attrs, eol_type;
6746 Lisp_Object val; 6674 Lisp_Object val;
6747 struct coding_system coding; 6675 struct coding_system coding;
6748 int id; 6676 int id;
6749 struct coding_detection_info detect_info; 6677 struct coding_detection_info detect_info;
6678 enum coding_category base_category;
6750 6679
6751 if (NILP (coding_system)) 6680 if (NILP (coding_system))
6752 coding_system = Qundecided; 6681 coding_system = Qundecided;
6753 setup_coding_system (coding_system, &coding); 6682 setup_coding_system (coding_system, &coding);
6754 attrs = CODING_ID_ATTRS (coding.id); 6683 attrs = CODING_ID_ATTRS (coding.id);
6755 eol_type = CODING_ID_EOL_TYPE (coding.id); 6684 eol_type = CODING_ID_EOL_TYPE (coding.id);
6756 coding_system = CODING_ATTR_BASE_NAME (attrs); 6685 coding_system = CODING_ATTR_BASE_NAME (attrs);
6757 6686
6758 coding.source = src; 6687 coding.source = src;
6688 coding.src_chars = src_chars;
6759 coding.src_bytes = src_bytes; 6689 coding.src_bytes = src_bytes;
6760 coding.src_multibyte = multibytep; 6690 coding.src_multibyte = multibytep;
6761 coding.consumed = 0; 6691 coding.consumed = 0;
6762 coding.mode |= CODING_MODE_LAST_BLOCK; 6692 coding.mode |= CODING_MODE_LAST_BLOCK;
6763 6693
6764 detect_info.checked = detect_info.found = detect_info.rejected = 0; 6694 detect_info.checked = detect_info.found = detect_info.rejected = 0;
6765 6695
6766 /* At first, detect text-format if necessary. */ 6696 /* At first, detect text-format if necessary. */
6767 if (XINT (CODING_ATTR_CATEGORY (attrs)) == coding_category_undecided) 6697 base_category = XINT (CODING_ATTR_CATEGORY (attrs));
6698 if (base_category == coding_category_undecided)
6768 { 6699 {
6769 enum coding_category category; 6700 enum coding_category category;
6770 struct coding_system *this; 6701 struct coding_system *this;
6771 int c, i; 6702 int c, i;
6772 6703
6773 for (; src < src_end; src++) 6704 /* Skip all ASCII bytes except for a few ISO2022 controls. */
6705 for (i = 0; src < src_end; i++, src++)
6774 { 6706 {
6775 c = *src; 6707 c = *src;
6776 if (c & 0x80 6708 if (c & 0x80 || (c < 0x20 && (c == 0
6777 || (c < 0x20 && (c == ISO_CODE_ESC 6709 || c == ISO_CODE_ESC
6778 || c == ISO_CODE_SI 6710 || c == ISO_CODE_SI
6779 || c == ISO_CODE_SO))) 6711 || c == ISO_CODE_SO)))
6780 break; 6712 break;
6781 } 6713 }
6714 /* Skipped bytes must be even for utf-16 detector. */
6715 if (i % 2)
6716 src--;
6782 coding.head_ascii = src - coding.source; 6717 coding.head_ascii = src - coding.source;
6783 6718
6784 if (src < src_end) 6719 if (src < src_end)
6785 for (i = 0; i < coding_category_raw_text; i++) 6720 for (i = 0; i < coding_category_raw_text; i++)
6786 { 6721 {
6803 else 6738 else
6804 { 6739 {
6805 if ((*(this->detector)) (&coding, &detect_info) 6740 if ((*(this->detector)) (&coding, &detect_info)
6806 && highest 6741 && highest
6807 && (detect_info.found & (1 << category))) 6742 && (detect_info.found & (1 << category)))
6808 break; 6743 {
6744 if (category == coding_category_utf_16_auto)
6745 {
6746 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6747 category = coding_category_utf_16_le;
6748 else
6749 category = coding_category_utf_16_be;
6750 }
6751 break;
6752 }
6809 } 6753 }
6810 } 6754 }
6811
6812 6755
6813 if (detect_info.rejected == CATEGORY_MASK_ANY) 6756 if (detect_info.rejected == CATEGORY_MASK_ANY)
6814 { 6757 {
6815 detect_info.found = CATEGORY_MASK_RAW_TEXT; 6758 detect_info.found = CATEGORY_MASK_RAW_TEXT;
6816 id = coding_categories[coding_category_raw_text].id; 6759 id = coding_categories[coding_category_raw_text].id;
6863 id = coding_categories[category].id; 6806 id = coding_categories[category].id;
6864 val = Fcons (make_number (id), val); 6807 val = Fcons (make_number (id), val);
6865 } 6808 }
6866 } 6809 }
6867 detect_info.found |= found; 6810 detect_info.found |= found;
6811 }
6812 }
6813 else if (base_category == coding_category_utf_16_auto)
6814 {
6815 if (detect_coding_utf_16 (&coding, &detect_info))
6816 {
6817 enum coding_category category;
6818 struct coding_system *this;
6819
6820 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6821 this = coding_categories + coding_category_utf_16_le;
6822 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
6823 this = coding_categories + coding_category_utf_16_be;
6824 else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
6825 this = coding_categories + coding_category_utf_16_be_nosig;
6826 else
6827 this = coding_categories + coding_category_utf_16_le_nosig;
6828 val = Fcons (make_number (this->id), Qnil);
6868 } 6829 }
6869 } 6830 }
6870 else 6831 else
6871 { 6832 {
6872 detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs)); 6833 detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
6967 6928
6968 if (from < GPT && to >= GPT) 6929 if (from < GPT && to >= GPT)
6969 move_gap_both (to, to_byte); 6930 move_gap_both (to, to_byte);
6970 6931
6971 return detect_coding_system (BYTE_POS_ADDR (from_byte), 6932 return detect_coding_system (BYTE_POS_ADDR (from_byte),
6972 to_byte - from_byte, 6933 to - from, to_byte - from_byte,
6973 !NILP (highest), 6934 !NILP (highest),
6974 !NILP (current_buffer 6935 !NILP (current_buffer
6975 ->enable_multibyte_characters), 6936 ->enable_multibyte_characters),
6976 Qnil); 6937 Qnil);
6977 } 6938 }
6990 (string, highest) 6951 (string, highest)
6991 Lisp_Object string, highest; 6952 Lisp_Object string, highest;
6992 { 6953 {
6993 CHECK_STRING (string); 6954 CHECK_STRING (string);
6994 6955
6995 return detect_coding_system (SDATA (string), SBYTES (string), 6956 return detect_coding_system (SDATA (string),
6957 SCHARS (string), SBYTES (string),
6996 !NILP (highest), STRING_MULTIBYTE (string), 6958 !NILP (highest), STRING_MULTIBYTE (string),
6997 Qnil); 6959 Qnil);
6998 } 6960 }
6999 6961
7000 6962
8615 iso_code_class[i] = ISO_control_1; 8577 iso_code_class[i] = ISO_control_1;
8616 for (i = 0xA1; i < 0xFF; i++) 8578 for (i = 0xA1; i < 0xFF; i++)
8617 iso_code_class[i] = ISO_graphic_plane_1; 8579 iso_code_class[i] = ISO_graphic_plane_1;
8618 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F; 8580 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
8619 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF; 8581 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
8620 iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
8621 iso_code_class[ISO_CODE_SO] = ISO_shift_out; 8582 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
8622 iso_code_class[ISO_CODE_SI] = ISO_shift_in; 8583 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
8623 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7; 8584 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
8624 iso_code_class[ISO_CODE_ESC] = ISO_escape; 8585 iso_code_class[ISO_CODE_ESC] = ISO_escape;
8625 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2; 8586 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
8653 Vsjis_coding_system = Qnil; 8614 Vsjis_coding_system = Qnil;
8654 8615
8655 staticpro (&Vbig5_coding_system); 8616 staticpro (&Vbig5_coding_system);
8656 Vbig5_coding_system = Qnil; 8617 Vbig5_coding_system = Qnil;
8657 8618
8658 staticpro (&Vcode_conversion_work_buf_list); 8619 staticpro (&Vcode_conversion_reused_workbuf);
8659 Vcode_conversion_work_buf_list = Qnil; 8620 Vcode_conversion_reused_workbuf = Qnil;
8660 8621
8661 staticpro (&Vcode_conversion_reused_work_buf); 8622 staticpro (&Vcode_conversion_workbuf_name);
8662 Vcode_conversion_reused_work_buf = Qnil; 8623 Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
8624
8625 reused_workbuf_in_use = 0;
8663 8626
8664 DEFSYM (Qcharset, "charset"); 8627 DEFSYM (Qcharset, "charset");
8665 DEFSYM (Qtarget_idx, "target-idx"); 8628 DEFSYM (Qtarget_idx, "target-idx");
8666 DEFSYM (Qcoding_system_history, "coding-system-history"); 8629 DEFSYM (Qcoding_system_history, "coding-system-history");
8667 Fset (Qcoding_system_history, Qnil); 8630 Fset (Qcoding_system_history, Qnil);