Mercurial > emacs
comparison src/coding.c @ 36647:0a75ccbe42b2
(detect_coding_sjis): Do more rigid check.
(detect_coding_big5): Likewise.
(decode_coding_sjis_big5): Likewise.
(Fdetect_coding_region): Call detect_coding_system with trailing
anchor byte `\0' for more rigid detection.
(Fdetect_coding_string): Likewise.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Thu, 08 Mar 2001 02:01:00 +0000 |
parents | df3c622dba0e |
children | 5dc88f9ab0ef |
comparison
equal
deleted
inserted
replaced
36646:5c9e147d55d8 | 36647:0a75ccbe42b2 |
---|---|
2651 so that it fits in the range below. | 2651 so that it fits in the range below. |
2652 | 2652 |
2653 --- CODE RANGE of SJIS --- | 2653 --- CODE RANGE of SJIS --- |
2654 (character set) (range) | 2654 (character set) (range) |
2655 ASCII 0x00 .. 0x7F | 2655 ASCII 0x00 .. 0x7F |
2656 KATAKANA-JISX0201 0xA0 .. 0xDF | 2656 KATAKANA-JISX0201 0xA1 .. 0xDF |
2657 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF | 2657 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF |
2658 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC | 2658 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC |
2659 ------------------------------- | 2659 ------------------------------- |
2660 | 2660 |
2661 */ | 2661 */ |
2726 struct coding_system *coding = &dummy_coding; | 2726 struct coding_system *coding = &dummy_coding; |
2727 | 2727 |
2728 while (1) | 2728 while (1) |
2729 { | 2729 { |
2730 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); | 2730 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); |
2731 if (c >= 0x81) | 2731 if (c < 0x80) |
2732 { | 2732 continue; |
2733 if (c <= 0x9F || (c >= 0xE0 && c <= 0xEF)) | 2733 if (c == 0x80 || c == 0xA0 || c > 0xEF) |
2734 { | 2734 return 0; |
2735 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); | 2735 if (c <= 0x9F || c >= 0xE0) |
2736 if (c < 0x40 || c == 0x7F || c > 0xFC) | 2736 { |
2737 return 0; | 2737 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); |
2738 } | 2738 if (c < 0x40 || c == 0x7F || c > 0xFC) |
2739 else if (c > 0xDF) | |
2740 return 0; | 2739 return 0; |
2741 } | 2740 } |
2742 } | 2741 } |
2743 label_end_of_loop: | 2742 label_end_of_loop: |
2744 return CODING_CATEGORY_MASK_SJIS; | 2743 return CODING_CATEGORY_MASK_SJIS; |
2759 struct coding_system *coding = &dummy_coding; | 2758 struct coding_system *coding = &dummy_coding; |
2760 | 2759 |
2761 while (1) | 2760 while (1) |
2762 { | 2761 { |
2763 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); | 2762 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); |
2764 if (c >= 0xA1) | 2763 if (c < 0x80) |
2765 { | 2764 continue; |
2766 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); | 2765 if (c < 0xA1 || c > 0xFE) |
2767 if (c < 0x40 || (c >= 0x7F && c <= 0xA0)) | 2766 return 0; |
2768 return 0; | 2767 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); |
2769 } | 2768 if (c < 0x40 || (c > 0x7F && c < 0xA1) || c > 0xFE) |
2769 return 0; | |
2770 } | 2770 } |
2771 label_end_of_loop: | 2771 label_end_of_loop: |
2772 return CODING_CATEGORY_MASK_BIG5; | 2772 return CODING_CATEGORY_MASK_BIG5; |
2773 } | 2773 } |
2774 | 2774 |
2942 } | 2942 } |
2943 else | 2943 else |
2944 { | 2944 { |
2945 if (sjis_p) | 2945 if (sjis_p) |
2946 { | 2946 { |
2947 if (c1 >= 0xF0) | 2947 if (c1 == 0x80 || c1 == 0xA0 || c1 > 0xEF) |
2948 goto label_invalid_code; | 2948 goto label_invalid_code; |
2949 if (c1 < 0xA0 || c1 >= 0xE0) | 2949 if (c1 <= 0x9F || c1 >= 0xE0) |
2950 { | 2950 { |
2951 /* SJIS -> JISX0208 */ | 2951 /* SJIS -> JISX0208 */ |
2952 ONE_MORE_BYTE (c2); | 2952 ONE_MORE_BYTE (c2); |
2953 if (c2 < 0x40 || c2 == 0x7F || c2 > 0xFC) | 2953 if (c2 < 0x40 || c2 == 0x7F || c2 > 0xFC) |
2954 goto label_invalid_code; | 2954 goto label_invalid_code; |
2960 charset = charset_katakana_jisx0201; | 2960 charset = charset_katakana_jisx0201; |
2961 } | 2961 } |
2962 else | 2962 else |
2963 { | 2963 { |
2964 /* BIG5 -> Big5 */ | 2964 /* BIG5 -> Big5 */ |
2965 if (c1 < 0xA1 || c1 > 0xFE) | 2965 if (c1 < 0xA0 || c1 > 0xFE) |
2966 goto label_invalid_code; | 2966 goto label_invalid_code; |
2967 ONE_MORE_BYTE (c2); | 2967 ONE_MORE_BYTE (c2); |
2968 if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE) | 2968 if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE) |
2969 goto label_invalid_code; | 2969 goto label_invalid_code; |
2970 DECODE_BIG5 (c1, c2, charset, c1, c2); | 2970 DECODE_BIG5 (c1, c2, charset, c1, c2); |
6215 (start, end, highest) | 6215 (start, end, highest) |
6216 Lisp_Object start, end, highest; | 6216 Lisp_Object start, end, highest; |
6217 { | 6217 { |
6218 int from, to; | 6218 int from, to; |
6219 int from_byte, to_byte; | 6219 int from_byte, to_byte; |
6220 int include_anchor_byte = 0; | |
6220 | 6221 |
6221 CHECK_NUMBER_COERCE_MARKER (start, 0); | 6222 CHECK_NUMBER_COERCE_MARKER (start, 0); |
6222 CHECK_NUMBER_COERCE_MARKER (end, 1); | 6223 CHECK_NUMBER_COERCE_MARKER (end, 1); |
6223 | 6224 |
6224 validate_region (&start, &end); | 6225 validate_region (&start, &end); |
6226 from_byte = CHAR_TO_BYTE (from); | 6227 from_byte = CHAR_TO_BYTE (from); |
6227 to_byte = CHAR_TO_BYTE (to); | 6228 to_byte = CHAR_TO_BYTE (to); |
6228 | 6229 |
6229 if (from < GPT && to >= GPT) | 6230 if (from < GPT && to >= GPT) |
6230 move_gap_both (to, to_byte); | 6231 move_gap_both (to, to_byte); |
6231 | 6232 if (to == Z || (to == GPT && GAP_SIZE > 0)) |
6233 include_anchor_byte = 1; | |
6232 return detect_coding_system (BYTE_POS_ADDR (from_byte), | 6234 return detect_coding_system (BYTE_POS_ADDR (from_byte), |
6233 to_byte - from_byte, | 6235 /* "+ include_anchor_byte" is to |
6236 include the anchor byte `\0'. With | |
6237 this, code detectors can check if | |
6238 trailing bytes are valid. */ | |
6239 to_byte - from_byte + include_anchor_byte, | |
6234 !NILP (highest), | 6240 !NILP (highest), |
6235 !NILP (current_buffer | 6241 !NILP (current_buffer |
6236 ->enable_multibyte_characters)); | 6242 ->enable_multibyte_characters)); |
6237 } | 6243 } |
6238 | 6244 |
6251 Lisp_Object string, highest; | 6257 Lisp_Object string, highest; |
6252 { | 6258 { |
6253 CHECK_STRING (string, 0); | 6259 CHECK_STRING (string, 0); |
6254 | 6260 |
6255 return detect_coding_system (XSTRING (string)->data, | 6261 return detect_coding_system (XSTRING (string)->data, |
6256 STRING_BYTES (XSTRING (string)), | 6262 /* "+ 1" is to include the anchor byte |
6263 `\0'. With this, code detectors can | |
6264 check if trailing bytes are | |
6265 valid. */ | |
6266 STRING_BYTES (XSTRING (string)) + 1, | |
6257 !NILP (highest), | 6267 !NILP (highest), |
6258 STRING_MULTIBYTE (string)); | 6268 STRING_MULTIBYTE (string)); |
6259 } | 6269 } |
6260 | 6270 |
6261 /* Return an intersection of lists L1 and L2. */ | 6271 /* Return an intersection of lists L1 and L2. */ |