comparison src/coding.c @ 36647:0a75ccbe42b2

(detect_coding_sjis): Do more rigid check. (detect_coding_big5): Likewise. (decode_coding_sjis_big5): Likewise. (Fdetect_coding_region): Call detect_coding_system with trailing anchor byte `\0' for more rigid detection. (Fdetect_coding_string): Likewise.
author Kenichi Handa <handa@m17n.org>
date Thu, 08 Mar 2001 02:01:00 +0000
parents df3c622dba0e
children 5dc88f9ab0ef
comparison
equal deleted inserted replaced
36646:5c9e147d55d8 36647:0a75ccbe42b2
2651 so that it fits in the range below. 2651 so that it fits in the range below.
2652 2652
2653 --- CODE RANGE of SJIS --- 2653 --- CODE RANGE of SJIS ---
2654 (character set) (range) 2654 (character set) (range)
2655 ASCII 0x00 .. 0x7F 2655 ASCII 0x00 .. 0x7F
2656 KATAKANA-JISX0201 0xA0 .. 0xDF 2656 KATAKANA-JISX0201 0xA1 .. 0xDF
2657 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF 2657 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
2658 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC 2658 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
2659 ------------------------------- 2659 -------------------------------
2660 2660
2661 */ 2661 */
2726 struct coding_system *coding = &dummy_coding; 2726 struct coding_system *coding = &dummy_coding;
2727 2727
2728 while (1) 2728 while (1)
2729 { 2729 {
2730 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 2730 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2731 if (c >= 0x81) 2731 if (c < 0x80)
2732 { 2732 continue;
2733 if (c <= 0x9F || (c >= 0xE0 && c <= 0xEF)) 2733 if (c == 0x80 || c == 0xA0 || c > 0xEF)
2734 { 2734 return 0;
2735 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 2735 if (c <= 0x9F || c >= 0xE0)
2736 if (c < 0x40 || c == 0x7F || c > 0xFC) 2736 {
2737 return 0; 2737 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2738 } 2738 if (c < 0x40 || c == 0x7F || c > 0xFC)
2739 else if (c > 0xDF)
2740 return 0; 2739 return 0;
2741 } 2740 }
2742 } 2741 }
2743 label_end_of_loop: 2742 label_end_of_loop:
2744 return CODING_CATEGORY_MASK_SJIS; 2743 return CODING_CATEGORY_MASK_SJIS;
2759 struct coding_system *coding = &dummy_coding; 2758 struct coding_system *coding = &dummy_coding;
2760 2759
2761 while (1) 2760 while (1)
2762 { 2761 {
2763 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 2762 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2764 if (c >= 0xA1) 2763 if (c < 0x80)
2765 { 2764 continue;
2766 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 2765 if (c < 0xA1 || c > 0xFE)
2767 if (c < 0x40 || (c >= 0x7F && c <= 0xA0)) 2766 return 0;
2768 return 0; 2767 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2769 } 2768 if (c < 0x40 || (c > 0x7F && c < 0xA1) || c > 0xFE)
2769 return 0;
2770 } 2770 }
2771 label_end_of_loop: 2771 label_end_of_loop:
2772 return CODING_CATEGORY_MASK_BIG5; 2772 return CODING_CATEGORY_MASK_BIG5;
2773 } 2773 }
2774 2774
2942 } 2942 }
2943 else 2943 else
2944 { 2944 {
2945 if (sjis_p) 2945 if (sjis_p)
2946 { 2946 {
2947 if (c1 >= 0xF0) 2947 if (c1 == 0x80 || c1 == 0xA0 || c1 > 0xEF)
2948 goto label_invalid_code; 2948 goto label_invalid_code;
2949 if (c1 < 0xA0 || c1 >= 0xE0) 2949 if (c1 <= 0x9F || c1 >= 0xE0)
2950 { 2950 {
2951 /* SJIS -> JISX0208 */ 2951 /* SJIS -> JISX0208 */
2952 ONE_MORE_BYTE (c2); 2952 ONE_MORE_BYTE (c2);
2953 if (c2 < 0x40 || c2 == 0x7F || c2 > 0xFC) 2953 if (c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
2954 goto label_invalid_code; 2954 goto label_invalid_code;
2960 charset = charset_katakana_jisx0201; 2960 charset = charset_katakana_jisx0201;
2961 } 2961 }
2962 else 2962 else
2963 { 2963 {
2964 /* BIG5 -> Big5 */ 2964 /* BIG5 -> Big5 */
2965 if (c1 < 0xA1 || c1 > 0xFE) 2965 if (c1 < 0xA0 || c1 > 0xFE)
2966 goto label_invalid_code; 2966 goto label_invalid_code;
2967 ONE_MORE_BYTE (c2); 2967 ONE_MORE_BYTE (c2);
2968 if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE) 2968 if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE)
2969 goto label_invalid_code; 2969 goto label_invalid_code;
2970 DECODE_BIG5 (c1, c2, charset, c1, c2); 2970 DECODE_BIG5 (c1, c2, charset, c1, c2);
6215 (start, end, highest) 6215 (start, end, highest)
6216 Lisp_Object start, end, highest; 6216 Lisp_Object start, end, highest;
6217 { 6217 {
6218 int from, to; 6218 int from, to;
6219 int from_byte, to_byte; 6219 int from_byte, to_byte;
6220 int include_anchor_byte = 0;
6220 6221
6221 CHECK_NUMBER_COERCE_MARKER (start, 0); 6222 CHECK_NUMBER_COERCE_MARKER (start, 0);
6222 CHECK_NUMBER_COERCE_MARKER (end, 1); 6223 CHECK_NUMBER_COERCE_MARKER (end, 1);
6223 6224
6224 validate_region (&start, &end); 6225 validate_region (&start, &end);
6226 from_byte = CHAR_TO_BYTE (from); 6227 from_byte = CHAR_TO_BYTE (from);
6227 to_byte = CHAR_TO_BYTE (to); 6228 to_byte = CHAR_TO_BYTE (to);
6228 6229
6229 if (from < GPT && to >= GPT) 6230 if (from < GPT && to >= GPT)
6230 move_gap_both (to, to_byte); 6231 move_gap_both (to, to_byte);
6231 6232 if (to == Z || (to == GPT && GAP_SIZE > 0))
6233 include_anchor_byte = 1;
6232 return detect_coding_system (BYTE_POS_ADDR (from_byte), 6234 return detect_coding_system (BYTE_POS_ADDR (from_byte),
6233 to_byte - from_byte, 6235 /* "+ include_anchor_byte" is to
6236 include the anchor byte `\0'. With
6237 this, code detectors can check if
6238 trailing bytes are valid. */
6239 to_byte - from_byte + include_anchor_byte,
6234 !NILP (highest), 6240 !NILP (highest),
6235 !NILP (current_buffer 6241 !NILP (current_buffer
6236 ->enable_multibyte_characters)); 6242 ->enable_multibyte_characters));
6237 } 6243 }
6238 6244
6251 Lisp_Object string, highest; 6257 Lisp_Object string, highest;
6252 { 6258 {
6253 CHECK_STRING (string, 0); 6259 CHECK_STRING (string, 0);
6254 6260
6255 return detect_coding_system (XSTRING (string)->data, 6261 return detect_coding_system (XSTRING (string)->data,
6256 STRING_BYTES (XSTRING (string)), 6262 /* "+ 1" is to include the anchor byte
6263 `\0'. With this, code detectors can
6264 check if trailing bytes are
6265 valid. */
6266 STRING_BYTES (XSTRING (string)) + 1,
6257 !NILP (highest), 6267 !NILP (highest),
6258 STRING_MULTIBYTE (string)); 6268 STRING_MULTIBYTE (string));
6259 } 6269 }
6260 6270
6261 /* Return an intersection of lists L1 and L2. */ 6271 /* Return an intersection of lists L1 and L2. */