Mercurial > emacs
comparison src/coding.c @ 30487:6165da9c89c6
(Qsafe_charsets): This variable deleted.
(Qsafe_chars, Vchar_coding_system_table, Qchar_coding_system): New
variables.
(coding_safe_chars): New function.
(CODING_SAFE_CHAR_P): New macro.
(CHARSET_OK): New arg C. Call CODING_SAFE_CHAR_P instead of
checking safe_charsets member of the coding system. Caller
changed.
(detect_coding_iso2022): New local variable safe_chars.
(DECODE_DESIGNATION): Call CODING_SAFE_CHAR_P instead of checking
safe_charsets member of the coding system.
(decode_coding_iso2022): New local variable safe_chars.
(ENCODE_ISO_CHARACTER_DIMENSION1): Don't check unsafe chars here.
(ENCODE_ISO_CHARACTER_DIMENSION2): Likewise.
(ENCODE_ISO_CHARACTER): Arguments changed. Caller changed.
(ENCODE_UNSAFE_CHARACTER): New macro.
(encode_coding_iso2022): New local variable safe_chars. Check
unsafe chars.
(setup_coding_system): Delete the code to initialize
coding->safe_charsets.
(intersection, find_safe_codings): New functions.
(Ffind_coding_systems_region_internal): New function.
(syms_of_coding): Defsubr it. Initialize Qsafe_chars,
Qchar_coding_system. Make Vchar_coding_system_table a Lisp
variable and initialize it.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Thu, 27 Jul 2000 06:01:19 +0000 |
parents | d81bc1a374d6 |
children | 705b94e152b1 |
comparison
equal
deleted
inserted
replaced
30486:56dec1b451fe | 30487:6165da9c89c6 |
---|---|
335 Lisp_Object Qcoding_system, Qeol_type; | 335 Lisp_Object Qcoding_system, Qeol_type; |
336 Lisp_Object Qbuffer_file_coding_system; | 336 Lisp_Object Qbuffer_file_coding_system; |
337 Lisp_Object Qpost_read_conversion, Qpre_write_conversion; | 337 Lisp_Object Qpost_read_conversion, Qpre_write_conversion; |
338 Lisp_Object Qno_conversion, Qundecided; | 338 Lisp_Object Qno_conversion, Qundecided; |
339 Lisp_Object Qcoding_system_history; | 339 Lisp_Object Qcoding_system_history; |
340 Lisp_Object Qsafe_charsets; | 340 Lisp_Object Qsafe_chars; |
341 Lisp_Object Qvalid_codes; | 341 Lisp_Object Qvalid_codes; |
342 | 342 |
343 extern Lisp_Object Qinsert_file_contents, Qwrite_region; | 343 extern Lisp_Object Qinsert_file_contents, Qwrite_region; |
344 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument; | 344 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument; |
345 Lisp_Object Qstart_process, Qopen_network_stream; | 345 Lisp_Object Qstart_process, Qopen_network_stream; |
468 /* Global flag to tell that we can't call post-read-conversion and | 468 /* Global flag to tell that we can't call post-read-conversion and |
469 pre-write-conversion functions. Usually the value is zero, but it | 469 pre-write-conversion functions. Usually the value is zero, but it |
470 is set to 1 temporarily while such functions are running. This is | 470 is set to 1 temporarily while such functions are running. This is |
471 to avoid infinite recursive call. */ | 471 to avoid infinite recursive call. */ |
472 static int inhibit_pre_post_conversion; | 472 static int inhibit_pre_post_conversion; |
473 | |
474 /* Char-table containing safe coding systems of each character. */ | |
475 Lisp_Object Vchar_coding_system_table; | |
476 Lisp_Object Qchar_coding_system; | |
477 | |
478 /* Return `safe-chars' property of coding system CODING. Don't check | |
479 validity of CODING. */ | |
480 | |
481 Lisp_Object | |
482 coding_safe_chars (coding) | |
483 struct coding_system *coding; | |
484 { | |
485 Lisp_Object coding_spec, plist, safe_chars; | |
486 | |
487 coding_spec = Fget (coding->symbol, Qcoding_system); | |
488 plist = XVECTOR (coding_spec)->contents[3]; | |
489 safe_chars = Fplist_get (XVECTOR (coding_spec)->contents[3], Qsafe_chars); | |
490 return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt); | |
491 } | |
492 | |
493 #define CODING_SAFE_CHAR_P(safe_chars, c) \ | |
494 (EQ (safe_chars, Qt) || !NILP (CHAR_TABLE_REF (safe_chars, c))) | |
473 | 495 |
474 | 496 |
475 /*** 2. Emacs internal format (emacs-mule) handlers ***/ | 497 /*** 2. Emacs internal format (emacs-mule) handlers ***/ |
476 | 498 |
477 /* Emacs' internal format for encoding multiple character sets is a | 499 /* Emacs' internal format for encoding multiple character sets is a |
795 COMPOSITION_WITH_RULE_ALTCHARS: | 817 COMPOSITION_WITH_RULE_ALTCHARS: |
796 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */ | 818 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */ |
797 | 819 |
798 enum iso_code_class_type iso_code_class[256]; | 820 enum iso_code_class_type iso_code_class[256]; |
799 | 821 |
800 #define CHARSET_OK(idx, charset) \ | 822 #define CHARSET_OK(idx, charset, c) \ |
801 (coding_system_table[idx] \ | 823 (coding_system_table[idx] \ |
802 && (coding_system_table[idx]->safe_charsets[charset] \ | 824 && (charset == CHARSET_ASCII \ |
803 || (CODING_SPEC_ISO_REQUESTED_DESIGNATION \ | 825 || (safe_chars = coding_safe_chars (coding_system_table[idx]), \ |
804 (coding_system_table[idx], charset) \ | 826 CODING_SAFE_CHAR_P (safe_chars, c))) \ |
805 != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))) | 827 && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \ |
828 charset) \ | |
829 != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)) | |
806 | 830 |
807 #define SHIFT_OUT_OK(idx) \ | 831 #define SHIFT_OUT_OK(idx) \ |
808 (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0) | 832 (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0) |
809 | 833 |
810 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | 834 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". |
828 int reg[4], shift_out = 0, single_shifting = 0; | 852 int reg[4], shift_out = 0, single_shifting = 0; |
829 int c, c1, i, charset; | 853 int c, c1, i, charset; |
830 /* Dummy for ONE_MORE_BYTE. */ | 854 /* Dummy for ONE_MORE_BYTE. */ |
831 struct coding_system dummy_coding; | 855 struct coding_system dummy_coding; |
832 struct coding_system *coding = &dummy_coding; | 856 struct coding_system *coding = &dummy_coding; |
857 Lisp_Object safe_chars; | |
833 | 858 |
834 reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1; | 859 reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1; |
835 while (mask && src < src_end) | 860 while (mask && src < src_end) |
836 { | 861 { |
837 ONE_MORE_BYTE (c); | 862 ONE_MORE_BYTE (c); |
888 /* Invalid escape sequence. Just ignore. */ | 913 /* Invalid escape sequence. Just ignore. */ |
889 break; | 914 break; |
890 | 915 |
891 /* We found a valid designation sequence for CHARSET. */ | 916 /* We found a valid designation sequence for CHARSET. */ |
892 mask &= ~CODING_CATEGORY_MASK_ISO_8BIT; | 917 mask &= ~CODING_CATEGORY_MASK_ISO_8BIT; |
893 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset)) | 918 c = MAKE_CHAR (charset, 0, 0); |
919 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset, c)) | |
894 mask_found |= CODING_CATEGORY_MASK_ISO_7; | 920 mask_found |= CODING_CATEGORY_MASK_ISO_7; |
895 else | 921 else |
896 mask &= ~CODING_CATEGORY_MASK_ISO_7; | 922 mask &= ~CODING_CATEGORY_MASK_ISO_7; |
897 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset)) | 923 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset, c)) |
898 mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT; | 924 mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT; |
899 else | 925 else |
900 mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT; | 926 mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT; |
901 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset)) | 927 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset, c)) |
902 mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE; | 928 mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE; |
903 else | 929 else |
904 mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE; | 930 mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE; |
905 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset)) | 931 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset, c)) |
906 mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE; | 932 mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE; |
907 else | 933 else |
908 mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE; | 934 mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE; |
909 break; | 935 break; |
910 | 936 |
1040 : translate_char (translation_table, -1, charset, c1, c2)) | 1066 : translate_char (translation_table, -1, charset, c1, c2)) |
1041 | 1067 |
1042 /* Set designation state into CODING. */ | 1068 /* Set designation state into CODING. */ |
1043 #define DECODE_DESIGNATION(reg, dimension, chars, final_char) \ | 1069 #define DECODE_DESIGNATION(reg, dimension, chars, final_char) \ |
1044 do { \ | 1070 do { \ |
1045 int charset; \ | 1071 int charset, c; \ |
1046 \ | 1072 \ |
1047 if (final_char < '0' || final_char >= 128) \ | 1073 if (final_char < '0' || final_char >= 128) \ |
1048 goto label_invalid_code; \ | 1074 goto label_invalid_code; \ |
1049 charset = ISO_CHARSET_TABLE (make_number (dimension), \ | 1075 charset = ISO_CHARSET_TABLE (make_number (dimension), \ |
1050 make_number (chars), \ | 1076 make_number (chars), \ |
1051 make_number (final_char)); \ | 1077 make_number (final_char)); \ |
1078 c = MAKE_CHAR (charset, 0, 0); \ | |
1052 if (charset >= 0 \ | 1079 if (charset >= 0 \ |
1053 && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \ | 1080 && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \ |
1054 || coding->safe_charsets[charset])) \ | 1081 || CODING_SAFE_CHAR_P (safe_chars, c))) \ |
1055 { \ | 1082 { \ |
1056 if (coding->spec.iso2022.last_invalid_designation_register == 0 \ | 1083 if (coding->spec.iso2022.last_invalid_designation_register == 0 \ |
1057 && reg == 0 \ | 1084 && reg == 0 \ |
1058 && charset == CHARSET_ASCII) \ | 1085 && charset == CHARSET_ASCII) \ |
1059 { \ | 1086 { \ |
1236 destination area to produce a character (within macro | 1263 destination area to produce a character (within macro |
1237 EMIT_CHAR). */ | 1264 EMIT_CHAR). */ |
1238 unsigned char *src_base; | 1265 unsigned char *src_base; |
1239 int c, charset; | 1266 int c, charset; |
1240 Lisp_Object translation_table; | 1267 Lisp_Object translation_table; |
1268 Lisp_Object safe_chars; | |
1269 | |
1270 safe_chars = coding_safe_chars (coding); | |
1241 | 1271 |
1242 if (NILP (Venable_character_translation)) | 1272 if (NILP (Venable_character_translation)) |
1243 translation_table = Qnil; | 1273 translation_table = Qnil; |
1244 else | 1274 else |
1245 { | 1275 { |
1682 else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1)) \ | 1712 else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1)) \ |
1683 { \ | 1713 { \ |
1684 *dst++ = c1 | 0x80; \ | 1714 *dst++ = c1 | 0x80; \ |
1685 break; \ | 1715 break; \ |
1686 } \ | 1716 } \ |
1687 else if (coding->flags & CODING_FLAG_ISO_SAFE \ | |
1688 && !coding->safe_charsets[charset]) \ | |
1689 { \ | |
1690 /* We should not encode this character, instead produce one or \ | |
1691 two `?'s. */ \ | |
1692 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \ | |
1693 if (CHARSET_WIDTH (charset) == 2) \ | |
1694 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \ | |
1695 break; \ | |
1696 } \ | |
1697 else \ | 1717 else \ |
1698 /* Since CHARSET is not yet invoked to any graphic planes, we \ | 1718 /* Since CHARSET is not yet invoked to any graphic planes, we \ |
1699 must invoke it, or, at first, designate it to some graphic \ | 1719 must invoke it, or, at first, designate it to some graphic \ |
1700 register. Then repeat the loop to actually produce the \ | 1720 register. Then repeat the loop to actually produce the \ |
1701 character. */ \ | 1721 character. */ \ |
1725 else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1)) \ | 1745 else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1)) \ |
1726 { \ | 1746 { \ |
1727 *dst++ = c1 | 0x80, *dst++= c2 | 0x80; \ | 1747 *dst++ = c1 | 0x80, *dst++= c2 | 0x80; \ |
1728 break; \ | 1748 break; \ |
1729 } \ | 1749 } \ |
1730 else if (coding->flags & CODING_FLAG_ISO_SAFE \ | |
1731 && !coding->safe_charsets[charset]) \ | |
1732 { \ | |
1733 /* We should not encode this character, instead produce one or \ | |
1734 two `?'s. */ \ | |
1735 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \ | |
1736 if (CHARSET_WIDTH (charset) == 2) \ | |
1737 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \ | |
1738 break; \ | |
1739 } \ | |
1740 else \ | 1750 else \ |
1741 /* Since CHARSET is not yet invoked to any graphic planes, we \ | 1751 /* Since CHARSET is not yet invoked to any graphic planes, we \ |
1742 must invoke it, or, at first, designate it to some graphic \ | 1752 must invoke it, or, at first, designate it to some graphic \ |
1743 register. Then repeat the loop to actually produce the \ | 1753 register. Then repeat the loop to actually produce the \ |
1744 character. */ \ | 1754 character. */ \ |
1745 dst = encode_invocation_designation (charset, coding, dst); \ | 1755 dst = encode_invocation_designation (charset, coding, dst); \ |
1746 } while (1) | 1756 } while (1) |
1747 | 1757 |
1748 #define ENCODE_ISO_CHARACTER(charset, c1, c2) \ | 1758 #define ENCODE_ISO_CHARACTER(c) \ |
1759 do { \ | |
1760 int charset, c1, c2; \ | |
1761 \ | |
1762 SPLIT_CHAR (c, charset, c1, c2); \ | |
1763 if (CHARSET_DEFINED_P (charset)) \ | |
1764 { \ | |
1765 if (CHARSET_DIMENSION (charset) == 1) \ | |
1766 { \ | |
1767 if (charset == CHARSET_ASCII \ | |
1768 && coding->flags & CODING_FLAG_ISO_USE_ROMAN) \ | |
1769 charset = charset_latin_jisx0201; \ | |
1770 ENCODE_ISO_CHARACTER_DIMENSION1 (charset, c1); \ | |
1771 } \ | |
1772 else \ | |
1773 { \ | |
1774 if (charset == charset_jisx0208 \ | |
1775 && coding->flags & CODING_FLAG_ISO_USE_OLDJIS) \ | |
1776 charset = charset_jisx0208_1978; \ | |
1777 ENCODE_ISO_CHARACTER_DIMENSION2 (charset, c1, c2); \ | |
1778 } \ | |
1779 } \ | |
1780 else \ | |
1781 { \ | |
1782 *dst++ = c1; \ | |
1783 if (c2 >= 0) \ | |
1784 *dst++ = c2; \ | |
1785 } \ | |
1786 } while (0) | |
1787 | |
1788 | |
1789 /* Instead of encoding character C, produce one or two `?'s. */ | |
1790 | |
1791 #define ENCODE_UNSAFE_CHARACTER(c) \ | |
1749 do { \ | 1792 do { \ |
1750 int alt_charset = charset; \ | 1793 ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION); \ |
1751 \ | 1794 if (CHARSET_WIDTH (CHAR_CHARSET (c)) > 1) \ |
1752 if (CHARSET_DEFINED_P (charset)) \ | 1795 ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION); \ |
1753 { \ | |
1754 if (CHARSET_DIMENSION (charset) == 1) \ | |
1755 { \ | |
1756 if (charset == CHARSET_ASCII \ | |
1757 && coding->flags & CODING_FLAG_ISO_USE_ROMAN) \ | |
1758 alt_charset = charset_latin_jisx0201; \ | |
1759 ENCODE_ISO_CHARACTER_DIMENSION1 (alt_charset, c1); \ | |
1760 } \ | |
1761 else \ | |
1762 { \ | |
1763 if (charset == charset_jisx0208 \ | |
1764 && coding->flags & CODING_FLAG_ISO_USE_OLDJIS) \ | |
1765 alt_charset = charset_jisx0208_1978; \ | |
1766 ENCODE_ISO_CHARACTER_DIMENSION2 (alt_charset, c1, c2); \ | |
1767 } \ | |
1768 } \ | |
1769 else \ | |
1770 { \ | |
1771 *dst++ = c1; \ | |
1772 if (c2 >= 0) \ | |
1773 *dst++ = c2; \ | |
1774 } \ | |
1775 } while (0) | 1796 } while (0) |
1797 | |
1776 | 1798 |
1777 /* Produce designation and invocation codes at a place pointed by DST | 1799 /* Produce designation and invocation codes at a place pointed by DST |
1778 to use CHARSET. The element `spec.iso2022' of *CODING is updated. | 1800 to use CHARSET. The element `spec.iso2022' of *CODING is updated. |
1779 Return new DST. */ | 1801 Return new DST. */ |
1780 | 1802 |
1995 there's not enough destination area to produce encoded codes | 2017 there's not enough destination area to produce encoded codes |
1996 (within macro EMIT_BYTES). */ | 2018 (within macro EMIT_BYTES). */ |
1997 unsigned char *src_base; | 2019 unsigned char *src_base; |
1998 int c; | 2020 int c; |
1999 Lisp_Object translation_table; | 2021 Lisp_Object translation_table; |
2022 Lisp_Object safe_chars; | |
2023 | |
2024 safe_chars = coding_safe_chars (coding); | |
2000 | 2025 |
2001 if (NILP (Venable_character_translation)) | 2026 if (NILP (Venable_character_translation)) |
2002 translation_table = Qnil; | 2027 translation_table = Qnil; |
2003 else | 2028 else |
2004 { | 2029 { |
2009 | 2034 |
2010 coding->consumed_char = 0; | 2035 coding->consumed_char = 0; |
2011 coding->errors = 0; | 2036 coding->errors = 0; |
2012 while (1) | 2037 while (1) |
2013 { | 2038 { |
2014 int charset, c1, c2; | |
2015 | |
2016 src_base = src; | 2039 src_base = src; |
2017 | 2040 |
2018 if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19))) | 2041 if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19))) |
2019 { | 2042 { |
2020 coding->result = CODING_FINISH_INSUFFICIENT_DST; | 2043 coding->result = CODING_FINISH_INSUFFICIENT_DST; |
2063 ENCODE_COMPOSITION_RULE (c); | 2086 ENCODE_COMPOSITION_RULE (c); |
2064 coding->composition_rule_follows = 0; | 2087 coding->composition_rule_follows = 0; |
2065 } | 2088 } |
2066 else | 2089 else |
2067 { | 2090 { |
2068 SPLIT_CHAR (c, charset, c1, c2); | 2091 if (coding->flags & CODING_FLAG_ISO_SAFE |
2069 ENCODE_ISO_CHARACTER (charset, c1, c2); | 2092 && ! CODING_SAFE_CHAR_P (safe_chars, c)) |
2093 ENCODE_UNSAFE_CHARACTER (c); | |
2094 else | |
2095 ENCODE_ISO_CHARACTER (c); | |
2070 if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS) | 2096 if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS) |
2071 coding->composition_rule_follows = 1; | 2097 coding->composition_rule_follows = 1; |
2072 } | 2098 } |
2073 continue; | 2099 continue; |
2074 } | 2100 } |
2123 ENCODE_RESET_PLANE_AND_REGISTER; | 2149 ENCODE_RESET_PLANE_AND_REGISTER; |
2124 *dst++ = c; | 2150 *dst++ = c; |
2125 } | 2151 } |
2126 } | 2152 } |
2127 else if (ASCII_BYTE_P (c)) | 2153 else if (ASCII_BYTE_P (c)) |
2128 ENCODE_ISO_CHARACTER (CHARSET_ASCII, c, /* dummy */ c1); | 2154 ENCODE_ISO_CHARACTER (c); |
2129 else if (SINGLE_BYTE_CHAR_P (c)) | 2155 else if (SINGLE_BYTE_CHAR_P (c)) |
2130 { | 2156 { |
2131 *dst++ = c; | 2157 *dst++ = c; |
2132 coding->errors++; | 2158 coding->errors++; |
2133 } | 2159 } |
2160 else if (coding->flags & CODING_FLAG_ISO_SAFE | |
2161 && ! CODING_SAFE_CHAR_P (safe_chars, c)) | |
2162 ENCODE_UNSAFE_CHARACTER (c); | |
2134 else | 2163 else |
2135 { | 2164 ENCODE_ISO_CHARACTER (c); |
2136 SPLIT_CHAR (c, charset, c1, c2); | |
2137 ENCODE_ISO_CHARACTER (charset, c1, c2); | |
2138 } | |
2139 | 2165 |
2140 coding->consumed_char++; | 2166 coding->consumed_char++; |
2141 } | 2167 } |
2142 | 2168 |
2143 label_end_of_loop: | 2169 label_end_of_loop: |
2968 goto label_invalid_coding_system; | 2994 goto label_invalid_coding_system; |
2969 } | 2995 } |
2970 else | 2996 else |
2971 goto label_invalid_coding_system; | 2997 goto label_invalid_coding_system; |
2972 | 2998 |
2973 val = Fplist_get (plist, Qsafe_charsets); | |
2974 if (EQ (val, Qt)) | |
2975 { | |
2976 for (i = 0; i <= MAX_CHARSET; i++) | |
2977 coding->safe_charsets[i] = 1; | |
2978 } | |
2979 else | |
2980 { | |
2981 bzero (coding->safe_charsets, MAX_CHARSET + 1); | |
2982 while (CONSP (val)) | |
2983 { | |
2984 if ((i = get_charset_id (XCAR (val))) >= 0) | |
2985 coding->safe_charsets[i] = 1; | |
2986 val = XCDR (val); | |
2987 } | |
2988 } | |
2989 | |
2990 /* If the coding system has non-nil `composition' property, enable | 2999 /* If the coding system has non-nil `composition' property, enable |
2991 composition handling. */ | 3000 composition handling. */ |
2992 val = Fplist_get (plist, Qcomposition); | 3001 val = Fplist_get (plist, Qcomposition); |
2993 if (!NILP (val)) | 3002 if (!NILP (val)) |
2994 coding->composing = COMPOSITION_NO; | 3003 coding->composing = COMPOSITION_NO; |
5540 return detect_coding_system (XSTRING (string)->data, | 5549 return detect_coding_system (XSTRING (string)->data, |
5541 STRING_BYTES (XSTRING (string)), | 5550 STRING_BYTES (XSTRING (string)), |
5542 !NILP (highest)); | 5551 !NILP (highest)); |
5543 } | 5552 } |
5544 | 5553 |
5554 /* Return an intersection of lists L1 and L2. */ | |
5555 | |
5556 static Lisp_Object | |
5557 intersection (l1, l2) | |
5558 Lisp_Object l1, l2; | |
5559 { | |
5560 Lisp_Object val; | |
5561 | |
5562 for (val = Qnil; CONSP (l1); l1 = XCDR (l1)) | |
5563 { | |
5564 if (!NILP (Fmemq (XCAR (l1), l2))) | |
5565 val = Fcons (XCAR (l1), val); | |
5566 } | |
5567 return val; | |
5568 } | |
5569 | |
5570 | |
5571 /* Subroutine for Ffind_coding_systems_region_internal. | |
5572 | |
5573 Return a list of coding systems that safely encode the multibyte | |
5574 text between P and PEND. SAFE_CODINGS, if non-nil, is a list of | |
5575 possible coding systems. If it is nil, it means that we have not | |
5576 yet found any coding systems. | |
5577 | |
5578 WORK_TABLE is a copy of the char-table Vchar_coding_system_table. An | |
5579 element of WORK_TABLE is set to t once the element is looked up. | |
5580 | |
5581 If a non-ASCII single byte char is found, set | |
5582 *single_byte_char_found to 1. */ | |
5583 | |
5584 static Lisp_Object | |
5585 find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found) | |
5586 unsigned char *p, *pend; | |
5587 Lisp_Object safe_codings, work_table; | |
5588 int *single_byte_char_found; | |
5589 { | |
5590 int c, len, idx; | |
5591 Lisp_Object val; | |
5592 | |
5593 while (p < pend) | |
5594 { | |
5595 c = STRING_CHAR_AND_LENGTH (p, pend - p, len); | |
5596 p += len; | |
5597 if (ASCII_BYTE_P (c)) | |
5598 /* We can ignore ASCII characters here. */ | |
5599 continue; | |
5600 if (SINGLE_BYTE_CHAR_P (c)) | |
5601 *single_byte_char_found = 1; | |
5602 if (NILP (safe_codings)) | |
5603 continue; | |
5604 /* Check the safe coding systems for C. */ | |
5605 val = char_table_ref_and_index (work_table, c, &idx); | |
5606 if (EQ (val, Qt)) | |
5607 /* This element was already checked. Ignore it. */ | |
5608 continue; | |
5609 /* Remember that we checked this element. */ | |
5610 CHAR_TABLE_SET (work_table, idx, Qt); | |
5611 | |
5612 /* If there are some safe coding systems for C and we have | |
5613 already found the other set of coding systems for the | |
5614 different characters, get the intersection of them. */ | |
5615 if (!EQ (safe_codings, Qt) && !NILP (val)) | |
5616 val = intersection (safe_codings, val); | |
5617 safe_codings = val; | |
5618 } | |
5619 return safe_codings; | |
5620 } | |
5621 | |
5622 | |
5623 /* Return a list of coding systems that safely encode the text between | |
5624 START and END. If the text contains only ASCII or is unibyte, | |
5625 return t. */ | |
5626 | |
5627 DEFUN ("find-coding-systems-region-internal", | |
5628 Ffind_coding_systems_region_internal, | |
5629 Sfind_coding_systems_region_internal, 2, 2, 0, | |
5630 "Internal use only.") | |
5631 (start, end) | |
5632 Lisp_Object start, end; | |
5633 { | |
5634 Lisp_Object work_table, safe_codings; | |
5635 int non_ascii_p = 0; | |
5636 int single_byte_char_found = 0; | |
5637 unsigned char *p1, *p1end, *p2, *p2end, *p; | |
5638 Lisp_Object args[2]; | |
5639 | |
5640 if (STRINGP (start)) | |
5641 { | |
5642 if (!STRING_MULTIBYTE (start)) | |
5643 return Qt; | |
5644 p1 = XSTRING (start)->data, p1end = p1 + STRING_BYTES (XSTRING (start)); | |
5645 p2 = p2end = p1end; | |
5646 if (XSTRING (start)->size != STRING_BYTES (XSTRING (start))) | |
5647 non_ascii_p = 1; | |
5648 } | |
5649 else | |
5650 { | |
5651 int from, to, stop; | |
5652 | |
5653 CHECK_NUMBER_COERCE_MARKER (start, 0); | |
5654 CHECK_NUMBER_COERCE_MARKER (end, 1); | |
5655 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end)) | |
5656 args_out_of_range (start, end); | |
5657 if (NILP (current_buffer->enable_multibyte_characters)) | |
5658 return Qt; | |
5659 from = CHAR_TO_BYTE (XINT (start)); | |
5660 to = CHAR_TO_BYTE (XINT (end)); | |
5661 stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to; | |
5662 p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from); | |
5663 if (stop == to) | |
5664 p2 = p2end = p1end; | |
5665 else | |
5666 p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop); | |
5667 if (XINT (end) - XINT (start) != to - from) | |
5668 non_ascii_p = 1; | |
5669 } | |
5670 | |
5671 if (!non_ascii_p) | |
5672 { | |
5673 /* We are sure that the text contains no multibyte character. | |
5674 Check if it contains eight-bit-graphic. */ | |
5675 p = p1; | |
5676 for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++); | |
5677 if (p == p1end) | |
5678 { | |
5679 for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++); | |
5680 if (p == p2end) | |
5681 return Qt; | |
5682 } | |
5683 } | |
5684 | |
5685 /* The text contains non-ASCII characters. */ | |
5686 work_table = Fcopy_sequence (Vchar_coding_system_table); | |
5687 safe_codings = find_safe_codings (p1, p1end, Qt, work_table, | |
5688 &single_byte_char_found); | |
5689 if (p2 < p2end) | |
5690 safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table, | |
5691 &single_byte_char_found); | |
5692 | |
5693 if (!single_byte_char_found) | |
5694 { | |
5695 /* Append generic coding systems. */ | |
5696 Lisp_Object args[2]; | |
5697 args[0] = safe_codings; | |
5698 args[1] = Fchar_table_extra_slot (Vchar_coding_system_table, | |
5699 make_number (0)); | |
5700 safe_codings = Fappend (make_number (2), args); | |
5701 } | |
5702 else | |
5703 safe_codings = Fcons (Qraw_text, Fcons (Qemacs_mule, safe_codings)); | |
5704 return safe_codings; | |
5705 } | |
5706 | |
5707 | |
5545 Lisp_Object | 5708 Lisp_Object |
5546 code_convert_region1 (start, end, coding_system, encodep) | 5709 code_convert_region1 (start, end, coding_system, encodep) |
5547 Lisp_Object start, end, coding_system; | 5710 Lisp_Object start, end, coding_system; |
5548 int encodep; | 5711 int encodep; |
5549 { | 5712 { |
6194 staticpro (&Qtranslation_table_for_decode); | 6357 staticpro (&Qtranslation_table_for_decode); |
6195 | 6358 |
6196 Qtranslation_table_for_encode = intern ("translation-table-for-encode"); | 6359 Qtranslation_table_for_encode = intern ("translation-table-for-encode"); |
6197 staticpro (&Qtranslation_table_for_encode); | 6360 staticpro (&Qtranslation_table_for_encode); |
6198 | 6361 |
6199 Qsafe_charsets = intern ("safe-charsets"); | 6362 Qsafe_chars = intern ("safe-chars"); |
6200 staticpro (&Qsafe_charsets); | 6363 staticpro (&Qsafe_chars); |
6364 | |
6365 Qchar_coding_system = intern ("char-coding-system"); | |
6366 staticpro (&Qchar_coding_system); | |
6367 | |
6368 /* Intern this now in case it isn't already done. | |
6369 Setting this variable twice is harmless. | |
6370 But don't staticpro it here--that is done in alloc.c. */ | |
6371 Qchar_table_extra_slots = intern ("char-table-extra-slots"); | |
6372 Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0)); | |
6373 Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (1)); | |
6201 | 6374 |
6202 Qvalid_codes = intern ("valid-codes"); | 6375 Qvalid_codes = intern ("valid-codes"); |
6203 staticpro (&Qvalid_codes); | 6376 staticpro (&Qvalid_codes); |
6204 | 6377 |
6205 Qemacs_mule = intern ("emacs-mule"); | 6378 Qemacs_mule = intern ("emacs-mule"); |
6212 defsubr (&Sread_coding_system); | 6385 defsubr (&Sread_coding_system); |
6213 defsubr (&Sread_non_nil_coding_system); | 6386 defsubr (&Sread_non_nil_coding_system); |
6214 defsubr (&Scheck_coding_system); | 6387 defsubr (&Scheck_coding_system); |
6215 defsubr (&Sdetect_coding_region); | 6388 defsubr (&Sdetect_coding_region); |
6216 defsubr (&Sdetect_coding_string); | 6389 defsubr (&Sdetect_coding_string); |
6390 defsubr (&Sfind_coding_systems_region_internal); | |
6217 defsubr (&Sdecode_coding_region); | 6391 defsubr (&Sdecode_coding_region); |
6218 defsubr (&Sencode_coding_region); | 6392 defsubr (&Sencode_coding_region); |
6219 defsubr (&Sdecode_coding_string); | 6393 defsubr (&Sdecode_coding_string); |
6220 defsubr (&Sencode_coding_string); | 6394 defsubr (&Sencode_coding_string); |
6221 defsubr (&Sdecode_sjis_char); | 6395 defsubr (&Sdecode_sjis_char); |
6414 coding system which can encode the text in the case that a default\n\ | 6588 coding system which can encode the text in the case that a default\n\ |
6415 coding system used in each operation can't encode the text.\n\ | 6589 coding system used in each operation can't encode the text.\n\ |
6416 \n\ | 6590 \n\ |
6417 The default value is `select-safe-coding-system' (which see)."); | 6591 The default value is `select-safe-coding-system' (which see)."); |
6418 Vselect_safe_coding_system_function = Qnil; | 6592 Vselect_safe_coding_system_function = Qnil; |
6593 | |
6594 DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table, | |
6595 "Char-table containing safe coding systems of each characters.\n\ | |
6596 Each element doesn't include such generic coding systems that can\n\ | |
6597 encode any characters. They are in the first extra slot."); | |
6598 Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil); | |
6419 | 6599 |
6420 DEFVAR_BOOL ("inhibit-iso-escape-detection", | 6600 DEFVAR_BOOL ("inhibit-iso-escape-detection", |
6421 &inhibit_iso_escape_detection, | 6601 &inhibit_iso_escape_detection, |
6422 "If non-nil, Emacs ignores ISO2022's escape sequence on code detection.\n\ | 6602 "If non-nil, Emacs ignores ISO2022's escape sequence on code detection.\n\ |
6423 \n\ | 6603 \n\ |