comparison src/coding.c @ 30487:6165da9c89c6

(Qsafe_charsets): This variable deleted. (Qsafe_chars, Vchar_coding_system_table, Qchar_coding_system): New variables. (coding_safe_chars): New function. (CODING_SAFE_CHAR_P): New macro. (CHARSET_OK): New arg C. Call CODING_SAFE_CHAR_P instead of checking safe_charsets member of the coding system. Caller changed. (detect_coding_iso2022): New local variable safe_chars. (DECODE_DESIGNATION): Call CODING_SAFE_CHAR_P instead of checking safe_charsets member of the coding system. (decode_coding_iso2022): New local variable safe_chars. (ENCODE_ISO_CHARACTER_DIMENSION1): Don't check unsafe chars here. (ENCODE_ISO_CHARACTER_DIMENSION2): Likewise. (ENCODE_ISO_CHARACTER): Arguments changed. Caller changed. (ENCODE_UNSAFE_CHARACTER): New macro. (encode_coding_iso2022): New local variable safe_chars. Check unsafe chars. (setup_coding_system): Delete the code to initialize coding->safe_charsets. (intersection, find_safe_codings): New functions. (Ffind_coding_systems_region_internal): New function. (syms_of_coding): Defsubr it. Initialize Qsafe_chars, Qchar_coding_system. Make Vchar_coding_system_table a Lisp variable and initialize it.
author Kenichi Handa <handa@m17n.org>
date Thu, 27 Jul 2000 06:01:19 +0000
parents d81bc1a374d6
children 705b94e152b1
comparison
equal deleted inserted replaced
30486:56dec1b451fe 30487:6165da9c89c6
335 Lisp_Object Qcoding_system, Qeol_type; 335 Lisp_Object Qcoding_system, Qeol_type;
336 Lisp_Object Qbuffer_file_coding_system; 336 Lisp_Object Qbuffer_file_coding_system;
337 Lisp_Object Qpost_read_conversion, Qpre_write_conversion; 337 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
338 Lisp_Object Qno_conversion, Qundecided; 338 Lisp_Object Qno_conversion, Qundecided;
339 Lisp_Object Qcoding_system_history; 339 Lisp_Object Qcoding_system_history;
340 Lisp_Object Qsafe_charsets; 340 Lisp_Object Qsafe_chars;
341 Lisp_Object Qvalid_codes; 341 Lisp_Object Qvalid_codes;
342 342
343 extern Lisp_Object Qinsert_file_contents, Qwrite_region; 343 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
344 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument; 344 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
345 Lisp_Object Qstart_process, Qopen_network_stream; 345 Lisp_Object Qstart_process, Qopen_network_stream;
468 /* Global flag to tell that we can't call post-read-conversion and 468 /* Global flag to tell that we can't call post-read-conversion and
469 pre-write-conversion functions. Usually the value is zero, but it 469 pre-write-conversion functions. Usually the value is zero, but it
470 is set to 1 temporarily while such functions are running. This is 470 is set to 1 temporarily while such functions are running. This is
471 to avoid infinite recursive call. */ 471 to avoid infinite recursive call. */
472 static int inhibit_pre_post_conversion; 472 static int inhibit_pre_post_conversion;
473
474 /* Char-table containing safe coding systems of each character. */
475 Lisp_Object Vchar_coding_system_table;
476 Lisp_Object Qchar_coding_system;
477
478 /* Return the `safe-chars' property of coding system CODING, or t if
479 that property is not a char-table. Don't check validity of CODING. */
480
481 Lisp_Object
482 coding_safe_chars (coding)
483 struct coding_system *coding;
484 {
485 Lisp_Object coding_spec, plist, safe_chars;
486
487 coding_spec = Fget (coding->symbol, Qcoding_system);
488 plist = XVECTOR (coding_spec)->contents[3];
489 safe_chars = Fplist_get (XVECTOR (coding_spec)->contents[3], Qsafe_chars);
490 return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
491 }
492
493 #define CODING_SAFE_CHAR_P(safe_chars, c) \
494 (EQ (safe_chars, Qt) || !NILP (CHAR_TABLE_REF (safe_chars, c)))
473 495
474 496
475 /*** 2. Emacs internal format (emacs-mule) handlers ***/ 497 /*** 2. Emacs internal format (emacs-mule) handlers ***/
476 498
477 /* Emacs' internal format for encoding multiple character sets is a 499 /* Emacs' internal format for encoding multiple character sets is a
795 COMPOSITION_WITH_RULE_ALTCHARS: 817 COMPOSITION_WITH_RULE_ALTCHARS:
796 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */ 818 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
797 819
798 enum iso_code_class_type iso_code_class[256]; 820 enum iso_code_class_type iso_code_class[256];
799 821
800 #define CHARSET_OK(idx, charset) \ 822 #define CHARSET_OK(idx, charset, c) \
801 (coding_system_table[idx] \ 823 (coding_system_table[idx] \
802 && (coding_system_table[idx]->safe_charsets[charset] \ 824 && (charset == CHARSET_ASCII \
803 || (CODING_SPEC_ISO_REQUESTED_DESIGNATION \ 825 || (safe_chars = coding_safe_chars (coding_system_table[idx]), \
804 (coding_system_table[idx], charset) \ 826 CODING_SAFE_CHAR_P (safe_chars, c))) \
805 != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))) 827 && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \
828 charset) \
829 != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
806 830
807 #define SHIFT_OUT_OK(idx) \ 831 #define SHIFT_OUT_OK(idx) \
808 (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0) 832 (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
809 833
810 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". 834 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
828 int reg[4], shift_out = 0, single_shifting = 0; 852 int reg[4], shift_out = 0, single_shifting = 0;
829 int c, c1, i, charset; 853 int c, c1, i, charset;
830 /* Dummy for ONE_MORE_BYTE. */ 854 /* Dummy for ONE_MORE_BYTE. */
831 struct coding_system dummy_coding; 855 struct coding_system dummy_coding;
832 struct coding_system *coding = &dummy_coding; 856 struct coding_system *coding = &dummy_coding;
857 Lisp_Object safe_chars;
833 858
834 reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1; 859 reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
835 while (mask && src < src_end) 860 while (mask && src < src_end)
836 { 861 {
837 ONE_MORE_BYTE (c); 862 ONE_MORE_BYTE (c);
888 /* Invalid escape sequence. Just ignore. */ 913 /* Invalid escape sequence. Just ignore. */
889 break; 914 break;
890 915
891 /* We found a valid designation sequence for CHARSET. */ 916 /* We found a valid designation sequence for CHARSET. */
892 mask &= ~CODING_CATEGORY_MASK_ISO_8BIT; 917 mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
893 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset)) 918 c = MAKE_CHAR (charset, 0, 0);
919 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset, c))
894 mask_found |= CODING_CATEGORY_MASK_ISO_7; 920 mask_found |= CODING_CATEGORY_MASK_ISO_7;
895 else 921 else
896 mask &= ~CODING_CATEGORY_MASK_ISO_7; 922 mask &= ~CODING_CATEGORY_MASK_ISO_7;
897 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset)) 923 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset, c))
898 mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT; 924 mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
899 else 925 else
900 mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT; 926 mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
901 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset)) 927 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset, c))
902 mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE; 928 mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
903 else 929 else
904 mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE; 930 mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
905 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset)) 931 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset, c))
906 mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE; 932 mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
907 else 933 else
908 mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE; 934 mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
909 break; 935 break;
910 936
1040 : translate_char (translation_table, -1, charset, c1, c2)) 1066 : translate_char (translation_table, -1, charset, c1, c2))
1041 1067
1042 /* Set designation state into CODING. */ 1068 /* Set designation state into CODING. */
1043 #define DECODE_DESIGNATION(reg, dimension, chars, final_char) \ 1069 #define DECODE_DESIGNATION(reg, dimension, chars, final_char) \
1044 do { \ 1070 do { \
1045 int charset; \ 1071 int charset, c; \
1046 \ 1072 \
1047 if (final_char < '0' || final_char >= 128) \ 1073 if (final_char < '0' || final_char >= 128) \
1048 goto label_invalid_code; \ 1074 goto label_invalid_code; \
1049 charset = ISO_CHARSET_TABLE (make_number (dimension), \ 1075 charset = ISO_CHARSET_TABLE (make_number (dimension), \
1050 make_number (chars), \ 1076 make_number (chars), \
1051 make_number (final_char)); \ 1077 make_number (final_char)); \
1078 c = MAKE_CHAR (charset, 0, 0); \
1052 if (charset >= 0 \ 1079 if (charset >= 0 \
1053 && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \ 1080 && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
1054 || coding->safe_charsets[charset])) \ 1081 || CODING_SAFE_CHAR_P (safe_chars, c))) \
1055 { \ 1082 { \
1056 if (coding->spec.iso2022.last_invalid_designation_register == 0 \ 1083 if (coding->spec.iso2022.last_invalid_designation_register == 0 \
1057 && reg == 0 \ 1084 && reg == 0 \
1058 && charset == CHARSET_ASCII) \ 1085 && charset == CHARSET_ASCII) \
1059 { \ 1086 { \
1236 destination area to produce a character (within macro 1263 destination area to produce a character (within macro
1237 EMIT_CHAR). */ 1264 EMIT_CHAR). */
1238 unsigned char *src_base; 1265 unsigned char *src_base;
1239 int c, charset; 1266 int c, charset;
1240 Lisp_Object translation_table; 1267 Lisp_Object translation_table;
1268 Lisp_Object safe_chars;
1269
1270 safe_chars = coding_safe_chars (coding);
1241 1271
1242 if (NILP (Venable_character_translation)) 1272 if (NILP (Venable_character_translation))
1243 translation_table = Qnil; 1273 translation_table = Qnil;
1244 else 1274 else
1245 { 1275 {
1682 else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1)) \ 1712 else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1)) \
1683 { \ 1713 { \
1684 *dst++ = c1 | 0x80; \ 1714 *dst++ = c1 | 0x80; \
1685 break; \ 1715 break; \
1686 } \ 1716 } \
1687 else if (coding->flags & CODING_FLAG_ISO_SAFE \
1688 && !coding->safe_charsets[charset]) \
1689 { \
1690 /* We should not encode this character, instead produce one or \
1691 two `?'s. */ \
1692 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
1693 if (CHARSET_WIDTH (charset) == 2) \
1694 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
1695 break; \
1696 } \
1697 else \ 1717 else \
1698 /* Since CHARSET is not yet invoked to any graphic planes, we \ 1718 /* Since CHARSET is not yet invoked to any graphic planes, we \
1699 must invoke it, or, at first, designate it to some graphic \ 1719 must invoke it, or, at first, designate it to some graphic \
1700 register. Then repeat the loop to actually produce the \ 1720 register. Then repeat the loop to actually produce the \
1701 character. */ \ 1721 character. */ \
1725 else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1)) \ 1745 else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1)) \
1726 { \ 1746 { \
1727 *dst++ = c1 | 0x80, *dst++= c2 | 0x80; \ 1747 *dst++ = c1 | 0x80, *dst++= c2 | 0x80; \
1728 break; \ 1748 break; \
1729 } \ 1749 } \
1730 else if (coding->flags & CODING_FLAG_ISO_SAFE \
1731 && !coding->safe_charsets[charset]) \
1732 { \
1733 /* We should not encode this character, instead produce one or \
1734 two `?'s. */ \
1735 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
1736 if (CHARSET_WIDTH (charset) == 2) \
1737 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
1738 break; \
1739 } \
1740 else \ 1750 else \
1741 /* Since CHARSET is not yet invoked to any graphic planes, we \ 1751 /* Since CHARSET is not yet invoked to any graphic planes, we \
1742 must invoke it, or, at first, designate it to some graphic \ 1752 must invoke it, or, at first, designate it to some graphic \
1743 register. Then repeat the loop to actually produce the \ 1753 register. Then repeat the loop to actually produce the \
1744 character. */ \ 1754 character. */ \
1745 dst = encode_invocation_designation (charset, coding, dst); \ 1755 dst = encode_invocation_designation (charset, coding, dst); \
1746 } while (1) 1756 } while (1)
1747 1757
1748 #define ENCODE_ISO_CHARACTER(charset, c1, c2) \ 1758 #define ENCODE_ISO_CHARACTER(c) \
1759 do { \
1760 int charset, c1, c2; \
1761 \
1762 SPLIT_CHAR (c, charset, c1, c2); \
1763 if (CHARSET_DEFINED_P (charset)) \
1764 { \
1765 if (CHARSET_DIMENSION (charset) == 1) \
1766 { \
1767 if (charset == CHARSET_ASCII \
1768 && coding->flags & CODING_FLAG_ISO_USE_ROMAN) \
1769 charset = charset_latin_jisx0201; \
1770 ENCODE_ISO_CHARACTER_DIMENSION1 (charset, c1); \
1771 } \
1772 else \
1773 { \
1774 if (charset == charset_jisx0208 \
1775 && coding->flags & CODING_FLAG_ISO_USE_OLDJIS) \
1776 charset = charset_jisx0208_1978; \
1777 ENCODE_ISO_CHARACTER_DIMENSION2 (charset, c1, c2); \
1778 } \
1779 } \
1780 else \
1781 { \
1782 *dst++ = c1; \
1783 if (c2 >= 0) \
1784 *dst++ = c2; \
1785 } \
1786 } while (0)
1787
1788
1789 /* Instead of encoding character C, produce one or two `?'s. */
1790
1791 #define ENCODE_UNSAFE_CHARACTER(c) \
1749 do { \ 1792 do { \
1750 int alt_charset = charset; \ 1793 ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION); \
1751 \ 1794 if (CHARSET_WIDTH (CHAR_CHARSET (c)) > 1) \
1752 if (CHARSET_DEFINED_P (charset)) \ 1795 ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION); \
1753 { \
1754 if (CHARSET_DIMENSION (charset) == 1) \
1755 { \
1756 if (charset == CHARSET_ASCII \
1757 && coding->flags & CODING_FLAG_ISO_USE_ROMAN) \
1758 alt_charset = charset_latin_jisx0201; \
1759 ENCODE_ISO_CHARACTER_DIMENSION1 (alt_charset, c1); \
1760 } \
1761 else \
1762 { \
1763 if (charset == charset_jisx0208 \
1764 && coding->flags & CODING_FLAG_ISO_USE_OLDJIS) \
1765 alt_charset = charset_jisx0208_1978; \
1766 ENCODE_ISO_CHARACTER_DIMENSION2 (alt_charset, c1, c2); \
1767 } \
1768 } \
1769 else \
1770 { \
1771 *dst++ = c1; \
1772 if (c2 >= 0) \
1773 *dst++ = c2; \
1774 } \
1775 } while (0) 1796 } while (0)
1797
1776 1798
1777 /* Produce designation and invocation codes at a place pointed by DST 1799 /* Produce designation and invocation codes at a place pointed by DST
1778 to use CHARSET. The element `spec.iso2022' of *CODING is updated. 1800 to use CHARSET. The element `spec.iso2022' of *CODING is updated.
1779 Return new DST. */ 1801 Return new DST. */
1780 1802
1995 there's not enough destination area to produce encoded codes 2017 there's not enough destination area to produce encoded codes
1996 (within macro EMIT_BYTES). */ 2018 (within macro EMIT_BYTES). */
1997 unsigned char *src_base; 2019 unsigned char *src_base;
1998 int c; 2020 int c;
1999 Lisp_Object translation_table; 2021 Lisp_Object translation_table;
2022 Lisp_Object safe_chars;
2023
2024 safe_chars = coding_safe_chars (coding);
2000 2025
2001 if (NILP (Venable_character_translation)) 2026 if (NILP (Venable_character_translation))
2002 translation_table = Qnil; 2027 translation_table = Qnil;
2003 else 2028 else
2004 { 2029 {
2009 2034
2010 coding->consumed_char = 0; 2035 coding->consumed_char = 0;
2011 coding->errors = 0; 2036 coding->errors = 0;
2012 while (1) 2037 while (1)
2013 { 2038 {
2014 int charset, c1, c2;
2015
2016 src_base = src; 2039 src_base = src;
2017 2040
2018 if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19))) 2041 if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19)))
2019 { 2042 {
2020 coding->result = CODING_FINISH_INSUFFICIENT_DST; 2043 coding->result = CODING_FINISH_INSUFFICIENT_DST;
2063 ENCODE_COMPOSITION_RULE (c); 2086 ENCODE_COMPOSITION_RULE (c);
2064 coding->composition_rule_follows = 0; 2087 coding->composition_rule_follows = 0;
2065 } 2088 }
2066 else 2089 else
2067 { 2090 {
2068 SPLIT_CHAR (c, charset, c1, c2); 2091 if (coding->flags & CODING_FLAG_ISO_SAFE
2069 ENCODE_ISO_CHARACTER (charset, c1, c2); 2092 && ! CODING_SAFE_CHAR_P (safe_chars, c))
2093 ENCODE_UNSAFE_CHARACTER (c);
2094 else
2095 ENCODE_ISO_CHARACTER (c);
2070 if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS) 2096 if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
2071 coding->composition_rule_follows = 1; 2097 coding->composition_rule_follows = 1;
2072 } 2098 }
2073 continue; 2099 continue;
2074 } 2100 }
2123 ENCODE_RESET_PLANE_AND_REGISTER; 2149 ENCODE_RESET_PLANE_AND_REGISTER;
2124 *dst++ = c; 2150 *dst++ = c;
2125 } 2151 }
2126 } 2152 }
2127 else if (ASCII_BYTE_P (c)) 2153 else if (ASCII_BYTE_P (c))
2128 ENCODE_ISO_CHARACTER (CHARSET_ASCII, c, /* dummy */ c1); 2154 ENCODE_ISO_CHARACTER (c);
2129 else if (SINGLE_BYTE_CHAR_P (c)) 2155 else if (SINGLE_BYTE_CHAR_P (c))
2130 { 2156 {
2131 *dst++ = c; 2157 *dst++ = c;
2132 coding->errors++; 2158 coding->errors++;
2133 } 2159 }
2160 else if (coding->flags & CODING_FLAG_ISO_SAFE
2161 && ! CODING_SAFE_CHAR_P (safe_chars, c))
2162 ENCODE_UNSAFE_CHARACTER (c);
2134 else 2163 else
2135 { 2164 ENCODE_ISO_CHARACTER (c);
2136 SPLIT_CHAR (c, charset, c1, c2);
2137 ENCODE_ISO_CHARACTER (charset, c1, c2);
2138 }
2139 2165
2140 coding->consumed_char++; 2166 coding->consumed_char++;
2141 } 2167 }
2142 2168
2143 label_end_of_loop: 2169 label_end_of_loop:
2968 goto label_invalid_coding_system; 2994 goto label_invalid_coding_system;
2969 } 2995 }
2970 else 2996 else
2971 goto label_invalid_coding_system; 2997 goto label_invalid_coding_system;
2972 2998
2973 val = Fplist_get (plist, Qsafe_charsets);
2974 if (EQ (val, Qt))
2975 {
2976 for (i = 0; i <= MAX_CHARSET; i++)
2977 coding->safe_charsets[i] = 1;
2978 }
2979 else
2980 {
2981 bzero (coding->safe_charsets, MAX_CHARSET + 1);
2982 while (CONSP (val))
2983 {
2984 if ((i = get_charset_id (XCAR (val))) >= 0)
2985 coding->safe_charsets[i] = 1;
2986 val = XCDR (val);
2987 }
2988 }
2989
2990 /* If the coding system has non-nil `composition' property, enable 2999 /* If the coding system has non-nil `composition' property, enable
2991 composition handling. */ 3000 composition handling. */
2992 val = Fplist_get (plist, Qcomposition); 3001 val = Fplist_get (plist, Qcomposition);
2993 if (!NILP (val)) 3002 if (!NILP (val))
2994 coding->composing = COMPOSITION_NO; 3003 coding->composing = COMPOSITION_NO;
5540 return detect_coding_system (XSTRING (string)->data, 5549 return detect_coding_system (XSTRING (string)->data,
5541 STRING_BYTES (XSTRING (string)), 5550 STRING_BYTES (XSTRING (string)),
5542 !NILP (highest)); 5551 !NILP (highest));
5543 } 5552 }
5544 5553
5554 /* Return an intersection of lists L1 and L2. */
5555
5556 static Lisp_Object
5557 intersection (l1, l2)
5558 Lisp_Object l1, l2;
5559 {
5560 Lisp_Object val;
5561
5562 for (val = Qnil; CONSP (l1); l1 = XCDR (l1))
5563 {
5564 if (!NILP (Fmemq (XCAR (l1), l2)))
5565 val = Fcons (XCAR (l1), val);
5566 }
5567 return val;
5568 }
5569
5570
5571 /* Subroutine for Ffind_coding_systems_region_internal.
5572
5573 Return a list of coding systems that safely encode the multibyte
5574 text between P and PEND. SAFE_CODINGS, if non-nil, is a list of
5575 possible coding systems. If it is nil, it means that we have not
5576 yet found any coding systems.
5577
5578 WORK_TABLE is a copy of the char-table Vchar_coding_system_table. An
5579 element of WORK_TABLE is set to t once the element is looked up.
5580
5581 If a non-ASCII single byte char is found, set
5582 *single_byte_char_found to 1. */
5583
5584 static Lisp_Object
5585 find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
5586 unsigned char *p, *pend;
5587 Lisp_Object safe_codings, work_table;
5588 int *single_byte_char_found;
5589 {
5590 int c, len, idx;
5591 Lisp_Object val;
5592
5593 while (p < pend)
5594 {
5595 c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
5596 p += len;
5597 if (ASCII_BYTE_P (c))
5598 /* We can ignore ASCII characters here. */
5599 continue;
5600 if (SINGLE_BYTE_CHAR_P (c))
5601 *single_byte_char_found = 1;
5602 if (NILP (safe_codings))
5603 continue;
5604 /* Check the safe coding systems for C. */
5605 val = char_table_ref_and_index (work_table, c, &idx);
5606 if (EQ (val, Qt))
5607 /* This element was already checked. Ignore it. */
5608 continue;
5609 /* Remember that we checked this element. */
5610 CHAR_TABLE_SET (work_table, idx, Qt);
5611
5612 /* If there are some safe coding systems for C and we have
5613 already found a set of safe coding systems for the preceding
5614 characters, take the intersection of the two sets. */
5615 if (!EQ (safe_codings, Qt) && !NILP (val))
5616 val = intersection (safe_codings, val);
5617 safe_codings = val;
5618 }
5619 return safe_codings;
5620 }
5621
5622
5623 /* Return a list of coding systems that safely encode the text between
5624 START and END. If the text contains only ASCII or is unibyte,
5625 return t. */
5626
5627 DEFUN ("find-coding-systems-region-internal",
5628 Ffind_coding_systems_region_internal,
5629 Sfind_coding_systems_region_internal, 2, 2, 0,
5630 "Internal use only.")
5631 (start, end)
5632 Lisp_Object start, end;
5633 {
5634 Lisp_Object work_table, safe_codings;
5635 int non_ascii_p = 0;
5636 int single_byte_char_found = 0;
5637 unsigned char *p1, *p1end, *p2, *p2end, *p;
5638 Lisp_Object args[2];
5639
5640 if (STRINGP (start))
5641 {
5642 if (!STRING_MULTIBYTE (start))
5643 return Qt;
5644 p1 = XSTRING (start)->data, p1end = p1 + STRING_BYTES (XSTRING (start));
5645 p2 = p2end = p1end;
5646 if (XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
5647 non_ascii_p = 1;
5648 }
5649 else
5650 {
5651 int from, to, stop;
5652
5653 CHECK_NUMBER_COERCE_MARKER (start, 0);
5654 CHECK_NUMBER_COERCE_MARKER (end, 1);
5655 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
5656 args_out_of_range (start, end);
5657 if (NILP (current_buffer->enable_multibyte_characters))
5658 return Qt;
5659 from = CHAR_TO_BYTE (XINT (start));
5660 to = CHAR_TO_BYTE (XINT (end));
5661 stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to;
5662 p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from);
5663 if (stop == to)
5664 p2 = p2end = p1end;
5665 else
5666 p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop);
5667 if (XINT (end) - XINT (start) != to - from)
5668 non_ascii_p = 1;
5669 }
5670
5671 if (!non_ascii_p)
5672 {
5673 /* We are sure that the text contains no multibyte character.
5674 Check if it contains eight-bit-graphic. */
5675 p = p1;
5676 for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);
5677 if (p == p1end)
5678 {
5679 for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
5680 if (p == p2end)
5681 return Qt;
5682 }
5683 }
5684
5685 /* The text contains non-ASCII characters. */
5686 work_table = Fcopy_sequence (Vchar_coding_system_table);
5687 safe_codings = find_safe_codings (p1, p1end, Qt, work_table,
5688 &single_byte_char_found);
5689 if (p2 < p2end)
5690 safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
5691 &single_byte_char_found);
5692
5693 if (!single_byte_char_found)
5694 {
5695 /* Append generic coding systems. */
5696 Lisp_Object args[2];
5697 args[0] = safe_codings;
5698 args[1] = Fchar_table_extra_slot (Vchar_coding_system_table,
5699 make_number (0));
5700 safe_codings = Fappend (make_number (2), args);
5701 }
5702 else
5703 safe_codings = Fcons (Qraw_text, Fcons (Qemacs_mule, safe_codings));
5704 return safe_codings;
5705 }
5706
5707
5545 Lisp_Object 5708 Lisp_Object
5546 code_convert_region1 (start, end, coding_system, encodep) 5709 code_convert_region1 (start, end, coding_system, encodep)
5547 Lisp_Object start, end, coding_system; 5710 Lisp_Object start, end, coding_system;
5548 int encodep; 5711 int encodep;
5549 { 5712 {
6194 staticpro (&Qtranslation_table_for_decode); 6357 staticpro (&Qtranslation_table_for_decode);
6195 6358
6196 Qtranslation_table_for_encode = intern ("translation-table-for-encode"); 6359 Qtranslation_table_for_encode = intern ("translation-table-for-encode");
6197 staticpro (&Qtranslation_table_for_encode); 6360 staticpro (&Qtranslation_table_for_encode);
6198 6361
6199 Qsafe_charsets = intern ("safe-charsets"); 6362 Qsafe_chars = intern ("safe-chars");
6200 staticpro (&Qsafe_charsets); 6363 staticpro (&Qsafe_chars);
6364
6365 Qchar_coding_system = intern ("char-coding-system");
6366 staticpro (&Qchar_coding_system);
6367
6368 /* Intern this now in case it isn't already done.
6369 Setting this variable twice is harmless.
6370 But don't staticpro it here--that is done in alloc.c. */
6371 Qchar_table_extra_slots = intern ("char-table-extra-slots");
6372 Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
6373 Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (1));
6201 6374
6202 Qvalid_codes = intern ("valid-codes"); 6375 Qvalid_codes = intern ("valid-codes");
6203 staticpro (&Qvalid_codes); 6376 staticpro (&Qvalid_codes);
6204 6377
6205 Qemacs_mule = intern ("emacs-mule"); 6378 Qemacs_mule = intern ("emacs-mule");
6212 defsubr (&Sread_coding_system); 6385 defsubr (&Sread_coding_system);
6213 defsubr (&Sread_non_nil_coding_system); 6386 defsubr (&Sread_non_nil_coding_system);
6214 defsubr (&Scheck_coding_system); 6387 defsubr (&Scheck_coding_system);
6215 defsubr (&Sdetect_coding_region); 6388 defsubr (&Sdetect_coding_region);
6216 defsubr (&Sdetect_coding_string); 6389 defsubr (&Sdetect_coding_string);
6390 defsubr (&Sfind_coding_systems_region_internal);
6217 defsubr (&Sdecode_coding_region); 6391 defsubr (&Sdecode_coding_region);
6218 defsubr (&Sencode_coding_region); 6392 defsubr (&Sencode_coding_region);
6219 defsubr (&Sdecode_coding_string); 6393 defsubr (&Sdecode_coding_string);
6220 defsubr (&Sencode_coding_string); 6394 defsubr (&Sencode_coding_string);
6221 defsubr (&Sdecode_sjis_char); 6395 defsubr (&Sdecode_sjis_char);
6414 coding system which can encode the text in the case that a default\n\ 6588 coding system which can encode the text in the case that a default\n\
6415 coding system used in each operation can't encode the text.\n\ 6589 coding system used in each operation can't encode the text.\n\
6416 \n\ 6590 \n\
6417 The default value is `select-safe-coding-system' (which see)."); 6591 The default value is `select-safe-coding-system' (which see).");
6418 Vselect_safe_coding_system_function = Qnil; 6592 Vselect_safe_coding_system_function = Qnil;
6593
6594 DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table,
6595 "Char-table containing safe coding systems of each characters.\n\
6596 Each element doesn't include such generic coding systems that can\n\
6597 encode any characters. They are in the first extra slot.");
6598 Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil);
6419 6599
6420 DEFVAR_BOOL ("inhibit-iso-escape-detection", 6600 DEFVAR_BOOL ("inhibit-iso-escape-detection",
6421 &inhibit_iso_escape_detection, 6601 &inhibit_iso_escape_detection,
6422 "If non-nil, Emacs ignores ISO2022's escape sequence on code detection.\n\ 6602 "If non-nil, Emacs ignores ISO2022's escape sequence on code detection.\n\
6423 \n\ 6603 \n\