Mercurial > emacs
changeset 89420:c3e67ce6ee0f
(Qsignature, Qendian): Delete these variables.
(syms_of_coding): Don't initialize them.
(CATEGORY_MASK_UTF_16_AUTO): New macro.
(detect_coding_utf_16): Add CATEGORY_MASK_UTF_16_AUTO in
detect_info->found.
(decode_coding_utf_16): Don't detect BOM here.
(encode_coding_utf_16): Produce BOM if CODING_UTF_16_BOM (coding)
is NOT utf_16_without_bom.
(setup_coding_system): For a coding system of type utf-16, check
whether the attribute :endian is Qbig (instead of whether it is nil),
and set CODING_REQUIRE_DETECTION_MASK if BOM detection is required.
(detect_coding): If coding type is utf-16 and BOM detection is
required, detect it.
(Fdefine_coding_system_internal): For a coding system of type
utf-16, check whether the attribute :endian is Qbig (instead of
whether it is nil).
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Tue, 06 May 2003 12:28:11 +0000 |
parents | 18e57407a82b |
children | 8357b304ef57 |
files | src/coding.c |
diffstat | 1 files changed, 51 insertions(+), 35 deletions(-) [+] |
line wrap: on
line diff
--- a/src/coding.c Tue May 06 08:22:13 2003 +0000 +++ b/src/coding.c Tue May 06 12:28:11 2003 +0000 @@ -308,7 +308,7 @@ Lisp_Object Qdefault_char; Lisp_Object Qno_conversion, Qundecided; Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5; -Lisp_Object Qsignature, Qendian, Qbig, Qlittle; +Lisp_Object Qbig, Qlittle; Lisp_Object Qcoding_system_history; Lisp_Object Qvalid_codes; @@ -626,6 +626,7 @@ #define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else) #define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else) #define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8) +#define CATEGORY_MASK_UTF_16_AUTO (1 << coding_category_utf_16_auto) #define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be) #define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le) #define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig) @@ -1357,12 +1358,14 @@ if ((c1 == 0xFF) && (c2 == 0xFE)) { - detect_info->found |= CATEGORY_MASK_UTF_16_LE; + detect_info->found |= (CATEGORY_MASK_UTF_16_LE + | CATEGORY_MASK_UTF_16_AUTO); detect_info->rejected |= CATEGORY_MASK_UTF_16_BE; } else if ((c1 == 0xFE) && (c2 == 0xFF)) { - detect_info->found |= CATEGORY_MASK_UTF_16_BE; + detect_info->found |= (CATEGORY_MASK_UTF_16_BE + | CATEGORY_MASK_UTF_16_AUTO); detect_info->rejected |= CATEGORY_MASK_UTF_16_LE; } no_more_source: @@ -1387,7 +1390,7 @@ CODING_GET_INFO (coding, attr, eol_type, charset_list); - if (bom != utf_16_without_bom) + if (bom == utf_16_with_bom) { int c, c1, c2; @@ -1395,33 +1398,22 @@ ONE_MORE_BYTE (c1); ONE_MORE_BYTE (c2); c = (c1 << 8) | c2; - if (bom == utf_16_with_bom) - { - if (endian == utf_16_big_endian - ? c != 0xFEFF : c != 0xFFFE) - { - /* We are sure that there's enouph room at CHARBUF. */ - *charbuf++ = c1; - *charbuf++ = c2; - coding->errors++; - } - } - else + + if (endian == utf_16_big_endian + ? 
c != 0xFEFF : c != 0xFFFE) { - if (c == 0xFEFF) - CODING_UTF_16_ENDIAN (coding) - = endian = utf_16_big_endian; - else if (c == 0xFFFE) - CODING_UTF_16_ENDIAN (coding) - = endian = utf_16_little_endian; - else - { - CODING_UTF_16_ENDIAN (coding) - = endian = utf_16_big_endian; - src = src_base; - } + /* The first two bytes are not BOM. Treat them as bytes + for a normal character. */ + src = src_base; + coding->errors++; } - CODING_UTF_16_BOM (coding) = utf_16_with_bom; + CODING_UTF_16_BOM (coding) = utf_16_without_bom; + } + else if (bom == utf_16_detect_bom) + { + /* We have already tried to detect BOM and failed in + detect_coding. */ + CODING_UTF_16_BOM (coding) = utf_16_without_bom; } while (1) @@ -1494,7 +1486,7 @@ CODING_GET_INFO (coding, attrs, eol_type, charset_list); - if (bom == utf_16_with_bom) + if (bom != utf_16_without_bom) { ASSURE_DESTINATION (safe_room); if (big_endian) @@ -4859,7 +4851,7 @@ : EQ (val, Qt) ? utf_16_with_bom : utf_16_without_bom); val = AREF (attrs, coding_attr_utf_16_endian); - CODING_UTF_16_ENDIAN (coding) = (NILP (val) ? utf_16_big_endian + CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? 
utf_16_big_endian : utf_16_little_endian); CODING_UTF_16_SURROGATE (coding) = 0; coding->detector = detect_coding_utf_16; @@ -4867,6 +4859,8 @@ coding->encoder = encode_coding_utf_16; coding->common_flags |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK); + if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom) + coding->common_flags |= CODING_REQUIRE_DETECTION_MASK; } else if (EQ (coding_type, Qccl)) { @@ -5285,6 +5279,25 @@ } } } + else if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qutf_16)) + { + Lisp_Object coding_systems; + struct coding_detection_info detect_info; + + coding_systems + = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom); + detect_info.found = detect_info.rejected = 0; + if (CONSP (coding_systems) + && detect_coding_utf_16 (coding, &detect_info) + && (detect_info.found & (CATEGORY_MASK_UTF_16_LE + | CATEGORY_MASK_UTF_16_BE))) + { + if (detect_info.found & CATEGORY_MASK_UTF_16_LE) + setup_coding_system (XCAR (coding_systems), coding); + else + setup_coding_system (XCDR (coding_systems), coding); + } + } attrs = CODING_ID_ATTRS (coding->id); coding_type = CODING_ATTR_TYPE (attrs); @@ -7957,15 +7970,20 @@ ASET (attrs, coding_attr_utf_16_bom, bom); endian = args[coding_arg_utf16_endian]; + CHECK_SYMBOL (endian); + if (NILP (endian)) + endian = Qbig; + else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle)) + error ("Invalid endian: %s", XSYMBOL (endian)->name->data); ASET (attrs, coding_attr_utf_16_endian, endian); category = (CONSP (bom) ? coding_category_utf_16_auto : NILP (bom) - ? (NILP (endian) + ? (EQ (endian, Qbig) ? coding_category_utf_16_be_nosig : coding_category_utf_16_le_nosig) - : (NILP (endian) + : (EQ (endian, Qbig) ? coding_category_utf_16_be : coding_category_utf_16_le)); } @@ -8407,8 +8425,6 @@ DEFSYM (Qutf_8, "utf-8"); DEFSYM (Qutf_16, "utf-16"); - DEFSYM (Qsignature, "signature"); - DEFSYM (Qendian, "endian"); DEFSYM (Qbig, "big"); DEFSYM (Qlittle, "little");