Mercurial > emacs
comparison src/coding.c @ 95533:831c8ee4d884
(detect_coding): Fix handling of coding->head_ascii.
Be sure to call setup_coding_system when a proper coding system is
found.
(detect_coding_system): Fix handling of coding->head_ascii.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Wed, 04 Jun 2008 07:52:46 +0000 |
parents | c99f0a16c077 |
children | 862c7386145c |
comparison
equal
deleted
inserted
replaced
95532:b657ba21e4d3 | 95533:831c8ee4d884 |
---|---|
5780 coding->consumed = coding->consumed_char = 0; | 5780 coding->consumed = coding->consumed_char = 0; |
5781 coding->produced = coding->produced_char = 0; | 5781 coding->produced = coding->produced_char = 0; |
5782 coding_set_source (coding); | 5782 coding_set_source (coding); |
5783 | 5783 |
5784 src_end = coding->source + coding->src_bytes; | 5784 src_end = coding->source + coding->src_bytes; |
5785 coding->head_ascii = 0; | |
5785 | 5786 |
5786 /* If we have not yet decided the text encoding type, detect it | 5787 /* If we have not yet decided the text encoding type, detect it |
5787 now. */ | 5788 now. */ |
5788 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided)) | 5789 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided)) |
5789 { | 5790 { |
5790 int c, i; | 5791 int c, i; |
5791 struct coding_detection_info detect_info; | 5792 struct coding_detection_info detect_info; |
5792 int null_byte_found = 0, eight_bit_found = 0; | 5793 int null_byte_found = 0, eight_bit_found = 0; |
5793 | 5794 |
5794 detect_info.checked = detect_info.found = detect_info.rejected = 0; | 5795 detect_info.checked = detect_info.found = detect_info.rejected = 0; |
5795 coding->head_ascii = -1; | |
5796 for (src = coding->source; src < src_end; src++) | 5796 for (src = coding->source; src < src_end; src++) |
5797 { | 5797 { |
5798 c = *src; | 5798 c = *src; |
5799 if (c & 0x80) | 5799 if (c & 0x80) |
5800 { | 5800 { |
5801 eight_bit_found = 1; | 5801 eight_bit_found = 1; |
5802 if (coding->head_ascii < 0) | |
5803 coding->head_ascii = src - coding->source; | |
5804 if (null_byte_found) | 5802 if (null_byte_found) |
5805 break; | 5803 break; |
5806 } | 5804 } |
5807 else if (c < 0x20) | 5805 else if (c < 0x20) |
5808 { | 5806 { |
5809 if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) | 5807 if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) |
5810 && ! inhibit_iso_escape_detection | 5808 && ! inhibit_iso_escape_detection |
5811 && ! detect_info.checked) | 5809 && ! detect_info.checked) |
5812 { | 5810 { |
5813 if (coding->head_ascii < 0) | |
5814 coding->head_ascii = src - coding->source; | |
5815 if (detect_coding_iso_2022 (coding, &detect_info)) | 5811 if (detect_coding_iso_2022 (coding, &detect_info)) |
5816 { | 5812 { |
5817 /* We have scanned the whole data. */ | 5813 /* We have scanned the whole data. */ |
5818 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE)) | 5814 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE)) |
5819 /* We didn't find an 8-bit code. We may have | 5815 { |
5820 found a null-byte, but it's very rare that | 5816 /* We didn't find an 8-bit code. We may |
5821 a binary file confirm to ISO-2022. */ | 5817 have found a null-byte, but it's very |
5822 src = src_end; | 5818 rare that a binary file confirm to |
5819 ISO-2022. */ | |
5820 src = src_end; | |
5821 coding->head_ascii = src - coding->source; | |
5822 } | |
5823 detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE; | |
5823 break; | 5824 break; |
5824 } | 5825 } |
5825 } | 5826 } |
5826 else if (! c) | 5827 else if (! c) |
5827 { | 5828 { |
5828 null_byte_found = 1; | 5829 null_byte_found = 1; |
5829 if (eight_bit_found) | 5830 if (eight_bit_found) |
5830 break; | 5831 break; |
5831 } | 5832 } |
5833 coding->head_ascii++; | |
5832 } | 5834 } |
5833 } | 5835 else |
5834 if (coding->head_ascii < 0) | 5836 coding->head_ascii++; |
5835 coding->head_ascii = src - coding->source; | 5837 } |
5836 | 5838 |
5837 if (null_byte_found || eight_bit_found | 5839 if (null_byte_found || eight_bit_found |
5838 || coding->head_ascii < coding->src_bytes | 5840 || coding->head_ascii < coding->src_bytes |
5839 || detect_info.found) | 5841 || detect_info.found) |
5840 { | 5842 { |
5884 category = coding_category_utf_16_be; | 5886 category = coding_category_utf_16_be; |
5885 } | 5887 } |
5886 break; | 5888 break; |
5887 } | 5889 } |
5888 } | 5890 } |
5889 | |
5890 if (i < coding_category_raw_text) | |
5891 setup_coding_system (CODING_ID_NAME (this->id), coding); | |
5892 else if (null_byte_found) | |
5893 setup_coding_system (Qno_conversion, coding); | |
5894 else if ((detect_info.rejected & CATEGORY_MASK_ANY) | |
5895 == CATEGORY_MASK_ANY) | |
5896 setup_coding_system (Qraw_text, coding); | |
5897 else if (detect_info.rejected) | |
5898 for (i = 0; i < coding_category_raw_text; i++) | |
5899 if (! (detect_info.rejected & (1 << coding_priorities[i]))) | |
5900 { | |
5901 this = coding_categories + coding_priorities[i]; | |
5902 setup_coding_system (CODING_ID_NAME (this->id), coding); | |
5903 break; | |
5904 } | |
5905 } | 5891 } |
5892 | |
5893 if (i < coding_category_raw_text) | |
5894 setup_coding_system (CODING_ID_NAME (this->id), coding); | |
5895 else if (null_byte_found) | |
5896 setup_coding_system (Qno_conversion, coding); | |
5897 else if ((detect_info.rejected & CATEGORY_MASK_ANY) | |
5898 == CATEGORY_MASK_ANY) | |
5899 setup_coding_system (Qraw_text, coding); | |
5900 else if (detect_info.rejected) | |
5901 for (i = 0; i < coding_category_raw_text; i++) | |
5902 if (! (detect_info.rejected & (1 << coding_priorities[i]))) | |
5903 { | |
5904 this = coding_categories + coding_priorities[i]; | |
5905 setup_coding_system (CODING_ID_NAME (this->id), coding); | |
5906 break; | |
5907 } | |
5906 } | 5908 } |
5907 } | 5909 } |
5908 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id))) | 5910 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id))) |
5909 == coding_category_utf_8_auto) | 5911 == coding_category_utf_8_auto) |
5910 { | 5912 { |
7653 coding.src_chars = src_chars; | 7655 coding.src_chars = src_chars; |
7654 coding.src_bytes = src_bytes; | 7656 coding.src_bytes = src_bytes; |
7655 coding.src_multibyte = multibytep; | 7657 coding.src_multibyte = multibytep; |
7656 coding.consumed = 0; | 7658 coding.consumed = 0; |
7657 coding.mode |= CODING_MODE_LAST_BLOCK; | 7659 coding.mode |= CODING_MODE_LAST_BLOCK; |
7660 coding.head_ascii = 0; | |
7658 | 7661 |
7659 detect_info.checked = detect_info.found = detect_info.rejected = 0; | 7662 detect_info.checked = detect_info.found = detect_info.rejected = 0; |
7660 | 7663 |
7661 /* At first, detect text-format if necessary. */ | 7664 /* At first, detect text-format if necessary. */ |
7662 base_category = XINT (CODING_ATTR_CATEGORY (attrs)); | 7665 base_category = XINT (CODING_ATTR_CATEGORY (attrs)); |
7664 { | 7667 { |
7665 enum coding_category category; | 7668 enum coding_category category; |
7666 struct coding_system *this; | 7669 struct coding_system *this; |
7667 int c, i; | 7670 int c, i; |
7668 | 7671 |
7669 coding.head_ascii = -1; | |
7670 /* Skip all ASCII bytes except for a few ISO2022 controls. */ | 7672 /* Skip all ASCII bytes except for a few ISO2022 controls. */ |
7671 for (; src < src_end; src++) | 7673 for (; src < src_end; src++) |
7672 { | 7674 { |
7673 c = *src; | 7675 c = *src; |
7674 if (c & 0x80) | 7676 if (c & 0x80) |
7675 { | 7677 { |
7676 eight_bit_found = 1; | 7678 eight_bit_found = 1; |
7677 if (coding.head_ascii < 0) | |
7678 coding.head_ascii = src - coding.source; | |
7679 if (null_byte_found) | 7679 if (null_byte_found) |
7680 break; | 7680 break; |
7681 } | 7681 } |
7682 if (c < 0x20) | 7682 else if (c < 0x20) |
7683 { | 7683 { |
7684 if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) | 7684 if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) |
7685 && ! inhibit_iso_escape_detection | 7685 && ! inhibit_iso_escape_detection |
7686 && ! detect_info.checked) | 7686 && ! detect_info.checked) |
7687 { | 7687 { |
7688 if (coding.head_ascii < 0) | |
7689 coding.head_ascii = src - coding.source; | |
7690 if (detect_coding_iso_2022 (&coding, &detect_info)) | 7688 if (detect_coding_iso_2022 (&coding, &detect_info)) |
7691 { | 7689 { |
7692 /* We have scanned the whole data. */ | 7690 /* We have scanned the whole data. */ |
7693 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE)) | 7691 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE)) |
7694 /* We didn't find an 8-bit code. We may have | 7692 { |
7695 found a null-byte, but it's very rare that | 7693 /* We didn't find an 8-bit code. We may |
7696 a binary file confirm to ISO-2022. */ | 7694 have found a null-byte, but it's very |
7697 src = src_end; | 7695 rare that a binary file confirm to |
7696 ISO-2022. */ | |
7697 src = src_end; | |
7698 coding.head_ascii = src - coding.source; | |
7699 } | |
7700 detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE; | |
7698 break; | 7701 break; |
7699 } | 7702 } |
7700 } | 7703 } |
7701 else if (! c) | 7704 else if (! c) |
7702 { | 7705 { |
7703 null_byte_found = 1; | 7706 null_byte_found = 1; |
7704 if (eight_bit_found) | 7707 if (eight_bit_found) |
7705 break; | 7708 break; |
7706 } | 7709 } |
7710 coding.head_ascii++; | |
7707 } | 7711 } |
7708 } | 7712 else |
7709 if (coding.head_ascii < 0) | 7713 coding.head_ascii++; |
7710 coding.head_ascii = src - coding.source; | 7714 } |
7711 | 7715 |
7712 if (null_byte_found || eight_bit_found | 7716 if (null_byte_found || eight_bit_found |
7713 || coding.head_ascii < coding.src_bytes | 7717 || coding.head_ascii < coding.src_bytes |
7714 || detect_info.found) | 7718 || detect_info.found) |
7715 { | 7719 { |