comparison src/coding.c @ 101174:198d8bf06a4f

Fix previous changes.
author Kenichi Handa <handa@m17n.org>
date Wed, 14 Jan 2009 12:51:06 +0000
parents 674e67257137
children ee5f395f3c19
comparison
equal deleted inserted replaced
101173:825f62fa0199 101174:198d8bf06a4f
742 } \ 742 } \
743 consumed_chars++; \ 743 consumed_chars++; \
744 } while (0) 744 } while (0)
745 745
746 /* Safely get two bytes from the source text pointed by SRC which ends 746 /* Safely get two bytes from the source text pointed by SRC which ends
747 at SRC_END, and set C1 and C2 to those bytes. If there are not 747 at SRC_END, and set C1 and C2 to those bytes while skipping the
748 enough bytes in the source for C1, it jumps to `no_more_source'. 748 heading multibyte characters. If there are not enough bytes in the
749 If there are not enough bytes in the source for C2, set C2 to -1. 749 source, it jumps to `no_more_source'. If multibytep is nonzero and
750 If multibytep is nonzero and a multibyte character is found at SRC, 750 a multibyte character is found for C2, set C2 to the negative value
751 set C1 and/or C2 to the negative value of the character code. The 751 of the character code. The caller should declare and set these
752 caller should declare and set these variables appropriately in 752 variables appropriately in advance:
753 advance:
754 src, src_end, multibytep 753 src, src_end, multibytep
755 It is intended that this macro is used in detect_coding_utf_16. */ 754 It is intended that this macro is used in detect_coding_utf_16. */
756 755
757 #define TWO_MORE_BYTES(c1, c2) \ 756 #define TWO_MORE_BYTES(c1, c2) \
758 do { \ 757 do { \
759 if (src == src_end) \ 758 do { \
760 goto no_more_source; \ 759 if (src == src_end) \
761 c1 = *src++; \ 760 goto no_more_source; \
762 if (multibytep && (c1 & 0x80)) \ 761 c1 = *src++; \
763 { \ 762 if (multibytep && (c1 & 0x80)) \
764 if ((c1 & 0xFE) == 0xC0) \ 763 { \
765 c1 = ((c1 & 1) << 6) | *src++; \ 764 if ((c1 & 0xFE) == 0xC0) \
766 else \ 765 c1 = ((c1 & 1) << 6) | *src++; \
767 { \ 766 else \
768 c1 = c2 = -1; \ 767 { \
769 break; \ 768 src += BYTES_BY_CHAR_HEAD (c1) - 1; \
770 } \ 769 c1 = -1; \
771 } \ 770 } \
772 if (src == src_end) \ 771 } \
773 c2 = -1; \ 772 } while (c1 < 0); \
774 else \ 773 if (src == src_end) \
775 { \ 774 goto no_more_source; \
776 c2 = *src++; \ 775 c2 = *src++; \
777 if (multibytep && (c2 & 0x80)) \ 776 if (multibytep && (c2 & 0x80)) \
778 { \ 777 { \
779 if ((c2 & 0xFE) == 0xC0) \ 778 if ((c2 & 0xFE) == 0xC0) \
780 c2 = ((c2 & 1) << 6) | *src++; \ 779 c2 = ((c2 & 1) << 6) | *src++; \
781 else \ 780 else \
782 c2 = -1; \ 781 c2 = -1; \
783 } \ 782 } \
784 } \
785 } while (0) 783 } while (0)
786 784
787 785
788 #define ONE_MORE_BYTE_NO_CHECK(c) \ 786 #define ONE_MORE_BYTE_NO_CHECK(c) \
789 do { \ 787 do { \
1631 | CATEGORY_MASK_UTF_16_AUTO); 1629 | CATEGORY_MASK_UTF_16_AUTO);
1632 detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE 1630 detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1633 | CATEGORY_MASK_UTF_16_BE_NOSIG 1631 | CATEGORY_MASK_UTF_16_BE_NOSIG
1634 | CATEGORY_MASK_UTF_16_LE_NOSIG); 1632 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1635 } 1633 }
1636 else if (c1 < 0 || c2 < 0) 1634 else if (c2 < 0)
1637 { 1635 {
1638 detect_info->rejected |= CATEGORY_MASK_UTF_16; 1636 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1639 return 0; 1637 return 0;
1640 } 1638 }
1641 else 1639 else
1654 |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE); 1652 |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
1655 1653
1656 while (1) 1654 while (1)
1657 { 1655 {
1658 TWO_MORE_BYTES (c1, c2); 1656 TWO_MORE_BYTES (c1, c2);
1659 if (c1 < 0 || c2 < 0) 1657 if (c2 < 0)
1660 break; 1658 break;
1661 if (! e[c1]) 1659 if (! e[c1])
1662 { 1660 {
1663 e[c1] = 1; 1661 e[c1] = 1;
1664 e_num++; 1662 e_num++;