comparison src/coding.c @ 101172:674e67257137

(TWO_MORE_BYTES): New macro. (detect_coding_utf_16): Use TWO_MORE_BYTES instead of ONE_MORE_BYTE.
author Kenichi Handa <handa@m17n.org>
date Wed, 14 Jan 2009 12:17:52 +0000
parents 92b6c5b767f8
children 198d8bf06a4f
comparison
equal deleted inserted replaced
101171:a94440e70b7c 101172:674e67257137
739 record_conversion_result \ 739 record_conversion_result \
740 (coding, CODING_RESULT_INVALID_SRC); \ 740 (coding, CODING_RESULT_INVALID_SRC); \
741 } \ 741 } \
742 } \ 742 } \
743 consumed_chars++; \ 743 consumed_chars++; \
744 } while (0)
745
746 /* Safely get two bytes from the source text pointed to by SRC, which
747 ends at SRC_END, and set C1 and C2 to those bytes. If there are not
748 enough bytes in the source for C1, jump to `no_more_source'. If
749 there are not enough bytes for C2, set C2 to -1. If multibytep is
750 nonzero and a byte with the high bit set is found: a lead byte of
751 0xC0 or 0xC1 is combined with the next byte into one byte value;
752 any other such byte sets C2 (and C1 too, if found at C1) to -1. The
753 caller should declare and set these variables in advance:
754 src, src_end, multibytep
755 This macro is intended for use in detect_coding_utf_16. */
756
757 #define TWO_MORE_BYTES(c1, c2) \
758 do { \
759 if (src == src_end) \
760 goto no_more_source; \
761 c1 = *src++; \
762 if (multibytep && (c1 & 0x80)) \
763 { \
764 if ((c1 & 0xFE) == 0xC0) \
765 c1 = ((c1 & 1) << 6) | *src++; \
766 else \
767 { \
768 c1 = c2 = -1; \
769 break; \
770 } \
771 } \
772 if (src == src_end) \
773 c2 = -1; \
774 else \
775 { \
776 c2 = *src++; \
777 if (multibytep && (c2 & 0x80)) \
778 { \
779 if ((c2 & 0xFE) == 0xC0) \
780 c2 = ((c2 & 1) << 6) | *src++; \
781 else \
782 c2 = -1; \
783 } \
784 } \
744 } while (0) 785 } while (0)
745 786
746 787
747 #define ONE_MORE_BYTE_NO_CHECK(c) \ 788 #define ONE_MORE_BYTE_NO_CHECK(c) \
748 do { \ 789 do { \
1573 { 1614 {
1574 detect_info->rejected |= CATEGORY_MASK_UTF_16; 1615 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1575 return 0; 1616 return 0;
1576 } 1617 }
1577 1618
1578 ONE_MORE_BYTE (c1); 1619 TWO_MORE_BYTES (c1, c2);
1579 ONE_MORE_BYTE (c2);
1580 if ((c1 == 0xFF) && (c2 == 0xFE)) 1620 if ((c1 == 0xFF) && (c2 == 0xFE))
1581 { 1621 {
1582 detect_info->found |= (CATEGORY_MASK_UTF_16_LE 1622 detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1583 | CATEGORY_MASK_UTF_16_AUTO); 1623 | CATEGORY_MASK_UTF_16_AUTO);
1584 detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE 1624 detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1591 | CATEGORY_MASK_UTF_16_AUTO); 1631 | CATEGORY_MASK_UTF_16_AUTO);
1592 detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE 1632 detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1593 | CATEGORY_MASK_UTF_16_BE_NOSIG 1633 | CATEGORY_MASK_UTF_16_BE_NOSIG
1594 | CATEGORY_MASK_UTF_16_LE_NOSIG); 1634 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1595 } 1635 }
1636 else if (c1 < 0 || c2 < 0)
1637 {
1638 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1639 return 0;
1640 }
1596 else 1641 else
1597 { 1642 {
1598 /* We check the dispersion of Eth and Oth bytes where E is even and 1643 /* We check the dispersion of Eth and Oth bytes where E is even and
1599 O is odd. If both are high, we assume binary data.*/ 1644 O is odd. If both are high, we assume binary data.*/
1600 unsigned char e[256], o[256]; 1645 unsigned char e[256], o[256];
1608 detect_info->rejected 1653 detect_info->rejected
1609 |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE); 1654 |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
1610 1655
1611 while (1) 1656 while (1)
1612 { 1657 {
1613 ONE_MORE_BYTE (c1); 1658 TWO_MORE_BYTES (c1, c2);
1614 ONE_MORE_BYTE (c2); 1659 if (c1 < 0 || c2 < 0)
1660 break;
1615 if (! e[c1]) 1661 if (! e[c1])
1616 { 1662 {
1617 e[c1] = 1; 1663 e[c1] = 1;
1618 e_num++; 1664 e_num++;
1619 if (e_num >= 128) 1665 if (e_num >= 128)