Mercurial > emacs
changeset 101172:674e67257137
(TWO_MORE_BYTES): New macro.
(detect_coding_utf_16): Use TWO_MORE_BYTES instead of
ONE_MORE_BYTE.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Wed, 14 Jan 2009 12:17:52 +0000 |
parents | a94440e70b7c |
children | 825f62fa0199 |
files | src/coding.c |
diffstat | 1 files changed, 50 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/src/coding.c Wed Jan 14 12:08:49 2009 +0000
+++ b/src/coding.c Wed Jan 14 12:17:52 2009 +0000
@@ -743,6 +743,47 @@
     consumed_chars++;                           \
   } while (0)
 
+/* Safely get two bytes from the source text pointed by SRC which ends
+   at SRC_END, and set C1 and C2 to those bytes.  If there are not
+   enough bytes in the source for C1, it jumps to `no_more_source'.
+   If there are not enough bytes in the source for C2, set C2 to -1.
+   If multibytep is nonzero and a multibyte character is found at SRC,
+   set C1 and/or C2 to the negative value of the character code.  The
+   caller should declare and set these variables appropriately in
+   advance:
+       src, src_end, multibytep
+   It is intended that this macro is used in detect_coding_utf_16.  */
+
+#define TWO_MORE_BYTES(c1, c2)                  \
+  do {                                          \
+    if (src == src_end)                         \
+      goto no_more_source;                      \
+    c1 = *src++;                                \
+    if (multibytep && (c1 & 0x80))              \
+      {                                         \
+        if ((c1 & 0xFE) == 0xC0)                \
+          c1 = ((c1 & 1) << 6) | *src++;        \
+        else                                    \
+          {                                     \
+            c1 = c2 = -1;                       \
+            break;                              \
+          }                                     \
+      }                                         \
+    if (src == src_end)                         \
+      c2 = -1;                                  \
+    else                                        \
+      {                                         \
+        c2 = *src++;                            \
+        if (multibytep && (c2 & 0x80))          \
+          {                                     \
+            if ((c2 & 0xFE) == 0xC0)            \
+              c2 = ((c2 & 1) << 6) | *src++;    \
+            else                                \
+              c2 = -1;                          \
+          }                                     \
+      }                                         \
+  } while (0)
+
 
 #define ONE_MORE_BYTE_NO_CHECK(c)               \
   do {                                          \
@@ -1575,8 +1616,7 @@
       return 0;
     }
 
-  ONE_MORE_BYTE (c1);
-  ONE_MORE_BYTE (c2);
+  TWO_MORE_BYTES (c1, c2);
   if ((c1 == 0xFF) && (c2 == 0xFE))
     {
       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
@@ -1593,6 +1633,11 @@
                                | CATEGORY_MASK_UTF_16_BE_NOSIG
                                | CATEGORY_MASK_UTF_16_LE_NOSIG);
     }
+  else if (c1 < 0 || c2 < 0)
+    {
+      detect_info->rejected |= CATEGORY_MASK_UTF_16;
+      return 0;
+    }
   else
     {
       /* We check the dispersion of Eth and Oth bytes where E is even and
@@ -1610,8 +1655,9 @@
 
       while (1)
         {
-          ONE_MORE_BYTE (c1);
-          ONE_MORE_BYTE (c2);
+          TWO_MORE_BYTES (c1, c2);
+          if (c1 < 0 || c2 < 0)
+            break;
           if (! e[c1])
             {
               e[c1] = 1;
```