# HG changeset patch # User Kenichi Handa # Date 1174628654 0 # Node ID 385c9b28d8a708ad32d8095946176d6adc7a5503 # Parent c0409ee15cee0ffd93781bebd65b4b2f3c676741 (analyse_first): Fix for multibyte characters in "case charset:" and "case categoryspec:". diff -r c0409ee15cee -r 385c9b28d8a7 src/regex.c --- a/src/regex.c Wed Mar 21 13:33:07 2007 +0000 +++ b/src/regex.c Fri Mar 23 05:44:14 2007 +0000 @@ -4103,17 +4103,21 @@ if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not) fastmap[j] = 1; - if ((not && multibyte) - /* Any leading code can possibly start a character +#ifdef emacs + if (/* Any leading code can possibly start a character which doesn't match the specified set of characters. */ - || (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2]) - && CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0)) - /* If we can match a character class, we can match - any multibyte characters. */ + not + || + /* If we can match a character class, we can match any + multibyte characters. */ + (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2]) + && CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0)) + { if (match_any_multibyte_characters == false) { - for (j = 0x80; j < (1 << BYTEWIDTH); j++) + for (j = MIN_MULTIBYTE_LEADING_CODE; + j <= MAX_MULTIBYTE_LEADING_CODE; j++) fastmap[j] = 1; match_any_multibyte_characters = true; } @@ -4145,6 +4149,7 @@ fastmap[j] = 1; } } +#endif break; case syntaxspec: @@ -4167,20 +4172,18 @@ if (!fastmap) break; not = (re_opcode_t)p[-1] == notcategoryspec; k = *p++; - for (j = (multibyte ? 127 : (1 << BYTEWIDTH)); j >= 0; j--) + for (j = (1 << BYTEWIDTH); j >= 0; j--) if ((CHAR_HAS_CATEGORY (j, k)) ^ not) fastmap[j] = 1; - if (multibyte) + /* Any leading code can possibly start a character which + has or doesn't have the specified category. */ + if (match_any_multibyte_characters == false) { - /* Any character set can possibly contain a character - whose category is K (or not). 
*/ - if (match_any_multibyte_characters == false) - { - for (j = 0x80; j < (1 << BYTEWIDTH); j++) - fastmap[j] = 1; - match_any_multibyte_characters = true; - } + for (j = MIN_MULTIBYTE_LEADING_CODE; + j <= MAX_MULTIBYTE_LEADING_CODE; j++) + fastmap[j] = 1; + match_any_multibyte_characters = true; } break;