Mercurial > emacs
comparison src/regex.c @ 90790:385c9b28d8a7
(analyse_first): Fix for multibyte characters in "case
charset:" and "case categoryspec:".
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Fri, 23 Mar 2007 05:44:14 +0000 |
parents | 128e9642f988 |
children | 3619e7770f2e |
comparison
equal
deleted
inserted
replaced
90789:c0409ee15cee | 90790:385c9b28d8a7 |
---|---|
4101 for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++; | 4101 for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++; |
4102 j >= 0; j--) | 4102 j >= 0; j--) |
4103 if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not) | 4103 if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not) |
4104 fastmap[j] = 1; | 4104 fastmap[j] = 1; |
4105 | 4105 |
4106 if ((not && multibyte) | 4106 #ifdef emacs |
4107 /* Any leading code can possibly start a character | 4107 if (/* Any leading code can possibly start a character |
4108 which doesn't match the specified set of characters. */ | 4108 which doesn't match the specified set of characters. */ |
4109 || (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2]) | 4109 not |
4110 && CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0)) | 4110 || |
4111 /* If we can match a character class, we can match | 4111 /* If we can match a character class, we can match any |
4112 any multibyte characters. */ | 4112 multibyte characters. */ |
4113 (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2]) | |
4114 && CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0)) | |
4115 | |
4113 { | 4116 { |
4114 if (match_any_multibyte_characters == false) | 4117 if (match_any_multibyte_characters == false) |
4115 { | 4118 { |
4116 for (j = 0x80; j < (1 << BYTEWIDTH); j++) | 4119 for (j = MIN_MULTIBYTE_LEADING_CODE; |
4120 j <= MAX_MULTIBYTE_LEADING_CODE; j++) | |
4117 fastmap[j] = 1; | 4121 fastmap[j] = 1; |
4118 match_any_multibyte_characters = true; | 4122 match_any_multibyte_characters = true; |
4119 } | 4123 } |
4120 } | 4124 } |
4121 | 4125 |
4143 lc2 = CHAR_LEADING_CODE (c); | 4147 lc2 = CHAR_LEADING_CODE (c); |
4144 for (j = lc1; j <= lc2; j++) | 4148 for (j = lc1; j <= lc2; j++) |
4145 fastmap[j] = 1; | 4149 fastmap[j] = 1; |
4146 } | 4150 } |
4147 } | 4151 } |
4152 #endif | |
4148 break; | 4153 break; |
4149 | 4154 |
4150 case syntaxspec: | 4155 case syntaxspec: |
4151 case notsyntaxspec: | 4156 case notsyntaxspec: |
4152 if (!fastmap) break; | 4157 if (!fastmap) break; |
4165 case categoryspec: | 4170 case categoryspec: |
4166 case notcategoryspec: | 4171 case notcategoryspec: |
4167 if (!fastmap) break; | 4172 if (!fastmap) break; |
4168 not = (re_opcode_t)p[-1] == notcategoryspec; | 4173 not = (re_opcode_t)p[-1] == notcategoryspec; |
4169 k = *p++; | 4174 k = *p++; |
4170 for (j = (multibyte ? 127 : (1 << BYTEWIDTH)); j >= 0; j--) | 4175 for (j = (1 << BYTEWIDTH); j >= 0; j--) |
4171 if ((CHAR_HAS_CATEGORY (j, k)) ^ not) | 4176 if ((CHAR_HAS_CATEGORY (j, k)) ^ not) |
4172 fastmap[j] = 1; | 4177 fastmap[j] = 1; |
4173 | 4178 |
4174 if (multibyte) | 4179 /* Any leading code can possibly start a character which |
4180 has or doesn't have the specified category. */ | |
4181 if (match_any_multibyte_characters == false) | |
4175 { | 4182 { |
4176 /* Any character set can possibly contain a character | 4183 for (j = MIN_MULTIBYTE_LEADING_CODE; |
4177 whose category is K (or not). */ | 4184 j <= MAX_MULTIBYTE_LEADING_CODE; j++) |
4178 if (match_any_multibyte_characters == false) | 4185 fastmap[j] = 1; |
4179 { | 4186 match_any_multibyte_characters = true; |
4180 for (j = 0x80; j < (1 << BYTEWIDTH); j++) | |
4181 fastmap[j] = 1; | |
4182 match_any_multibyte_characters = true; | |
4183 } | |
4184 } | 4187 } |
4185 break; | 4188 break; |
4186 | 4189 |
4187 /* All cases after this match the empty string. These end with | 4190 /* All cases after this match the empty string. These end with |
4188 `continue'. */ | 4191 `continue'. */ |