comparison src/regex.c @ 90790:385c9b28d8a7

(analyse_first): Fix for multibyte characters in "case charset:" and "case categoryspec:".
author Kenichi Handa <handa@m17n.org>
date Fri, 23 Mar 2007 05:44:14 +0000
parents 128e9642f988
children 3619e7770f2e
comparison
equal deleted inserted replaced
90789:c0409ee15cee 90790:385c9b28d8a7
4101 for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++; 4101 for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
4102 j >= 0; j--) 4102 j >= 0; j--)
4103 if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not) 4103 if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
4104 fastmap[j] = 1; 4104 fastmap[j] = 1;
4105 4105
4106 if ((not && multibyte) 4106 #ifdef emacs
4107 /* Any leading code can possibly start a character 4107 if (/* Any leading code can possibly start a character
4108 which doesn't match the specified set of characters. */ 4108 which doesn't match the specified set of characters. */
4109 || (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2]) 4109 not
4110 && CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0)) 4110 ||
4111 /* If we can match a character class, we can match 4111 /* If we can match a character class, we can match any
4112 any multibyte characters. */ 4112 multibyte characters. */
4113 (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
4114 && CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0))
4115
4113 { 4116 {
4114 if (match_any_multibyte_characters == false) 4117 if (match_any_multibyte_characters == false)
4115 { 4118 {
4116 for (j = 0x80; j < (1 << BYTEWIDTH); j++) 4119 for (j = MIN_MULTIBYTE_LEADING_CODE;
4120 j <= MAX_MULTIBYTE_LEADING_CODE; j++)
4117 fastmap[j] = 1; 4121 fastmap[j] = 1;
4118 match_any_multibyte_characters = true; 4122 match_any_multibyte_characters = true;
4119 } 4123 }
4120 } 4124 }
4121 4125
4143 lc2 = CHAR_LEADING_CODE (c); 4147 lc2 = CHAR_LEADING_CODE (c);
4144 for (j = lc1; j <= lc2; j++) 4148 for (j = lc1; j <= lc2; j++)
4145 fastmap[j] = 1; 4149 fastmap[j] = 1;
4146 } 4150 }
4147 } 4151 }
4152 #endif
4148 break; 4153 break;
4149 4154
4150 case syntaxspec: 4155 case syntaxspec:
4151 case notsyntaxspec: 4156 case notsyntaxspec:
4152 if (!fastmap) break; 4157 if (!fastmap) break;
4165 case categoryspec: 4170 case categoryspec:
4166 case notcategoryspec: 4171 case notcategoryspec:
4167 if (!fastmap) break; 4172 if (!fastmap) break;
4168 not = (re_opcode_t)p[-1] == notcategoryspec; 4173 not = (re_opcode_t)p[-1] == notcategoryspec;
4169 k = *p++; 4174 k = *p++;
4170 for (j = (multibyte ? 127 : (1 << BYTEWIDTH)); j >= 0; j--) 4175 for (j = (1 << BYTEWIDTH); j >= 0; j--)
4171 if ((CHAR_HAS_CATEGORY (j, k)) ^ not) 4176 if ((CHAR_HAS_CATEGORY (j, k)) ^ not)
4172 fastmap[j] = 1; 4177 fastmap[j] = 1;
4173 4178
4174 if (multibyte) 4179 /* Any leading code can possibly start a character which
4180 has or doesn't have the specified category. */
4181 if (match_any_multibyte_characters == false)
4175 { 4182 {
4176 /* Any character set can possibly contain a character 4183 for (j = MIN_MULTIBYTE_LEADING_CODE;
4177 whose category is K (or not). */ 4184 j <= MAX_MULTIBYTE_LEADING_CODE; j++)
4178 if (match_any_multibyte_characters == false) 4185 fastmap[j] = 1;
4179 { 4186 match_any_multibyte_characters = true;
4180 for (j = 0x80; j < (1 << BYTEWIDTH); j++)
4181 fastmap[j] = 1;
4182 match_any_multibyte_characters = true;
4183 }
4184 } 4187 }
4185 break; 4188 break;
4186 4189
4187 /* All cases after this match the empty string. These end with 4190 /* All cases after this match the empty string. These end with
4188 `continue'. */ 4191 `continue'. */