Mercurial > pidgin.yaz
comparison libpurple/protocols/jabber/jutil.c @ 27530:1b580473e753
Check in a version of jabber_id_new() that is hopefully more efficient.
I think it's less efficient than the version I originally sent to the
devel list and accidentally checked in a day or three ago.
But it's also correct, and passes all our unit tests. I think it can
be optimized a little further by filling in characters in the default case
of the switch statement (see "implement_me") and removing the #if 0 lines.
It's ok if the "implement_me" checks don't list every valid character--
the worst that happens is the check falls through to our nodeprep and
resource prep.
author | Mark Doliner <mark@kingant.net> |
---|---|
date | Fri, 10 Jul 2009 06:37:13 +0000 |
parents | 1c61906755fe |
children | 04d8452dee48 |
comparison
equal
deleted
inserted
replaced
27529:c6fea4ae68b3 | 27530:1b580473e753 |
---|---|
101 | 101 |
102 | 102 |
103 JabberID* | 103 JabberID* |
104 jabber_id_new(const char *str) | 104 jabber_id_new(const char *str) |
105 { | 105 { |
106 char *at; | 106 const char *at = NULL; |
107 char *slash; | 107 const char *slash = NULL; |
108 const char *cur; | |
109 gunichar c; | |
110 gboolean needs_validation = FALSE; | |
111 #if 0 | |
112 gboolean node_is_required = FALSE; | |
113 #endif | |
108 char *node = NULL; | 114 char *node = NULL; |
109 char *domain; | 115 char *domain; |
110 JabberID *jid; | 116 JabberID *jid; |
111 | 117 |
112 if(!str || !g_utf8_validate(str, -1, NULL)) | 118 if (!str) |
119 return NULL; | |
120 | |
121 for (cur = str; *cur != '\0'; cur = g_utf8_next_char(cur)) | |
122 { | |
123 c = g_utf8_get_char(cur); | |
124 switch (c) { | |
125 case '@': | |
126 if (!slash) { | |
127 if (at) { | |
128 /* Multiple @'s in the node/domain portion, not a valid JID! */ | |
129 return NULL; | |
130 } | |
131 if (cur == str) { | |
132 /* JIDs cannot start with @ */ | |
133 return NULL; | |
134 } | |
135 if ((g_utf8_next_char(cur))[0] == '\0') { | |
136 /* JIDs cannot end with @ */ | |
137 return NULL; | |
138 } | |
139 at = cur; | |
140 } | |
141 break; | |
142 | |
143 case '/': | |
144 if (!slash) { | |
145 if (cur == str) { | |
146 /* JIDs cannot start with / */ | |
147 return NULL; | |
148 } | |
149 if ((g_utf8_next_char(cur))[0] == '\0') { | |
150 /* JIDs cannot end with / */ | |
151 return NULL; | |
152 } | |
153 slash = cur; | |
154 } | |
155 break; | |
156 | |
157 default: | |
158 /* characters allowed everywhere */ | |
159 if ((c > 'a' && c < 'z') | |
160 || (c > '0' && c < '9') | |
161 || (c > 'A' && c < 'Z') | |
162 || c == '.' || c == '-') | |
163 /* We're good */ | |
164 break; | |
165 | |
166 #if 0 | |
167 if (slash != NULL) { | |
168 /* characters allowed only in the resource */ | |
169 if (implement_me) | |
170 /* We're good */ | |
171 break; | |
172 } | |
173 | |
174 /* characters allowed only in the node */ | |
175 if (implement_me) { | |
176 /* | |
177 * Ok, this character is valid, but only if it's a part | |
178 * of the node and not the domain. But we don't know | |
179 * if "c" is a part of the node or the domain until after | |
180 * we've found the @. So set a flag for now and check | |
181 * that we found an @ later. | |
182 */ | |
183 node_is_required = TRUE; | |
184 break; | |
185 } | |
186 #endif | |
187 | |
188 /* | |
189 * Hmm, this character is a bit more exotic. Better fall | |
190 * back to using the more expensive UTF-8 compliant | |
191 * stringprep functions. | |
192 */ | |
193 needs_validation = TRUE; | |
194 break; | |
195 } | |
196 } | |
197 | |
198 #if 0 | |
199 if (node_is_required && at == NULL) | |
200 /* Found invalid characters in the domain */ | |
201 return NULL; | |
202 #endif | |
203 | |
204 if (!needs_validation) { | |
205 /* JID is made of only ASCII characters--just lowercase and return */ | |
206 jid = g_new0(JabberID, 1); | |
207 | |
208 if (at) { | |
209 jid->node = g_ascii_strdown(str, at - str); | |
210 if (slash) { | |
211 jid->domain = g_ascii_strdown(at + 1, slash - (at + 1)); | |
212 jid->resource = g_strdup(slash + 1); | |
213 } else { | |
214 jid->domain = g_ascii_strdown(at + 1, -1); | |
215 } | |
216 } else { | |
217 if (slash) { | |
218 jid->domain = g_ascii_strdown(str, slash - str); | |
219 jid->resource = g_strdup(slash + 1); | |
220 } else { | |
221 jid->domain = g_ascii_strdown(str, -1); | |
222 } | |
223 } | |
224 return jid; | |
225 } | |
226 | |
227 /* | |
228 * If we get here, there are some non-ASCII chars in the string, so | |
229 * we'll need to validate it, normalize, and finally do a full jabber | |
230 * nodeprep on the jid. | |
231 */ | |
232 | |
233 if (!g_utf8_validate(str, -1, NULL)) | |
113 return NULL; | 234 return NULL; |
114 | 235 |
115 jid = g_new0(JabberID, 1); | 236 jid = g_new0(JabberID, 1); |
116 | 237 |
117 at = g_utf8_strchr(str, -1, '@'); | 238 /* normalization */ |
118 slash = g_utf8_strchr(str, -1, '/'); | |
119 | |
120 if(at) { | 239 if(at) { |
121 node = g_utf8_normalize(str, at-str, G_NORMALIZE_NFKC); | 240 node = g_utf8_normalize(str, at-str, G_NORMALIZE_NFKC); |
122 if(slash) { | 241 if(slash) { |
123 domain = g_utf8_normalize(at+1, slash-(at+1), G_NORMALIZE_NFKC); | 242 domain = g_utf8_normalize(at+1, slash-(at+1), G_NORMALIZE_NFKC); |
124 jid->resource = g_utf8_normalize(slash+1, -1, G_NORMALIZE_NFKC); | 243 jid->resource = g_utf8_normalize(slash+1, -1, G_NORMALIZE_NFKC); |
142 if (domain) { | 261 if (domain) { |
143 jid->domain = g_utf8_strdown(domain, -1); | 262 jid->domain = g_utf8_strdown(domain, -1); |
144 g_free(domain); | 263 g_free(domain); |
145 } | 264 } |
146 | 265 |
266 /* and finally the jabber nodeprep */ | |
147 if(!jabber_nodeprep_validate(jid->node) || | 267 if(!jabber_nodeprep_validate(jid->node) || |
148 !jabber_nameprep_validate(jid->domain) || | 268 !jabber_nameprep_validate(jid->domain) || |
149 !jabber_resourceprep_validate(jid->resource)) { | 269 !jabber_resourceprep_validate(jid->resource)) { |
150 jabber_id_free(jid); | 270 jabber_id_free(jid); |
151 return NULL; | 271 return NULL; |