comparison libpurple/protocols/jabber/jutil.c @ 27530:1b580473e753

Check in a version of jabber_id_new() that is hopefully more efficient. I think it's less efficient than the version I originally sent to the devel list and accidentally checked in a day or three ago. But it's also correct, and passes all our unit tests. I think it can be optimized a little further by filling in characters in the default case of the switch statement (see "implement_me") and removing the #if 0 lines. It's ok if the "implement_me" checks don't list every valid character; the worst that happens is that the check falls through to our nodeprep and resourceprep.
author Mark Doliner <mark@kingant.net>
date Fri, 10 Jul 2009 06:37:13 +0000
parents 1c61906755fe
children 04d8452dee48
comparison
equal deleted inserted replaced
27529:c6fea4ae68b3 27530:1b580473e753
101 101
102 102
103 JabberID* 103 JabberID*
104 jabber_id_new(const char *str) 104 jabber_id_new(const char *str)
105 { 105 {
106 char *at; 106 const char *at = NULL;
107 char *slash; 107 const char *slash = NULL;
108 const char *cur;
109 gunichar c;
110 gboolean needs_validation = FALSE;
111 #if 0
112 gboolean node_is_required = FALSE;
113 #endif
108 char *node = NULL; 114 char *node = NULL;
109 char *domain; 115 char *domain;
110 JabberID *jid; 116 JabberID *jid;
111 117
112 if(!str || !g_utf8_validate(str, -1, NULL)) 118 if (!str)
119 return NULL;
120
121 for (cur = str; *cur != '\0'; cur = g_utf8_next_char(cur))
122 {
123 c = g_utf8_get_char(cur);
124 switch (c) {
125 case '@':
126 if (!slash) {
127 if (at) {
128 /* Multiple @'s in the node/domain portion, not a valid JID! */
129 return NULL;
130 }
131 if (cur == str) {
132 /* JIDs cannot start with @ */
133 return NULL;
134 }
135 if ((g_utf8_next_char(cur))[0] == '\0') {
136 /* JIDs cannot end with @ */
137 return NULL;
138 }
139 at = cur;
140 }
141 break;
142
143 case '/':
144 if (!slash) {
145 if (cur == str) {
146 /* JIDs cannot start with / */
147 return NULL;
148 }
149 if ((g_utf8_next_char(cur))[0] == '\0') {
150 /* JIDs cannot end with / */
151 return NULL;
152 }
153 slash = cur;
154 }
155 break;
156
157 default:
158 /* characters allowed everywhere */
159 if ((c > 'a' && c < 'z')
160 || (c > '0' && c < '9')
161 || (c > 'A' && c < 'Z')
162 || c == '.' || c == '-')
163 /* We're good */
164 break;
165
166 #if 0
167 if (slash != NULL) {
168 /* characters allowed only in the resource */
169 if (implement_me)
170 /* We're good */
171 break;
172 }
173
174 /* characters allowed only in the node */
175 if (implement_me) {
176 /*
177 * Ok, this character is valid, but only if it's a part
178 * of the node and not the domain. But we don't know
179 * if "c" is a part of the node or the domain until after
180 * we've found the @. So set a flag for now and check
181 * that we found an @ later.
182 */
183 node_is_required = TRUE;
184 break;
185 }
186 #endif
187
188 /*
189 * Hmm, this character is a bit more exotic. Better fall
190 * back to using the more expensive UTF-8 compliant
191 * stringprep functions.
192 */
193 needs_validation = TRUE;
194 break;
195 }
196 }
197
198 #if 0
199 if (node_is_required && at == NULL)
200 /* Found invalid characters in the domain */
201 return NULL;
202 #endif
203
204 if (!needs_validation) {
205 /* JID is made of only ASCII characters--just lowercase and return */
206 jid = g_new0(JabberID, 1);
207
208 if (at) {
209 jid->node = g_ascii_strdown(str, at - str);
210 if (slash) {
211 jid->domain = g_ascii_strdown(at + 1, slash - (at + 1));
212 jid->resource = g_strdup(slash + 1);
213 } else {
214 jid->domain = g_ascii_strdown(at + 1, -1);
215 }
216 } else {
217 if (slash) {
218 jid->domain = g_ascii_strdown(str, slash - str);
219 jid->resource = g_strdup(slash + 1);
220 } else {
221 jid->domain = g_ascii_strdown(str, -1);
222 }
223 }
224 return jid;
225 }
226
227 /*
228 * If we get here, there are some non-ASCII chars in the string, so
229 * we'll need to validate it, normalize, and finally do a full jabber
230 * nodeprep on the jid.
231 */
232
233 if (!g_utf8_validate(str, -1, NULL))
113 return NULL; 234 return NULL;
114 235
115 jid = g_new0(JabberID, 1); 236 jid = g_new0(JabberID, 1);
116 237
117 at = g_utf8_strchr(str, -1, '@'); 238 /* normalization */
118 slash = g_utf8_strchr(str, -1, '/');
119
120 if(at) { 239 if(at) {
121 node = g_utf8_normalize(str, at-str, G_NORMALIZE_NFKC); 240 node = g_utf8_normalize(str, at-str, G_NORMALIZE_NFKC);
122 if(slash) { 241 if(slash) {
123 domain = g_utf8_normalize(at+1, slash-(at+1), G_NORMALIZE_NFKC); 242 domain = g_utf8_normalize(at+1, slash-(at+1), G_NORMALIZE_NFKC);
124 jid->resource = g_utf8_normalize(slash+1, -1, G_NORMALIZE_NFKC); 243 jid->resource = g_utf8_normalize(slash+1, -1, G_NORMALIZE_NFKC);
142 if (domain) { 261 if (domain) {
143 jid->domain = g_utf8_strdown(domain, -1); 262 jid->domain = g_utf8_strdown(domain, -1);
144 g_free(domain); 263 g_free(domain);
145 } 264 }
146 265
266 /* and finally the jabber nodeprep */
147 if(!jabber_nodeprep_validate(jid->node) || 267 if(!jabber_nodeprep_validate(jid->node) ||
148 !jabber_nameprep_validate(jid->domain) || 268 !jabber_nameprep_validate(jid->domain) ||
149 !jabber_resourceprep_validate(jid->resource)) { 269 !jabber_resourceprep_validate(jid->resource)) {
150 jabber_id_free(jid); 270 jabber_id_free(jid);
151 return NULL; 271 return NULL;