comparison src/util.c @ 8958:60a47725df97

[gaim-migrate @ 9732] "I tried to send myself some test mail with the subject Subject: =?Big5?Q?=B4=FA=B8=D5?= (Chinese for "test"). Gaim, however, displays the following notification: Subject: msnB4=FA=B8=D5?= I tried to partially rewrite gaim_mime_decode_field in util.c to fix this problem; the result of the rewrite is attached. I have tested this and it should work correctly. (This does not fix the MSN mail notification crashes, however.)" --Ambrose C. LI committer: Tailor Script <tailor@pidgin.im>
author Luke Schierer <lschiere@pidgin.im>
date Mon, 17 May 2004 02:00:17 +0000
parents 54eba3833e34
children 6f21aa413b18
comparison
equal deleted inserted replaced
8957:97a1f314b051 8958:60a47725df97
301 **************************************************************************/ 301 **************************************************************************/
302 char * 302 char *
303 gaim_mime_decode_field(const char *str) 303 gaim_mime_decode_field(const char *str)
304 { 304 {
305 /* 305 /*
306 * This is revo/shx's version. It has had some problems with 306 * This is wing's version, partially based on revo/shx's version
307 * crashing, but it's probably a better implementation. 307 * See RFC2047 [which apparently obsoletes RFC1342]
308 */ 308 */
309 typedef enum {
310 state_start, state_equal1, state_question1,
311 state_charset, state_question2,
312 state_encoding, state_question3,
313 state_encoded_text, state_question4, state_equal2 = state_start
314 } encoded_word_state_t;
315 encoded_word_state_t state = state_start;
309 const char *cur, *mark; 316 const char *cur, *mark;
310 const char *unencoded, *encoded; 317 const char *charset0 = NULL, *encoding0 = NULL, *encoded_text0 = NULL;
311 char *n, *new; 318 char *n, *new;
312 319
320 /* token can be any CHAR, not necessarily ASCII */
321 #define token_char_p(c) \
322 (c != ' ' && !iscntrl(c) && !strchr("()<>@,;:\"/[]?.=", c))
323
324 /* But encoded-text must be ASCII; alas, isascii() may not exist */
325 #define encoded_text_char_p(c) \
326 ((c & 0x80) == 0 && c != '?' && c != ' ' && isgraph(c))
327
328 #define RECOVER_MARKED_TEXT strncpy(n, mark, cur - mark + 1); \
329 n += cur - mark + 1
330
331 /* NOTE: Assuming that we need just strlen(str)+1 may be wrong */
313 n = new = g_malloc(strlen(str) + 1); 332 n = new = g_malloc(strlen(str) + 1);
314 333
315 /* Here we will be looking for encoded words and if they seem to be 334 /* Here we will be looking for encoded words and if they seem to be
316 * valid then decode them. 335 * valid then decode them.
317 * They are of this form: =?charset?encoding?text?= 336 * They are of this form: =?charset?encoding?text?=
318 */ 337 */
319 338
320 for (unencoded = cur = str; (encoded = cur = strstr(cur, "=?")); unencoded = cur) { 339 for (cur = str, mark = NULL; *cur; cur += 1) {
321 gboolean found_word = FALSE; 340 switch (state) {
322 int i, num, dec_len; 341 case state_equal1:
323 gsize len;
324 char *decoded, *converted;
325 char *tokens[3];
326
327 /* Let's look for tokens, they are between ?'s */
328 for (cur += 2, mark = cur, num = 0; *cur; cur++) {
329 if (*cur == '?') { 342 if (*cur == '?') {
330 if (num > 2) 343 state = state_question1;
331 /* No more than 3 tokens. */ 344 } else {
332 break; 345 RECOVER_MARKED_TEXT;
333 346 state = state_start;
334 tokens[num++] = g_strndup(mark, cur - mark); 347 }
335 348 break;
336 mark = (cur + 1); 349 case state_question1:
337 350 if (token_char_p(*cur)) {
338 if (*mark == '=') { 351 charset0 = cur;
339 found_word = TRUE; 352 state = state_charset;
340 break; 353 } else { /* This should never happen */
341 } 354 RECOVER_MARKED_TEXT;
342 } 355 state = state_start;
343 #if 0 356 }
344 /* I think this is rarely going to happen, if at all */ 357 break;
345 else if ((num < 2) && (strchr("()<>@,;:/[]", *cur))) 358 case state_charset:
346 /* There can't be these characters in the first two tokens. */ 359 if (*cur == '?') {
347 break; 360 state = state_question2;
348 else if ((num == 2) && (*cur == ' ')) 361 } else if (!token_char_p(*cur)) {
349 /* There can't be spaces in the third token. */ 362 RECOVER_MARKED_TEXT;
350 break; 363 state = state_start;
351 #endif 364 }
352 } 365 break;
353 366 case state_question2:
354 cur += 2; 367 if (token_char_p(*cur)) {
355 368 encoding0 = cur;
356 if (found_word) { 369 state = state_encoding;
357 /* We found an encoded word. */ 370 } else { /* This should never happen */
358 /* =?charset?encoding?text?= */ 371 RECOVER_MARKED_TEXT;
359 372 state = state_start;
360 /* Some unencoded text. */ 373 }
361 len = encoded - unencoded; 374 break;
362 n = strncpy(n, unencoded, len) + len; 375 case state_encoding:
363 376 if (*cur == '?') {
364 if (g_ascii_strcasecmp(tokens[1], "Q") == 0) 377 state = state_question3;
365 gaim_quotedp_decode(tokens[2], &decoded, &dec_len); 378 } else if (!token_char_p(*cur)) {
366 else if (g_ascii_strcasecmp(tokens[1], "B") == 0) 379 RECOVER_MARKED_TEXT;
367 gaim_base64_decode(tokens[2], &decoded, &dec_len); 380 state = state_start;
368 else 381 }
369 decoded = NULL; 382 break;
370 383 case state_question3:
371 if (decoded) { 384 if (encoded_text_char_p(*cur)) {
372 converted = g_convert(decoded, dec_len, "utf-8", tokens[0], NULL, &len, NULL); 385 encoded_text0 = cur;
373 386 state = state_encoded_text;
374 if (converted) { 387 } else { /* This should never happen */
375 n = strncpy(n, converted, len) + len; 388 RECOVER_MARKED_TEXT;
376 g_free(converted); 389 state = state_start;
377 } else if (len) { 390 }
378 converted = g_convert(decoded, len, "utf-8", tokens[0], NULL, &len, NULL); 391 break;
379 n = strncpy(n, converted, len) + len; 392 case state_encoded_text:
380 g_free(converted); 393 if (*cur == '?') {
381 } 394 state = state_question4;
382 g_free(decoded); 395 } else if (!encoded_text_char_p(*cur)) {
383 } 396 RECOVER_MARKED_TEXT;
384 } else { 397 state = state_start;
385 /* Some unencoded text. */ 398 }
386 len = cur - unencoded; 399 break;
387 n = strncpy(n, unencoded, len) + len; 400 case state_question4:
388 } 401 if (*cur == '=') { /* Got the whole encoded-word */
389 402 char *charset = g_strndup(charset0, encoding0 - charset0 - 1);
390 for (i = 0; i < num; i++) 403 char *encoding = g_strndup(encoding0, encoded_text0 - encoding0 - 1);
391 g_free(tokens[i]); 404 char *encoded_text = g_strndup(encoded_text0, cur - encoded_text0 - 1);
392 } 405 char *decoded = NULL;
393 406 int dec_len;
407 if (g_ascii_strcasecmp(encoding, "Q") == 0)
408 gaim_quotedp_decode(encoded_text, &decoded, &dec_len);
409 else if (g_ascii_strcasecmp(encoding, "B") == 0)
410 gaim_base64_decode(encoded_text, &decoded, &dec_len);
411 else
412 decoded = NULL;
413 if (decoded) {
414 gsize len;
415 char *converted = g_convert(decoded, dec_len, "utf-8", charset, NULL, &len, NULL);
416
417 if (converted) {
418 n = strncpy(n, converted, len) + len;
419 g_free(converted);
420 }
421 g_free(decoded);
422 }
423 g_free(charset);
424 g_free(encoding);
425 g_free(encoded_text);
426 state = state_equal2; /* Restart the FSM */
427 } else { /* This should never happen */
428 RECOVER_MARKED_TEXT;
429 state = state_start;
430 }
431 break;
432 default:
433 if (*cur == '=') {
434 mark = cur;
435 state = state_equal1;
436 } else {
437 /* Some unencoded text. */
438 *n = *cur;
439 n += 1;
440 }
441 break;
442 } /* switch */
443 } /* for */
444
445 if (state != state_start) {
446 RECOVER_MARKED_TEXT;
447 }
394 *n = '\0'; 448 *n = '\0';
395
396 /* There is unencoded text at the end. */
397 if (*unencoded)
398 n = strcpy(n, unencoded);
399 449
400 return new; 450 return new;
401 } 451 }
402 452
403 453