Mercurial > pidgin.yaz
comparison src/util.c @ 8958:60a47725df97
[gaim-migrate @ 9732]
" I tried to send myself some test mail with the subject
Subject: =?Big5?Q?=B4=FA=B8=D5?=
(Chinese for "test"). Gaim, however, displays the
following notification:
Subject: msnB4=FA=B8=D5?=
I tried to partially rewrite gaim_mime_decode_field in
util.c to fix this problem; the result of the rewrite
is attached.
I have tested this and it should work correctly.
(This does not fix the MSN mail notification crashes,
however.)" --Ambrose C. LI
committer: Tailor Script <tailor@pidgin.im>
author | Luke Schierer <lschiere@pidgin.im> |
---|---|
date | Mon, 17 May 2004 02:00:17 +0000 |
parents | 54eba3833e34 |
children | 6f21aa413b18 |
comparison
equal
deleted
inserted
replaced
8957:97a1f314b051 | 8958:60a47725df97 |
---|---|
301 **************************************************************************/ | 301 **************************************************************************/ |
302 char * | 302 char * |
303 gaim_mime_decode_field(const char *str) | 303 gaim_mime_decode_field(const char *str) |
304 { | 304 { |
305 /* | 305 /* |
306 * This is revo/shx's version. It has had some problems with | 306 * This is wing's version, partially based on revo/shx's version |
307 * crashing, but it's probably a better implementation. | 307 * See RFC2047 [which apparently obsoletes RFC1342] |
308 */ | 308 */ |
309 typedef enum { | |
310 state_start, state_equal1, state_question1, | |
311 state_charset, state_question2, | |
312 state_encoding, state_question3, | |
313 state_encoded_text, state_question4, state_equal2 = state_start | |
314 } encoded_word_state_t; | |
315 encoded_word_state_t state = state_start; | |
309 const char *cur, *mark; | 316 const char *cur, *mark; |
310 const char *unencoded, *encoded; | 317 const char *charset0 = NULL, *encoding0 = NULL, *encoded_text0 = NULL; |
311 char *n, *new; | 318 char *n, *new; |
312 | 319 |
320 /* token can be any CHAR, not necessarily ASCII */ | |
321 #define token_char_p(c) \ | |
322 (c != ' ' && !iscntrl(c) && !strchr("()<>@,;:\"/[]?.=", c)) | |
323 | |
324 /* But encoded-text must be ASCII; alas, isascii() may not exist */ | |
325 #define encoded_text_char_p(c) \ | |
326 ((c & 0x80) == 0 && c != '?' && c != ' ' && isgraph(c)) | |
327 | |
328 #define RECOVER_MARKED_TEXT strncpy(n, mark, cur - mark + 1); \ | |
329 n += cur - mark + 1 | |
330 | |
331 /* NOTE: Assuming that we need just strlen(str)+1 may be wrong */ | |
313 n = new = g_malloc(strlen(str) + 1); | 332 n = new = g_malloc(strlen(str) + 1); |
314 | 333 |
315 /* Here we will be looking for encoded words and if they seem to be | 334 /* Here we will be looking for encoded words and if they seem to be |
316 * valid then decode them. | 335 * valid then decode them. |
317 * They are of this form: =?charset?encoding?text?= | 336 * They are of this form: =?charset?encoding?text?= |
318 */ | 337 */ |
319 | 338 |
320 for (unencoded = cur = str; (encoded = cur = strstr(cur, "=?")); unencoded = cur) { | 339 for (cur = str, mark = NULL; *cur; cur += 1) { |
321 gboolean found_word = FALSE; | 340 switch (state) { |
322 int i, num, dec_len; | 341 case state_equal1: |
323 gsize len; | |
324 char *decoded, *converted; | |
325 char *tokens[3]; | |
326 | |
327 /* Let's look for tokens, they are between ?'s */ | |
328 for (cur += 2, mark = cur, num = 0; *cur; cur++) { | |
329 if (*cur == '?') { | 342 if (*cur == '?') { |
330 if (num > 2) | 343 state = state_question1; |
331 /* No more than 3 tokens. */ | 344 } else { |
332 break; | 345 RECOVER_MARKED_TEXT; |
333 | 346 state = state_start; |
334 tokens[num++] = g_strndup(mark, cur - mark); | 347 } |
335 | 348 break; |
336 mark = (cur + 1); | 349 case state_question1: |
337 | 350 if (token_char_p(*cur)) { |
338 if (*mark == '=') { | 351 charset0 = cur; |
339 found_word = TRUE; | 352 state = state_charset; |
340 break; | 353 } else { /* This should never happen */ |
341 } | 354 RECOVER_MARKED_TEXT; |
342 } | 355 state = state_start; |
343 #if 0 | 356 } |
344 /* I think this is rarely going to happen, if at all */ | 357 break; |
345 else if ((num < 2) && (strchr("()<>@,;:/[]", *cur))) | 358 case state_charset: |
346 /* There can't be these characters in the first two tokens. */ | 359 if (*cur == '?') { |
347 break; | 360 state = state_question2; |
348 else if ((num == 2) && (*cur == ' ')) | 361 } else if (!token_char_p(*cur)) { |
349 /* There can't be spaces in the third token. */ | 362 RECOVER_MARKED_TEXT; |
350 break; | 363 state = state_start; |
351 #endif | 364 } |
352 } | 365 break; |
353 | 366 case state_question2: |
354 cur += 2; | 367 if (token_char_p(*cur)) { |
355 | 368 encoding0 = cur; |
356 if (found_word) { | 369 state = state_encoding; |
357 /* We found an encoded word. */ | 370 } else { /* This should never happen */ |
358 /* =?charset?encoding?text?= */ | 371 RECOVER_MARKED_TEXT; |
359 | 372 state = state_start; |
360 /* Some unencoded text. */ | 373 } |
361 len = encoded - unencoded; | 374 break; |
362 n = strncpy(n, unencoded, len) + len; | 375 case state_encoding: |
363 | 376 if (*cur == '?') { |
364 if (g_ascii_strcasecmp(tokens[1], "Q") == 0) | 377 state = state_question3; |
365 gaim_quotedp_decode(tokens[2], &decoded, &dec_len); | 378 } else if (!token_char_p(*cur)) { |
366 else if (g_ascii_strcasecmp(tokens[1], "B") == 0) | 379 RECOVER_MARKED_TEXT; |
367 gaim_base64_decode(tokens[2], &decoded, &dec_len); | 380 state = state_start; |
368 else | 381 } |
369 decoded = NULL; | 382 break; |
370 | 383 case state_question3: |
371 if (decoded) { | 384 if (encoded_text_char_p(*cur)) { |
372 converted = g_convert(decoded, dec_len, "utf-8", tokens[0], NULL, &len, NULL); | 385 encoded_text0 = cur; |
373 | 386 state = state_encoded_text; |
374 if (converted) { | 387 } else { /* This should never happen */ |
375 n = strncpy(n, converted, len) + len; | 388 RECOVER_MARKED_TEXT; |
376 g_free(converted); | 389 state = state_start; |
377 } else if (len) { | 390 } |
378 converted = g_convert(decoded, len, "utf-8", tokens[0], NULL, &len, NULL); | 391 break; |
379 n = strncpy(n, converted, len) + len; | 392 case state_encoded_text: |
380 g_free(converted); | 393 if (*cur == '?') { |
381 } | 394 state = state_question4; |
382 g_free(decoded); | 395 } else if (!encoded_text_char_p(*cur)) { |
383 } | 396 RECOVER_MARKED_TEXT; |
384 } else { | 397 state = state_start; |
385 /* Some unencoded text. */ | 398 } |
386 len = cur - unencoded; | 399 break; |
387 n = strncpy(n, unencoded, len) + len; | 400 case state_question4: |
388 } | 401 if (*cur == '=') { /* Got the whole encoded-word */ |
389 | 402 char *charset = g_strndup(charset0, encoding0 - charset0 - 1); |
390 for (i = 0; i < num; i++) | 403 char *encoding = g_strndup(encoding0, encoded_text0 - encoding0 - 1); |
391 g_free(tokens[i]); | 404 char *encoded_text = g_strndup(encoded_text0, cur - encoded_text0 - 1); |
392 } | 405 char *decoded = NULL; |
393 | 406 int dec_len; |
407 if (g_ascii_strcasecmp(encoding, "Q") == 0) | |
408 gaim_quotedp_decode(encoded_text, &decoded, &dec_len); | |
409 else if (g_ascii_strcasecmp(encoding, "B") == 0) | |
410 gaim_base64_decode(encoded_text, &decoded, &dec_len); | |
411 else | |
412 decoded = NULL; | |
413 if (decoded) { | |
414 gsize len; | |
415 char *converted = g_convert(decoded, dec_len, "utf-8", charset, NULL, &len, NULL); | |
416 | |
417 if (converted) { | |
418 n = strncpy(n, converted, len) + len; | |
419 g_free(converted); | |
420 } | |
421 g_free(decoded); | |
422 } | |
423 g_free(charset); | |
424 g_free(encoding); | |
425 g_free(encoded_text); | |
426 state = state_equal2; /* Restart the FSM */ | |
427 } else { /* This should never happen */ | |
428 RECOVER_MARKED_TEXT; | |
429 state = state_start; | |
430 } | |
431 break; | |
432 default: | |
433 if (*cur == '=') { | |
434 mark = cur; | |
435 state = state_equal1; | |
436 } else { | |
437 /* Some unencoded text. */ | |
438 *n = *cur; | |
439 n += 1; | |
440 } | |
441 break; | |
442 } /* switch */ | |
443 } /* for */ | |
444 | |
445 if (state != state_start) { | |
446 RECOVER_MARKED_TEXT; | |
447 } | |
394 *n = '\0'; | 448 *n = '\0'; |
395 | |
396 /* There is unencoded text at the end. */ | |
397 if (*unencoded) | |
398 n = strcpy(n, unencoded); | |
399 | 449 |
400 return new; | 450 return new; |
401 } | 451 } |
402 | 452 |
403 | 453 |