Mercurial > pidgin
changeset 8958:60a47725df97
[gaim-migrate @ 9732]
" I tried to send myself some test mail with the subject
Subject: =?Big5?Q?=B4=FA=B8=D5?=
(Chinese for "test"). Gaim, however, displays the
following notification:
Subject: msnB4=FA=B8=D5?=
I tried to partially rewrite gaim_mime_decode_field in
util.c to fix this problem; the results of the rewrite
is attached.
I have tested this and it should work correctly.
(This does not fix the MSN mail notification crashes,
however.)" --Ambrose C. LI
committer: Tailor Script <tailor@pidgin.im>
author | Luke Schierer <lschiere@pidgin.im> |
---|---|
date | Mon, 17 May 2004 02:00:17 +0000 |
parents | 97a1f314b051 |
children | bfc8dea58386 |
files | src/util.c |
diffstat | 1 files changed, 126 insertions(+), 76 deletions(-) [+] |
line wrap: on
line diff
--- a/src/util.c Sun May 16 22:43:41 2004 +0000 +++ b/src/util.c Mon May 17 02:00:17 2004 +0000 @@ -303,13 +303,32 @@ gaim_mime_decode_field(const char *str) { /* - * This is revo/shx's version. It has had some problems with - * crashing, but it's probably a better implementation. + * This is wing's version, partially based on revo/shx's version + * See RFC2047 [which apparently obsoletes RFC1342] */ + typedef enum { + state_start, state_equal1, state_question1, + state_charset, state_question2, + state_encoding, state_question3, + state_encoded_text, state_question4, state_equal2 = state_start + } encoded_word_state_t; + encoded_word_state_t state = state_start; const char *cur, *mark; - const char *unencoded, *encoded; + const char *charset0 = NULL, *encoding0 = NULL, *encoded_text0 = NULL; char *n, *new; + /* token can be any CHAR, not necessarily ASCII */ + #define token_char_p(c) \ + (c != ' ' && !iscntrl(c) && !strchr("()<>@,;:\"/[]?.=", c)) + + /* But encoded-text must be ASCII; alas, isascii() may not exist */ + #define encoded_text_char_p(c) \ + ((c & 0x80) == 0 && c != '?' && c != ' ' && isgraph(c)) + + #define RECOVER_MARKED_TEXT strncpy(n, mark, cur - mark + 1); \ + n += cur - mark + 1 + + /* NOTE: Assuming that we need just strlen(str)+1 may be wrong */ n = new = g_malloc(strlen(str) + 1); /* Here we will be looking for encoded words and if they seem to be @@ -317,86 +336,117 @@ * They are of this form: =?charset?encoding?text?= */ - for (unencoded = cur = str; (encoded = cur = strstr(cur, "=?")); unencoded = cur) { - gboolean found_word = FALSE; - int i, num, dec_len; - gsize len; - char *decoded, *converted; - char *tokens[3]; - - /* Let's look for tokens, they are between ?'s */ - for (cur += 2, mark = cur, num = 0; *cur; cur++) { + for (cur = str, mark = NULL; *cur; cur += 1) { + switch (state) { + case state_equal1: + if (*cur == '?') { + state = state_question1; + } else { + RECOVER_MARKED_TEXT; + state = state_start; + } + break; + case state_question1: + if (token_char_p(*cur)) { + charset0 = cur; + state = state_charset; + } else { /* This should never happen */ + RECOVER_MARKED_TEXT; + state = state_start; + } + break; + case state_charset: if (*cur == '?') { - if (num > 2) - /* No more than 3 tokens. */ - break; - - tokens[num++] = g_strndup(mark, cur - mark); - - mark = (cur + 1); - - if (*mark == '=') { - found_word = TRUE; - break; - } + state = state_question2; + } else if (!token_char_p(*cur)) { + RECOVER_MARKED_TEXT; + state = state_start; + } + break; + case state_question2: + if (token_char_p(*cur)) { + encoding0 = cur; + state = state_encoding; + } else { /* This should never happen */ + RECOVER_MARKED_TEXT; + state = state_start; + } + break; + case state_encoding: + if (*cur == '?') { + state = state_question3; + } else if (!token_char_p(*cur)) { + RECOVER_MARKED_TEXT; + state = state_start; + } + break; + case state_question3: + if (encoded_text_char_p(*cur)) { + encoded_text0 = cur; + state = state_encoded_text; + } else { /* This should never happen */ + RECOVER_MARKED_TEXT; + state = state_start; } -#if 0 - /* I think this is rarely going to happen, if at all */ - else if ((num < 2) && (strchr("()<>@,;:/[]", *cur))) - /* There can't be these characters in the first two tokens. */ - break; - else if ((num == 2) && (*cur == ' ')) - /* There can't be spaces in the third token. */ - break; -#endif - } - - cur += 2; - - if (found_word) { - /* We found an encoded word. */ - /* =?charset?encoding?text?= */ - - /* Some unencoded text. */ - len = encoded - unencoded; - n = strncpy(n, unencoded, len) + len; - - if (g_ascii_strcasecmp(tokens[1], "Q") == 0) - gaim_quotedp_decode(tokens[2], &decoded, &dec_len); - else if (g_ascii_strcasecmp(tokens[1], "B") == 0) - gaim_base64_decode(tokens[2], &decoded, &dec_len); - else - decoded = NULL; - - if (decoded) { - converted = g_convert(decoded, dec_len, "utf-8", tokens[0], NULL, &len, NULL); - - if (converted) { - n = strncpy(n, converted, len) + len; - g_free(converted); - } else if (len) { - converted = g_convert(decoded, len, "utf-8", tokens[0], NULL, &len, NULL); - n = strncpy(n, converted, len) + len; - g_free(converted); + break; + case state_encoded_text: + if (*cur == '?') { + state = state_question4; + } else if (!encoded_text_char_p(*cur)) { + RECOVER_MARKED_TEXT; + state = state_start; + } + break; + case state_question4: + if (*cur == '=') { /* Got the whole encoded-word */ + char *charset = g_strndup(charset0, encoding0 - charset0 - 1); + char *encoding = g_strndup(encoding0, encoded_text0 - encoding0 - 1); + char *encoded_text = g_strndup(encoded_text0, cur - encoded_text0 - 1); + char *decoded = NULL; + int dec_len; + if (g_ascii_strcasecmp(encoding, "Q") == 0) + gaim_quotedp_decode(encoded_text, &decoded, &dec_len); + else if (g_ascii_strcasecmp(encoding, "B") == 0) + gaim_base64_decode(encoded_text, &decoded, &dec_len); + else + decoded = NULL; + if (decoded) { + gsize len; + char *converted = g_convert(decoded, dec_len, "utf-8", charset, NULL, &len, NULL); + + if (converted) { + n = strncpy(n, converted, len) + len; + g_free(converted); + } + g_free(decoded); } - g_free(decoded); + g_free(charset); + g_free(encoding); + g_free(encoded_text); + state = state_equal2; /* Restart the FSM */ + } else { /* This should never happen */ + RECOVER_MARKED_TEXT; + state = state_start; } - } else { - /* Some unencoded text. */ - len = cur - unencoded; - n = strncpy(n, unencoded, len) + len; - } - - for (i = 0; i < num; i++) - g_free(tokens[i]); + break; + default: + if (*cur == '=') { + mark = cur; + state = state_equal1; + } else { + /* Some unencoded text. */ + *n = *cur; + n += 1; + } + break; + } /* switch */ + } /* for */ + + if (state != state_start) { + RECOVER_MARKED_TEXT; } - *n = '\0'; - /* There is unencoded text at the end. */ - if (*unencoded) - n = strcpy(n, unencoded); - return new; }