pidgin.yaz: src/util.c comparison

comparison src/util.c @ 8958:60a47725df97

[gaim-migrate @ 9732] "I tried to send myself some test mail with the subject Subject: =?Big5?Q?=B4=FA=B8=D5?= (Chinese for "test"). Gaim, however, displays the following notification: Subject: msnB4=FA=B8=D5?=. I tried to partially rewrite gaim_mime_decode_field in util.c to fix this problem; the result of the rewrite is attached. I have tested this and it should work correctly. (This does not fix the MSN mail notification crashes, however.)" --Ambrose C. LI committer: Tailor Script <tailor@pidgin.im>

author	Luke Schierer <lschiere@pidgin.im>
date	Mon, 17 May 2004 02:00:17 +0000
parents	54eba3833e34
children	6f21aa413b18

comparison

equal deleted inserted replaced

-:97a1f314b051
+:60a47725df97
 **************************************************************************/
 char *
 gaim_mime_decode_field(const char *str)
 {
 	/*
-	 * This is revo/shx's version.  It has had some problems with
+	 * This is wing's version, partially based on revo/shx's version
-	 * crashing, but it's probably a better implementation.
+	 * See RFC2047 [which apparently obsoletes RFC1342]
 	 */
+	typedef enum {
+		state_start, state_equal1, state_question1,
+		state_charset, state_question2,
+		state_encoding, state_question3,
+		state_encoded_text, state_question4, state_equal2 = state_start
+	} encoded_word_state_t;
+	encoded_word_state_t state = state_start;
 	const char *cur, *mark;
-	const char *unencoded, *encoded;
+	const char *charset0 = NULL, *encoding0 = NULL, *encoded_text0 = NULL;
 	char *n, *new;
+	/* token can be any CHAR, not necessarily ASCII */
+	#define token_char_p(c) \
+		(c != ' ' && !iscntrl(c) && !strchr("()<>@,;:\"/[]?.=", c))
+	/* But encoded-text must be ASCII; alas, isascii() may not exist */
+	#define encoded_text_char_p(c) \
+		((c & 0x80) == 0 && c != '?' && c != ' ' && isgraph(c))
+	#define RECOVER_MARKED_TEXT strncpy(n, mark, cur - mark + 1); \
+		n += cur - mark + 1
+	/* NOTE: Assuming that we need just strlen(str)+1 may be wrong */
 	n = new = g_malloc(strlen(str) + 1);
 	/* Here we will be looking for encoded words and if they seem to be
 	 * valid then decode them.
 	 * They are of this form: =?charset?encoding?text?=
 	 */
-	for (unencoded = cur = str; (encoded = cur = strstr(cur, "=?")); unencoded = cur) {
+	for (cur = str, mark = NULL; *cur; cur += 1) {
-		gboolean found_word = FALSE;
+		switch (state) {
-		int i, num, dec_len;
+		case state_equal1:
-		gsize len;
-		char *decoded, *converted;
-		char *tokens[3];
-		/* Let's look for tokens, they are between ?'s */
-		for (cur += 2, mark = cur, num = 0; *cur; cur++) {
 			if (*cur == '?') {
-				if (num > 2)
+				state = state_question1;
-					/* No more than 3 tokens. */
+			} else {
-					break;
+				RECOVER_MARKED_TEXT;
+				state = state_start;
-				tokens[num++] = g_strndup(mark, cur - mark);
+			}
+			break;
-				mark = (cur + 1);
+		case state_question1:
+			if (token_char_p(*cur)) {
-				if (*mark == '=') {
+				charset0 = cur;
-					found_word = TRUE;
+				state = state_charset;
-					break;
+			} else { /* This should never happen */
-				}
+				RECOVER_MARKED_TEXT;
-			}
+				state = state_start;
-#if 0
+			}
-			/* I think this is rarely going to happen, if at all */
+			break;
-			else if ((num < 2) && (strchr("()<>@,;:/[]", *cur)))
+		case state_charset:
-				/* There can't be these characters in the first two tokens. */
+			if (*cur == '?') {
-				break;
+				state = state_question2;
-			else if ((num == 2) && (*cur == ' '))
+			} else if (!token_char_p(*cur)) {
-				/* There can't be spaces in the third token. */
+				RECOVER_MARKED_TEXT;
-				break;
+				state = state_start;
-#endif
+			}
-		}
+			break;
+		case state_question2:
-		cur += 2;
+			if (token_char_p(*cur)) {
+				encoding0 = cur;
-		if (found_word) {
+				state = state_encoding;
-			/* We found an encoded word. */
+			} else { /* This should never happen */
-			/* =?charset?encoding?text?= */
+				RECOVER_MARKED_TEXT;
+				state = state_start;
-			/* Some unencoded text. */
+			}
-			len = encoded - unencoded;
+			break;
-			n = strncpy(n, unencoded, len) + len;
+		case state_encoding:
+			if (*cur == '?') {
-			if (g_ascii_strcasecmp(tokens[1], "Q") == 0)
+				state = state_question3;
-				gaim_quotedp_decode(tokens[2], &decoded, &dec_len);
+			} else if (!token_char_p(*cur)) {
-			else if (g_ascii_strcasecmp(tokens[1], "B") == 0)
+				RECOVER_MARKED_TEXT;
-				gaim_base64_decode(tokens[2], &decoded, &dec_len);
+				state = state_start;
-			else
+			}
-				decoded = NULL;
+			break;
+		case state_question3:
-			if (decoded) {
+			if (encoded_text_char_p(*cur)) {
-				converted = g_convert(decoded, dec_len, "utf-8", tokens[0], NULL, &len, NULL);
+				encoded_text0 = cur;
+				state = state_encoded_text;
-				if (converted) {
+			} else { /* This should never happen */
-					n = strncpy(n, converted, len) + len;
+				RECOVER_MARKED_TEXT;
-					g_free(converted);
+				state = state_start;
-				} else if (len) {
+			}
-					converted = g_convert(decoded, len, "utf-8", tokens[0], NULL, &len, NULL);
+			break;
-					n = strncpy(n, converted, len) + len;
+		case state_encoded_text:
-					g_free(converted);
+			if (*cur == '?') {
-				}
+				state = state_question4;
-				g_free(decoded);
+			} else if (!encoded_text_char_p(*cur)) {
-			}
+				RECOVER_MARKED_TEXT;
-		} else {
+				state = state_start;
-			/* Some unencoded text. */
+			}
-			len = cur - unencoded;
+			break;
-			n = strncpy(n, unencoded, len) + len;
+		case state_question4:
-		}
+			if (*cur == '=') { /* Got the whole encoded-word */
+				char *charset = g_strndup(charset0, encoding0 - charset0 - 1);
-		for (i = 0; i < num; i++)
+				char *encoding = g_strndup(encoding0, encoded_text0 - encoding0 - 1);
-			g_free(tokens[i]);
+				char *encoded_text = g_strndup(encoded_text0, cur - encoded_text0 - 1);
-	}
+				char *decoded = NULL;
+				int dec_len;
+				if (g_ascii_strcasecmp(encoding, "Q") == 0)
+					gaim_quotedp_decode(encoded_text, &decoded, &dec_len);
+				else if (g_ascii_strcasecmp(encoding, "B") == 0)
+					gaim_base64_decode(encoded_text, &decoded, &dec_len);
+				else
+					decoded = NULL;
+				if (decoded) {
+					gsize len;
+					char *converted = g_convert(decoded, dec_len, "utf-8", charset, NULL, &len, NULL);
+					if (converted) {
+						n = strncpy(n, converted, len) + len;
+						g_free(converted);
+					}
+					g_free(decoded);
+				}
+				g_free(charset);
+				g_free(encoding);
+				g_free(encoded_text);
+				state = state_equal2; /* Restart the FSM */
+			} else { /* This should never happen */
+				RECOVER_MARKED_TEXT;
+				state = state_start;
+			}
+			break;
+		default:
+			if (*cur == '=') {
+				mark = cur;
+				state = state_equal1;
+			} else {
+				/* Some unencoded text. */
+				*n = *cur;
+				n += 1;
+			}
+			break;
+		} /* switch */
+	} /* for */
+	if (state != state_start) {
+		RECOVER_MARKED_TEXT;
+	}
 	*n = '\0';
-	/* There is unencoded text at the end. */
-	if (*unencoded)
-		n = strcpy(n, unencoded);
 	return new;
 }

Mercurial > pidgin.yaz

comparison src/util.c @ 8958:60a47725df97