changeset 25504:e556eb2f38d2

- add hankaku kana support to irc.
author Yoshiki Yazawa <yaz@honeyplanet.jp>
date Sun, 10 Jun 2007 18:06:26 +0000
parents 7577706bde9c
children e7cc85ad9a6e
files libpurple/protocols/irc/irc.c libpurple/protocols/irc/parse.c pidgin/gtkconv.c
diffstat 3 files changed, 204 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/libpurple/protocols/irc/irc.c	Sun Jun 10 16:36:58 2007 +0000
+++ b/libpurple/protocols/irc/irc.c	Sun Jun 10 18:06:26 2007 +0000
@@ -947,6 +947,12 @@
 	option = purple_account_option_string_new(_("Encodings"), "encoding", IRC_DEFAULT_CHARSET);
 	prpl_info.protocol_options = g_list_append(prpl_info.protocol_options, option);
 
+	option = purple_account_option_bool_new(_("Use SO/SI to send hankaku kana"), "irc_use_sosi", FALSE);
+	prpl_info.protocol_options = g_list_append(prpl_info.protocol_options, option);
+
+	option = purple_account_option_bool_new(_("Use 8bit to send hankaku kana"), "irc_use_8bit", FALSE);
+	prpl_info.protocol_options = g_list_append(prpl_info.protocol_options, option);
+
 	option = purple_account_option_string_new(_("Username"), "username", "");
 	prpl_info.protocol_options = g_list_append(prpl_info.protocol_options, option);
 
--- a/libpurple/protocols/irc/parse.c	Sun Jun 10 16:36:58 2007 +0000
+++ b/libpurple/protocols/irc/parse.c	Sun Jun 10 18:06:26 2007 +0000
@@ -35,7 +35,7 @@
 #include <ctype.h>
 
 static char *irc_send_convert(struct irc_conn *irc, const char *string);
-static char *irc_recv_convert(struct irc_conn *irc, const char *string);
+static char *irc_recv_convert(struct irc_conn *irc, char *string);
 
 static void irc_parse_error_cb(struct irc_conn *irc, char *input);
 
@@ -156,6 +156,20 @@
 	{ NULL, NULL, NULL, NULL }
 };
 
+/* yaz */
+#define ASCII	0
+#define KANJI	1
+#define KANA	2
+#define ROMAN	3
+char seq_ascii[] = {0x1B,0x28,0x42,0x00}; /* ESC ( B */
+char seq_kanji[] = {0x1B,0x24,0x42,0x00}; /* ESC $ B */
+char seq_kana[]  = {0x1B,0x28,0x49,0x00}; /* ESC ( I */
+char seq_roman[] = {0x1B,0x28,0x4A,0x00}; /* ESC ( J */
+char *seq[4] = {seq_ascii, seq_kanji, seq_kana, seq_roman};
+char *jisstate[5] = {"ASCII", "KANJI", "KANA", "ROMAN"};
+char SO[] = {0x0E,0x00};
+char SI[] = {0x0F,0x00};
+
 static PurpleCmdRet irc_parse_purple_cmd(PurpleConversation *conv, const gchar *cmd,
                                         gchar **args, gchar **error, void *data)
 {
@@ -223,10 +237,18 @@
 	GError *err = NULL;
 	gchar **encodings;
 	const gchar *enclist;
+	char *escpos = NULL;
+	char *temp = NULL;
+	gboolean iskana = FALSE;
+	char *pos = NULL;
+	gboolean irc_use_sosi, irc_use_8bit;
 
 	enclist = purple_account_get_string(irc->account, "encoding", IRC_DEFAULT_CHARSET);
 	encodings = g_strsplit(enclist, ",", 2);
 
+	irc_use_sosi = purple_account_get_bool(irc->account, "irc_use_sosi", FALSE);
+	irc_use_8bit = purple_account_get_bool(irc->account, "irc_use_8bit", FALSE);
+
 	if (encodings[0] == NULL || !g_ascii_strcasecmp("UTF-8", encodings[0])) {
 		g_strfreev(encodings);
 		return g_strdup(string);
@@ -239,17 +261,79 @@
 		utf8 = g_strdup(string);
 		g_error_free(err);
 	}
+
+	/* yaz */
+	if (!strncasecmp("iso-2022-jp", encodings[0], strlen("iso-2022-jp"))) {
+		escpos = strrchr(utf8, 0x1B);
+		if(escpos && (!strncmp(seq_kanji, escpos, 3) || !strncmp(seq_kana, escpos, 3))){
+			char *oldutf8 = utf8;
+			utf8 = g_realloc(utf8, strlen(utf8)+1+3);
+			if(utf8)
+				strncat(utf8, seq_ascii, 3);
+			else
+				utf8 = oldutf8;
+		}
+
+		if(irc_use_sosi || irc_use_8bit){
+			/* SO/SI */
+			//find kana escape and replace with roman+SO
+			temp = g_malloc0(strlen(utf8) * 7); //XXX should be reasonable size
+			pos = utf8;
+			while(pos < utf8+strlen(utf8)){
+				escpos = strchr(pos, 0x1B);
+				if(escpos){
+					if(!strncmp(seq_kana, escpos, 3)){ /* kana found */
+						iskana = TRUE;
+						strncat(temp, pos, escpos-pos);
+						strcat(temp, seq_roman);
+						if(irc_use_sosi)
+							strcat(temp, SO);
+						pos = escpos+3;
+					} else {
+						if(iskana){
+							char *ptr;
+							ptr = temp + strlen(temp);
+							while(pos<escpos){
+								if(irc_use_8bit)
+									*ptr = *pos + 128; // convert to 8bit
+								else
+									*ptr = *pos;
+								ptr++; pos++;
+							}
+							if(irc_use_sosi)
+								strcat(temp, SI);
+							strncat(temp, escpos, 3);
+							pos = escpos+3;
+							iskana = FALSE;
+						} else {
+							strncat(temp, pos, escpos-pos+3); //include esc
+							pos = escpos+3;
+						}
+					}
+				} else { /* escpos == NULL */
+					strcat(temp, pos);
+					break;
+				}
+			}
+			g_free(utf8);
+			utf8 = temp;
+		}
+	}
+
 	g_strfreev(encodings);
-
 	return utf8;
 }
 
-static char *irc_recv_convert(struct irc_conn *irc, const char *string)
+static char *irc_recv_convert(struct irc_conn *irc, char *string)
 {
 	char *utf8 = NULL;
 	const gchar *charset, *enclist;
 	gchar **encodings;
 	int i;
+	GError *err;
+	gboolean retry;
+	gsize in_len, out_len;
+	int conv_len;
 
 	enclist = purple_account_get_string(irc->account, "encoding", IRC_DEFAULT_CHARSET);
 	encodings = g_strsplit(enclist, ",", -1);
@@ -267,11 +351,119 @@
 		if (!g_ascii_strcasecmp("UTF-8", charset)) {
 			if (g_utf8_validate(string, -1, NULL))
 				utf8 = g_strdup(string);
+		} else if (!strncasecmp("iso-2022-jp-2", charset, strlen("iso-2022-jp-2"))){
+			/* pre-process quirky jis */
+			unsigned char *jisstr;
+			unsigned char *ptr, *ptr2;
+			int state = ASCII;
+			int is8bit = FALSE;
+
+			jisstr = (unsigned char *)calloc(1, strlen(string)*7); /* enough? */
+			ptr = (unsigned char *)string; ptr2 = jisstr;
+
+			while(*ptr){
+				if(*ptr == 0x1B){
+					/* escape sequence. */
+					if(*(ptr+1) == 0x28 && *(ptr+2) == 0x42){
+						state = ASCII;
+
+					} else if(*(ptr+1) == 0x24 && *(ptr+2) == 0x42){
+						state = KANJI;
+
+					} else if(*(ptr+1) == 0x28 && *(ptr+2) == 0x49){
+						state = KANA;
+
+					} else if(*(ptr+1) == 0x28 && *(ptr+2) == 0x4a){
+						state = ROMAN;
+
+					}
+					purple_debug(PURPLE_DEBUG_INFO, "irc", "state %s\n", jisstate[state]);
+				}
+				if(*ptr >= 0xA1 && *ptr <= 0xDF){
+					/* raw 8bit */
+					if(!is8bit){
+						strcat((char *)jisstr, seq[KANA]);
+						ptr2 += 3;
+						is8bit = TRUE;
+						purple_debug(PURPLE_DEBUG_INFO, "irc", "8bit = TRUE\n");
+					}
+					*ptr2 = *ptr - 0x80;
+					ptr++ ; ptr2++;
+				} else {
+					/* 7bit */
+					if(*ptr == 0x0E){
+						/* SO */
+						strcat((char *)jisstr, seq[KANA]);
+						ptr++; ptr2 += 3;
+						purple_debug(PURPLE_DEBUG_INFO, "irc", "SO\n");
+						continue;
+					} else if(*ptr == 0x0F){
+						/* SI */
+						strcat((char *)jisstr, seq[state]);
+						purple_debug(PURPLE_DEBUG_INFO, "irc", "SI to %s\n", jisstate[state]);
+						ptr++; ptr2 += 3;
+						purple_debug(PURPLE_DEBUG_INFO, "irc", "SI\n");
+						continue;
+					}
+					if(is8bit){ /* the edge of 8bit -> 7bit */
+						purple_debug(PURPLE_DEBUG_INFO, "irc", "8bit to %s\n", jisstate[state]);
+						strcat((char *)jisstr, seq[state]);
+						ptr2 += 3;
+						is8bit=FALSE;
+						purple_debug(PURPLE_DEBUG_INFO, "irc", "8bit = FALSE\n");
+					}
+					/* copy str */
+					*ptr2 = *ptr;
+					ptr++; ptr2++;
+				}
+			}
+
+			/* convert & error recovery */
+			do {
+				err = NULL;
+				retry = FALSE;
+
+				conv_len = strlen((char *)jisstr);
+				utf8 = g_convert_with_fallback((char *)jisstr, conv_len, "UTF-8", charset,
+							       "?", &in_len, &out_len, &err);
+				if(err != NULL){
+					if(err->code == G_CONVERT_ERROR_ILLEGAL_SEQUENCE){
+						memmove(jisstr + in_len, jisstr + in_len + 1,
+							conv_len - in_len -1);
+						conv_len--;
+						*(jisstr + conv_len) = '\0';
+						retry = TRUE;
+					}
+					g_error_free(err);
+				}
+			} while(retry);
+
+			if(jisstr)
+				free(jisstr);
+
 		} else {
-			utf8 = g_convert(string, -1, "UTF-8", charset, NULL, NULL, NULL);
+			do {
+				err = NULL;
+				retry = FALSE;
+
+				conv_len = strlen(string);
+				utf8 = g_convert_with_fallback(string, conv_len, "UTF-8", charset,
+							       "?", &in_len, &out_len, &err);
+				if(err != NULL){
+					if(err->code == G_CONVERT_ERROR_ILLEGAL_SEQUENCE){
+						memmove(string + in_len, string + in_len + 1,
+							 conv_len - in_len -1);
+						conv_len--;
+						*(string + conv_len) = '\0';
+						retry = TRUE;
+					}
+					g_error_free(err);
+				}
+			} while(retry);
 		}
 
-		if (utf8) {
+
+		if(utf8){
 			g_strfreev(encodings);
 			return utf8;
 		}
--- a/pidgin/gtkconv.c	Sun Jun 10 16:36:58 2007 +0000
+++ b/pidgin/gtkconv.c	Sun Jun 10 18:06:26 2007 +0000
@@ -3449,7 +3449,7 @@
 #if ENABLE_SHORTCUT
 	win->menu.send_to = gtk_menu_item_new_with_mnemonic(_("_Send To"));
 #else
-	win->menu.send_to = gtk_menu_item_new_with_mnemonic(_("_Send To"));
+	win->menu.send_to = gtk_menu_item_new_with_mnemonic(_("Send To")); //to free Alt-s. intentional. --yaz
 #endif
 	gtk_widget_show(win->menu.send_to);