comparison libpurple/protocols/irc/parse.c @ 27724:26f55eb6ab59

propagate from branch 'im.pidgin.pidgin' (head 1cb036aecbbe359226e69874379d39ce74c8daf6) to branch 'im.pidgin.pidgin.yaz' (head 99515d90725804d37c9baeec606e2f4e2ec5a61f)
author Yoshiki Yazawa <yaz@honeyplanet.jp>
date Wed, 19 Sep 2007 08:27:28 +0000
parents 44b4e8bd759b abc46fd562bd
children 271d154bbb91
comparison
equal deleted inserted replaced
20541:a9022965162b 27724:26f55eb6ab59
33 #include <stdio.h> 33 #include <stdio.h>
34 #include <stdlib.h> 34 #include <stdlib.h>
35 #include <ctype.h> 35 #include <ctype.h>
36 36
37 static char *irc_send_convert(struct irc_conn *irc, const char *string); 37 static char *irc_send_convert(struct irc_conn *irc, const char *string);
38 static char *irc_recv_convert(struct irc_conn *irc, const char *string); 38 static char *irc_recv_convert(struct irc_conn *irc, char *string);
39 39
40 static void irc_parse_error_cb(struct irc_conn *irc, char *input); 40 static void irc_parse_error_cb(struct irc_conn *irc, char *input);
41 41
42 static char *irc_mirc_colors[16] = { 42 static char *irc_mirc_colors[16] = {
43 "white", "black", "blue", "dark green", "red", "brown", "purple", 43 "white", "black", "blue", "dark green", "red", "brown", "purple",
154 { "whois", "tt", irc_cmd_whois, N_("whois [server] &lt;nick&gt;: Get information on a user.") }, 154 { "whois", "tt", irc_cmd_whois, N_("whois [server] &lt;nick&gt;: Get information on a user.") },
155 { "whowas", "t", irc_cmd_whowas, N_("whowas &lt;nick&gt;: Get information on a user that has logged off.") }, 155 { "whowas", "t", irc_cmd_whowas, N_("whowas &lt;nick&gt;: Get information on a user that has logged off.") },
156 { NULL, NULL, NULL, NULL } 156 { NULL, NULL, NULL, NULL }
157 }; 157 };
158 158
159 /* yaz: character-set state indices plus NUL-terminated 3-byte escape sequences used by the iso-2022-jp send/receive handling in this file. NOTE(review): none of the objects below are declared static, so they have external linkage -- confirm that is intended. */
160 #define ASCII 0 /* index of seq_ascii in seq[] and of "ASCII" in jisstate[] */
161 #define KANJI 1 /* index of seq_kanji in seq[] and of "KANJI" in jisstate[] */
162 #define KANA 2 /* index of seq_kana in seq[] and of "KANA" in jisstate[] */
163 #define ROMAN 3 /* index of seq_roman in seq[] and of "ROMAN" in jisstate[] */
164 char seq_ascii[] = {0x1B,0x28,0x42,0x00}; /* ESC ( B -- sequence paired with the ASCII state */
165 char seq_kanji[] = {0x1B,0x24,0x42,0x00}; /* ESC $ B -- sequence paired with the KANJI state */
166 char seq_kana[] = {0x1B,0x28,0x49,0x00}; /* ESC ( I -- sequence paired with the KANA state */
167 char seq_roman[] = {0x1B,0x28,0x4A,0x00}; /* ESC ( J -- sequence paired with the ROMAN state */
168 char *seq[4] = {seq_ascii, seq_kanji, seq_kana, seq_roman}; /* escape sequence for each state, indexed by ASCII/KANJI/KANA/ROMAN */
169 char *jisstate[5] = {"ASCII", "KANJI", "KANA", "ROMAN"}; /* state names printed in debug messages, same indexing as seq[]. NOTE(review): declared with 5 slots but only 4 initialisers, so the last element is NULL. */
170 char SO[] = {0x0E,0x00}; /* the SO control byte (0x0E) as a NUL-terminated string, so it can be appended with strcat() */
171 char SI[] = {0x0F,0x00}; /* the SI control byte (0x0F) as a NUL-terminated string, so it can be appended with strcat() */
172
159 static PurpleCmdRet irc_parse_purple_cmd(PurpleConversation *conv, const gchar *cmd, 173 static PurpleCmdRet irc_parse_purple_cmd(PurpleConversation *conv, const gchar *cmd,
160 gchar **args, gchar **error, void *data) 174 gchar **args, gchar **error, void *data)
161 { 175 {
162 PurpleConnection *gc; 176 PurpleConnection *gc;
163 struct irc_conn *irc; 177 struct irc_conn *irc;
221 { 235 {
222 char *utf8; 236 char *utf8;
223 GError *err = NULL; 237 GError *err = NULL;
224 gchar **encodings; 238 gchar **encodings;
225 const gchar *enclist; 239 const gchar *enclist;
240 char *escpos = NULL;
241 char *temp = NULL;
242 gboolean iskana = FALSE;
243 char *pos = NULL;
244 gboolean irc_use_sosi, irc_use_8bit;
245 char *strtmp;
246 size_t strtmp_len;
226 247
227 enclist = purple_account_get_string(irc->account, "encoding", IRC_DEFAULT_CHARSET); 248 enclist = purple_account_get_string(irc->account, "encoding", IRC_DEFAULT_CHARSET);
228 encodings = g_strsplit(enclist, ",", 2); 249 encodings = g_strsplit(enclist, ",", 2);
250
251 irc_use_sosi = purple_account_get_bool(irc->account, "irc_use_sosi", FALSE);
252 irc_use_8bit = purple_account_get_bool(irc->account, "irc_use_8bit", FALSE);
229 253
230 if (encodings[0] == NULL || !g_ascii_strcasecmp("UTF-8", encodings[0])) { 254 if (encodings[0] == NULL || !g_ascii_strcasecmp("UTF-8", encodings[0])) {
231 g_strfreev(encodings); 255 g_strfreev(encodings);
232 return g_strdup(string); 256 return g_strdup(string);
233 } 257 }
234 258
235 utf8 = g_convert(string, strlen(string), encodings[0], "UTF-8", NULL, NULL, &err); 259 strtmp = (char *)sanitize_utf((unsigned char *)string, strlen(string), &strtmp_len);
260 utf8 = g_convert(strtmp, strlen(strtmp), encodings[0], "UTF-8", NULL, NULL, &err);
261
236 if (err) { 262 if (err) {
237 purple_debug(PURPLE_DEBUG_ERROR, "irc", "Send conversion error: %s\n", err->message); 263 purple_debug(PURPLE_DEBUG_ERROR, "irc", "Send conversion error: %s\n", err->message);
238 purple_debug(PURPLE_DEBUG_ERROR, "irc", "Sending as UTF-8 instead of %s\n", encodings[0]); 264 purple_debug(PURPLE_DEBUG_ERROR, "irc", "Sending as UTF-8 instead of %s\n", encodings[0]);
239 utf8 = g_strdup(string); 265 utf8 = g_strdup(strtmp);
240 g_error_free(err); 266 g_error_free(err);
241 } 267 }
268
269 /* yaz */
270 if (!strncasecmp("iso-2022-jp", encodings[0], strlen("iso-2022-jp"))) {
271 escpos = strrchr(utf8, 0x1B);
272 if(escpos && (!strncmp(seq_kanji, escpos, 3) || !strncmp(seq_kana, escpos, 3))){
273 char *oldutf8 = utf8;
274 utf8 = g_realloc(utf8, strlen(utf8)+1+3);
275 if(utf8)
276 strncat(utf8, seq_ascii, 3);
277 else
278 utf8 = oldutf8;
279 }
280
281 if(irc_use_sosi || irc_use_8bit){
282 /* SO/SI */
283 //find kana escape and replace with roman+SO
284 temp = g_malloc0(strlen(utf8) * 7); //XXX should be reasonable size
285 pos = utf8;
286 while(pos < utf8+strlen(utf8)){
287 escpos = strchr(pos, 0x1B);
288 if(escpos){
289 if(!strncmp(seq_kana, escpos, 3)){ /* kana found */
290 iskana = TRUE;
291 strncat(temp, pos, escpos-pos);
292 strcat(temp, seq_roman);
293 if(irc_use_sosi)
294 strcat(temp, SO);
295 pos = escpos+3;
296 } else {
297 if(iskana){
298 char *ptr;
299 ptr = temp + strlen(temp);
300 while(pos<escpos){
301 if(irc_use_8bit)
302 *ptr = *pos + 128; // convert to 8bit
303 else
304 *ptr = *pos;
305 ptr++; pos++;
306 }
307 if(irc_use_sosi)
308 strcat(temp, SI);
309 strncat(temp, escpos, 3);
310 pos = escpos+3;
311 iskana = FALSE;
312 } else {
313 strncat(temp, pos, escpos-pos+3); //include esc
314 pos = escpos+3;
315 }
316 }
317 } else { /* escpos == NULL */
318 strcat(temp, pos);
319 break;
320 }
321 }
322 g_free(utf8);
323 utf8 = temp;
324 }
325 }
326
242 g_strfreev(encodings); 327 g_strfreev(encodings);
243 328 g_free(strtmp);
244 return utf8; 329 return utf8;
245 } 330 }
246 331
247 static char *irc_recv_convert(struct irc_conn *irc, const char *string) 332 static char *irc_recv_convert(struct irc_conn *irc, char *string)
248 { 333 {
249 char *utf8 = NULL; 334 char *utf8 = NULL;
250 const gchar *charset, *enclist; 335 const gchar *charset, *enclist;
251 gchar **encodings; 336 gchar **encodings;
252 int i; 337 int i;
338 GError *err;
339 gboolean retry;
340 gsize in_len, out_len;
341 int conv_len;
342 char *strtmp;
343 size_t strtmp_len;
253 344
254 enclist = purple_account_get_string(irc->account, "encoding", IRC_DEFAULT_CHARSET); 345 enclist = purple_account_get_string(irc->account, "encoding", IRC_DEFAULT_CHARSET);
255 encodings = g_strsplit(enclist, ",", -1); 346 encodings = g_strsplit(enclist, ",", -1);
256 347
257 if (encodings[0] == NULL) { 348 if (encodings[0] == NULL) {
265 charset++; 356 charset++;
266 357
267 if (!g_ascii_strcasecmp("UTF-8", charset)) { 358 if (!g_ascii_strcasecmp("UTF-8", charset)) {
268 if (g_utf8_validate(string, -1, NULL)) 359 if (g_utf8_validate(string, -1, NULL))
269 utf8 = g_strdup(string); 360 utf8 = g_strdup(string);
361 } else if (!strncasecmp("iso-2022-jp-2", charset, strlen("iso-2022-jp-2"))){
362 /* pre-process quirky jis */
363 unsigned char *jisstr;
364 unsigned char *ptr, *ptr2;
365 int state = ASCII;
366 int is8bit = FALSE;
367
368 jisstr = (unsigned char *)calloc(1, strlen(string)*7); /* enough? */
369 ptr = (unsigned char *)string; ptr2 = jisstr;
370
371 while(*ptr){
372 if(*ptr == 0x1B){
373 /* escape sequence. */
374 if(*(ptr+1) == 0x28 && *(ptr+2) == 0x42){
375 state = ASCII;
376
377 } else if(*(ptr+1) == 0x24 && *(ptr+2) == 0x42){
378 state = KANJI;
379
380 } else if(*(ptr+1) == 0x28 && *(ptr+2) == 0x49){
381 state = KANA;
382
383 } else if(*(ptr+1) == 0x28 && *(ptr+2) == 0x4a){
384 state = ROMAN;
385
386 }
387 purple_debug(PURPLE_DEBUG_INFO, "irc", "state %s\n", jisstate[state]);
388 }
389 if(*ptr >= 0xA1 && *ptr <= 0xDF){
390 /* raw 8bit */
391 if(!is8bit){
392 strcat((char *)jisstr, seq[KANA]);
393 ptr2 += 3;
394 is8bit = TRUE;
395 purple_debug(PURPLE_DEBUG_INFO, "irc", "8bit = TRUE\n");
396 }
397 *ptr2 = *ptr - 0x80;
398 ptr++ ; ptr2++;
399 } else {
400 /* 7bit */
401 if(*ptr == 0x0E){
402 /* SO */
403 strcat((char *)jisstr, seq[KANA]);
404 ptr++; ptr2 += 3;
405 purple_debug(PURPLE_DEBUG_INFO, "irc", "SO\n");
406 continue;
407 } else if(*ptr == 0x0F){
408 /* SI */
409 strcat((char *)jisstr, seq[state]);
410 purple_debug(PURPLE_DEBUG_INFO, "irc", "SI to %s\n", jisstate[state]);
411 ptr++; ptr2 += 3;
412 purple_debug(PURPLE_DEBUG_INFO, "irc", "SI\n");
413 continue;
414 }
415 if(is8bit){ /* the edge of 8bit -> 7bit */
416 purple_debug(PURPLE_DEBUG_INFO, "irc", "8bit to %s\n", jisstate[state]);
417 strcat((char *)jisstr, seq[state]);
418 ptr2 += 3;
419 is8bit=FALSE;
420 purple_debug(PURPLE_DEBUG_INFO, "irc", "8bit = FALSE\n");
421 }
422 /* copy str */
423 *ptr2 = *ptr;
424 ptr++; ptr2++;
425 }
426 }
427
428 /* convert & error recovery */
429 do {
430 err = NULL;
431 retry = FALSE;
432
433 conv_len = strlen((char *)jisstr);
434 utf8 = g_convert_with_fallback((char *)jisstr, conv_len, "UTF-8", charset,
435 "?", &in_len, &out_len, &err);
436 if(err != NULL){
437 if(err->code == G_CONVERT_ERROR_ILLEGAL_SEQUENCE){
438 memmove(jisstr + in_len, jisstr + in_len + 1,
439 conv_len - in_len -1);
440 conv_len--;
441 *(jisstr + conv_len) = '\0';
442 retry = TRUE;
443 }
444 g_error_free(err);
445 }
446 } while(retry);
447
448 if(jisstr)
449 free(jisstr);
450
270 } else { 451 } else {
271 utf8 = g_convert(string, -1, "UTF-8", charset, NULL, NULL, NULL); 452 do {
272 } 453 err = NULL;
273 454 retry = FALSE;
274 if (utf8) { 455
275 g_strfreev(encodings); 456 conv_len = strlen(string);
276 return utf8; 457 utf8 = g_convert_with_fallback(string, conv_len, "UTF-8", charset,
458 "?", &in_len, &out_len, &err);
459 if(err != NULL){
460 if(err->code == G_CONVERT_ERROR_ILLEGAL_SEQUENCE){
461 memmove(string + in_len, string + in_len + 1,
462 conv_len - in_len -1);
463 conv_len--;
464 *(string + conv_len) = '\0';
465 retry = TRUE;
466 }
467 g_error_free(err);
468 }
469 } while(retry);
470 }
471
472
473 if(utf8){
474 strtmp = (char *)botch_utf((unsigned char *)utf8, strlen(utf8), &strtmp_len);
475 g_strfreev(encodings);
476 g_free(utf8);
477 return strtmp;
277 } 478 }
278 } 479 }
279 g_strfreev(encodings); 480 g_strfreev(encodings);
280 481
281 return purple_utf8_salvage(string); 482 return purple_utf8_salvage(string);