Mercurial > pidgin.yaz
comparison libpurple/protocols/irc/parse.c @ 27724:26f55eb6ab59
propagate from branch 'im.pidgin.pidgin' (head 1cb036aecbbe359226e69874379d39ce74c8daf6)
to branch 'im.pidgin.pidgin.yaz' (head 99515d90725804d37c9baeec606e2f4e2ec5a61f)
author | Yoshiki Yazawa <yaz@honeyplanet.jp> |
---|---|
date | Wed, 19 Sep 2007 08:27:28 +0000 |
parents | 44b4e8bd759b abc46fd562bd |
children | 271d154bbb91 |
comparison
equal
deleted
inserted
replaced
20541:a9022965162b | 27724:26f55eb6ab59 |
---|---|
33 #include <stdio.h> | 33 #include <stdio.h> |
34 #include <stdlib.h> | 34 #include <stdlib.h> |
35 #include <ctype.h> | 35 #include <ctype.h> |
36 | 36 |
37 static char *irc_send_convert(struct irc_conn *irc, const char *string); | 37 static char *irc_send_convert(struct irc_conn *irc, const char *string); |
38 static char *irc_recv_convert(struct irc_conn *irc, const char *string); | 38 static char *irc_recv_convert(struct irc_conn *irc, char *string); |
39 | 39 |
40 static void irc_parse_error_cb(struct irc_conn *irc, char *input); | 40 static void irc_parse_error_cb(struct irc_conn *irc, char *input); |
41 | 41 |
42 static char *irc_mirc_colors[16] = { | 42 static char *irc_mirc_colors[16] = { |
43 "white", "black", "blue", "dark green", "red", "brown", "purple", | 43 "white", "black", "blue", "dark green", "red", "brown", "purple", |
154 { "whois", "tt", irc_cmd_whois, N_("whois [server] <nick>: Get information on a user.") }, | 154 { "whois", "tt", irc_cmd_whois, N_("whois [server] <nick>: Get information on a user.") }, |
155 { "whowas", "t", irc_cmd_whowas, N_("whowas <nick>: Get information on a user that has logged off.") }, | 155 { "whowas", "t", irc_cmd_whowas, N_("whowas <nick>: Get information on a user that has logged off.") }, |
156 { NULL, NULL, NULL, NULL } | 156 { NULL, NULL, NULL, NULL } |
157 }; | 157 }; |
158 | 158 |
159 /* yaz: ISO-2022-JP shift states (indexes into seq[] and jisstate[]) and the 3-byte escape sequences that select them */ | |
160 #define ASCII 0 | |
161 #define KANJI 1 | |
162 #define KANA 2 | |
163 #define ROMAN 3 | |
164 char seq_ascii[] = {0x1B,0x28,0x42,0x00}; /* ESC ( B -- selects the ASCII state */ | |
165 char seq_kanji[] = {0x1B,0x24,0x42,0x00}; /* ESC $ B -- selects the KANJI state */ | |
166 char seq_kana[] = {0x1B,0x28,0x49,0x00}; /* ESC ( I -- selects the KANA state */ | |
167 char seq_roman[] = {0x1B,0x28,0x4A,0x00}; /* ESC ( J -- selects the ROMAN state */ | |
168 char *seq[4] = {seq_ascii, seq_kanji, seq_kana, seq_roman}; /* escape sequences indexed by the state constants above */ | |
169 char *jisstate[5] = {"ASCII", "KANJI", "KANA", "ROMAN"}; /* state names printed in debug output; declared with 5 slots but only 4 are initialized */ | |
170 char SO[] = {0x0E,0x00}; /* SO control byte as a NUL-terminated string so it can be strcat()ed */ | |
171 char SI[] = {0x0F,0x00}; /* SI control byte as a NUL-terminated string so it can be strcat()ed */ | |
172 | |
159 static PurpleCmdRet irc_parse_purple_cmd(PurpleConversation *conv, const gchar *cmd, | 173 static PurpleCmdRet irc_parse_purple_cmd(PurpleConversation *conv, const gchar *cmd, |
160 gchar **args, gchar **error, void *data) | 174 gchar **args, gchar **error, void *data) |
161 { | 175 { |
162 PurpleConnection *gc; | 176 PurpleConnection *gc; |
163 struct irc_conn *irc; | 177 struct irc_conn *irc; |
221 { | 235 { |
222 char *utf8; | 236 char *utf8; |
223 GError *err = NULL; | 237 GError *err = NULL; |
224 gchar **encodings; | 238 gchar **encodings; |
225 const gchar *enclist; | 239 const gchar *enclist; |
240 char *escpos = NULL; | |
241 char *temp = NULL; | |
242 gboolean iskana = FALSE; | |
243 char *pos = NULL; | |
244 gboolean irc_use_sosi, irc_use_8bit; | |
245 char *strtmp; | |
246 size_t strtmp_len; | |
226 | 247 |
227 enclist = purple_account_get_string(irc->account, "encoding", IRC_DEFAULT_CHARSET); | 248 enclist = purple_account_get_string(irc->account, "encoding", IRC_DEFAULT_CHARSET); |
228 encodings = g_strsplit(enclist, ",", 2); | 249 encodings = g_strsplit(enclist, ",", 2); |
250 | |
251 irc_use_sosi = purple_account_get_bool(irc->account, "irc_use_sosi", FALSE); | |
252 irc_use_8bit = purple_account_get_bool(irc->account, "irc_use_8bit", FALSE); | |
229 | 253 |
230 if (encodings[0] == NULL || !g_ascii_strcasecmp("UTF-8", encodings[0])) { | 254 if (encodings[0] == NULL || !g_ascii_strcasecmp("UTF-8", encodings[0])) { |
231 g_strfreev(encodings); | 255 g_strfreev(encodings); |
232 return g_strdup(string); | 256 return g_strdup(string); |
233 } | 257 } |
234 | 258 |
235 utf8 = g_convert(string, strlen(string), encodings[0], "UTF-8", NULL, NULL, &err); | 259 strtmp = (char *)sanitize_utf((unsigned char *)string, strlen(string), &strtmp_len); |
260 utf8 = g_convert(strtmp, strlen(strtmp), encodings[0], "UTF-8", NULL, NULL, &err); | |
261 | |
236 if (err) { | 262 if (err) { |
237 purple_debug(PURPLE_DEBUG_ERROR, "irc", "Send conversion error: %s\n", err->message); | 263 purple_debug(PURPLE_DEBUG_ERROR, "irc", "Send conversion error: %s\n", err->message); |
238 purple_debug(PURPLE_DEBUG_ERROR, "irc", "Sending as UTF-8 instead of %s\n", encodings[0]); | 264 purple_debug(PURPLE_DEBUG_ERROR, "irc", "Sending as UTF-8 instead of %s\n", encodings[0]); |
239 utf8 = g_strdup(string); | 265 utf8 = g_strdup(strtmp); |
240 g_error_free(err); | 266 g_error_free(err); |
241 } | 267 } |
268 | |
269 /* yaz */ | |
270 if (!strncasecmp("iso-2022-jp", encodings[0], strlen("iso-2022-jp"))) { | |
271 escpos = strrchr(utf8, 0x1B); | |
272 if(escpos && (!strncmp(seq_kanji, escpos, 3) || !strncmp(seq_kana, escpos, 3))){ | |
273 char *oldutf8 = utf8; | |
274 utf8 = g_realloc(utf8, strlen(utf8)+1+3); | |
275 if(utf8) | |
276 strncat(utf8, seq_ascii, 3); | |
277 else | |
278 utf8 = oldutf8; | |
279 } | |
280 | |
281 if(irc_use_sosi || irc_use_8bit){ | |
282 /* SO/SI */ | |
283 //find kana escape and replace with roman+SO | |
284 temp = g_malloc0(strlen(utf8) * 7); //XXX should be reasonable size | |
285 pos = utf8; | |
286 while(pos < utf8+strlen(utf8)){ | |
287 escpos = strchr(pos, 0x1B); | |
288 if(escpos){ | |
289 if(!strncmp(seq_kana, escpos, 3)){ /* kana found */ | |
290 iskana = TRUE; | |
291 strncat(temp, pos, escpos-pos); | |
292 strcat(temp, seq_roman); | |
293 if(irc_use_sosi) | |
294 strcat(temp, SO); | |
295 pos = escpos+3; | |
296 } else { | |
297 if(iskana){ | |
298 char *ptr; | |
299 ptr = temp + strlen(temp); | |
300 while(pos<escpos){ | |
301 if(irc_use_8bit) | |
302 *ptr = *pos + 128; // convert to 8bit | |
303 else | |
304 *ptr = *pos; | |
305 ptr++; pos++; | |
306 } | |
307 if(irc_use_sosi) | |
308 strcat(temp, SI); | |
309 strncat(temp, escpos, 3); | |
310 pos = escpos+3; | |
311 iskana = FALSE; | |
312 } else { | |
313 strncat(temp, pos, escpos-pos+3); //include esc | |
314 pos = escpos+3; | |
315 } | |
316 } | |
317 } else { /* escpos == NULL */ | |
318 strcat(temp, pos); | |
319 break; | |
320 } | |
321 } | |
322 g_free(utf8); | |
323 utf8 = temp; | |
324 } | |
325 } | |
326 | |
242 g_strfreev(encodings); | 327 g_strfreev(encodings); |
243 | 328 g_free(strtmp); |
244 return utf8; | 329 return utf8; |
245 } | 330 } |
246 | 331 |
247 static char *irc_recv_convert(struct irc_conn *irc, const char *string) | 332 static char *irc_recv_convert(struct irc_conn *irc, char *string) |
248 { | 333 { |
249 char *utf8 = NULL; | 334 char *utf8 = NULL; |
250 const gchar *charset, *enclist; | 335 const gchar *charset, *enclist; |
251 gchar **encodings; | 336 gchar **encodings; |
252 int i; | 337 int i; |
338 GError *err; | |
339 gboolean retry; | |
340 gsize in_len, out_len; | |
341 int conv_len; | |
342 char *strtmp; | |
343 size_t strtmp_len; | |
253 | 344 |
254 enclist = purple_account_get_string(irc->account, "encoding", IRC_DEFAULT_CHARSET); | 345 enclist = purple_account_get_string(irc->account, "encoding", IRC_DEFAULT_CHARSET); |
255 encodings = g_strsplit(enclist, ",", -1); | 346 encodings = g_strsplit(enclist, ",", -1); |
256 | 347 |
257 if (encodings[0] == NULL) { | 348 if (encodings[0] == NULL) { |
265 charset++; | 356 charset++; |
266 | 357 |
267 if (!g_ascii_strcasecmp("UTF-8", charset)) { | 358 if (!g_ascii_strcasecmp("UTF-8", charset)) { |
268 if (g_utf8_validate(string, -1, NULL)) | 359 if (g_utf8_validate(string, -1, NULL)) |
269 utf8 = g_strdup(string); | 360 utf8 = g_strdup(string); |
361 } else if (!strncasecmp("iso-2022-jp-2", charset, strlen("iso-2022-jp-2"))){ | |
362 /* pre-process quirky jis */ | |
363 unsigned char *jisstr; | |
364 unsigned char *ptr, *ptr2; | |
365 int state = ASCII; | |
366 int is8bit = FALSE; | |
367 | |
368 jisstr = (unsigned char *)calloc(1, strlen(string)*7); /* enough? */ | |
369 ptr = (unsigned char *)string; ptr2 = jisstr; | |
370 | |
371 while(*ptr){ | |
372 if(*ptr == 0x1B){ | |
373 /* escape sequence. */ | |
374 if(*(ptr+1) == 0x28 && *(ptr+2) == 0x42){ | |
375 state = ASCII; | |
376 | |
377 } else if(*(ptr+1) == 0x24 && *(ptr+2) == 0x42){ | |
378 state = KANJI; | |
379 | |
380 } else if(*(ptr+1) == 0x28 && *(ptr+2) == 0x49){ | |
381 state = KANA; | |
382 | |
383 } else if(*(ptr+1) == 0x28 && *(ptr+2) == 0x4a){ | |
384 state = ROMAN; | |
385 | |
386 } | |
387 purple_debug(PURPLE_DEBUG_INFO, "irc", "state %s\n", jisstate[state]); | |
388 } | |
389 if(*ptr >= 0xA1 && *ptr <= 0xDF){ | |
390 /* raw 8bit */ | |
391 if(!is8bit){ | |
392 strcat((char *)jisstr, seq[KANA]); | |
393 ptr2 += 3; | |
394 is8bit = TRUE; | |
395 purple_debug(PURPLE_DEBUG_INFO, "irc", "8bit = TRUE\n"); | |
396 } | |
397 *ptr2 = *ptr - 0x80; | |
398 ptr++ ; ptr2++; | |
399 } else { | |
400 /* 7bit */ | |
401 if(*ptr == 0x0E){ | |
402 /* SO */ | |
403 strcat((char *)jisstr, seq[KANA]); | |
404 ptr++; ptr2 += 3; | |
405 purple_debug(PURPLE_DEBUG_INFO, "irc", "SO\n"); | |
406 continue; | |
407 } else if(*ptr == 0x0F){ | |
408 /* SI */ | |
409 strcat((char *)jisstr, seq[state]); | |
410 purple_debug(PURPLE_DEBUG_INFO, "irc", "SI to %s\n", jisstate[state]); | |
411 ptr++; ptr2 += 3; | |
412 purple_debug(PURPLE_DEBUG_INFO, "irc", "SI\n"); | |
413 continue; | |
414 } | |
415 if(is8bit){ /* the edge of 8bit -> 7bit */ | |
416 purple_debug(PURPLE_DEBUG_INFO, "irc", "8bit to %s\n", jisstate[state]); | |
417 strcat((char *)jisstr, seq[state]); | |
418 ptr2 += 3; | |
419 is8bit=FALSE; | |
420 purple_debug(PURPLE_DEBUG_INFO, "irc", "8bit = FALSE\n"); | |
421 } | |
422 /* copy str */ | |
423 *ptr2 = *ptr; | |
424 ptr++; ptr2++; | |
425 } | |
426 } | |
427 | |
428 /* convert & error recovery */ | |
429 do { | |
430 err = NULL; | |
431 retry = FALSE; | |
432 | |
433 conv_len = strlen((char *)jisstr); | |
434 utf8 = g_convert_with_fallback((char *)jisstr, conv_len, "UTF-8", charset, | |
435 "?", &in_len, &out_len, &err); | |
436 if(err != NULL){ | |
437 if(err->code == G_CONVERT_ERROR_ILLEGAL_SEQUENCE){ | |
438 memmove(jisstr + in_len, jisstr + in_len + 1, | |
439 conv_len - in_len -1); | |
440 conv_len--; | |
441 *(jisstr + conv_len) = '\0'; | |
442 retry = TRUE; | |
443 } | |
444 g_error_free(err); | |
445 } | |
446 } while(retry); | |
447 | |
448 if(jisstr) | |
449 free(jisstr); | |
450 | |
270 } else { | 451 } else { |
271 utf8 = g_convert(string, -1, "UTF-8", charset, NULL, NULL, NULL); | 452 do { |
272 } | 453 err = NULL; |
273 | 454 retry = FALSE; |
274 if (utf8) { | 455 |
275 g_strfreev(encodings); | 456 conv_len = strlen(string); |
276 return utf8; | 457 utf8 = g_convert_with_fallback(string, conv_len, "UTF-8", charset, |
458 "?", &in_len, &out_len, &err); | |
459 if(err != NULL){ | |
460 if(err->code == G_CONVERT_ERROR_ILLEGAL_SEQUENCE){ | |
461 memmove(string + in_len, string + in_len + 1, | |
462 conv_len - in_len -1); | |
463 conv_len--; | |
464 *(string + conv_len) = '\0'; | |
465 retry = TRUE; | |
466 } | |
467 g_error_free(err); | |
468 } | |
469 } while(retry); | |
470 } | |
471 | |
472 | |
473 if(utf8){ | |
474 strtmp = (char *)botch_utf((unsigned char *)utf8, strlen(utf8), &strtmp_len); | |
475 g_strfreev(encodings); | |
476 g_free(utf8); | |
477 return strtmp; | |
277 } | 478 } |
278 } | 479 } |
279 g_strfreev(encodings); | 480 g_strfreev(encodings); |
280 | 481 |
281 return purple_utf8_salvage(string); | 482 return purple_utf8_salvage(string); |