Mercurial > audlegacy
changeset 2297:a9bc621d6b1b trunk
[svn] libguess update:
- follow the update of upstream.
- now precedence orders of encodings are explicitly specifiable on compile time.
- make UTF-8 the highest ordered eoncoding. (it may cope with the problems described in #738.)
author | yaz |
---|---|
date | Sun, 07 Jan 2007 21:17:40 -0800 |
parents | b9895bba71d0 |
children | 42b6473a4593 |
files | ChangeLog libguess/guess.c |
diffstat | 2 files changed, 163 insertions(+), 168 deletions(-) [+] |
line wrap: on
line diff
--- a/ChangeLog Sun Jan 07 15:23:46 2007 -0800 +++ b/ChangeLog Sun Jan 07 21:17:40 2007 -0800 @@ -1,3 +1,11 @@ +2007-01-07 23:23:46 +0000 William Pitcock <nenolod@nenolod.net> + revision [3600] + - fix XML definition for missing action: "track info" -> "current track info" + + trunk/audacious/ui/mainwin.ui | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + + 2007-01-07 23:21:28 +0000 William Pitcock <nenolod@nenolod.net> revision [3598] - fix ** CRITICAL **: bmp_rcfile_write_string: assertion `value != NULL'
--- a/libguess/guess.c Sun Jan 07 15:23:46 2007 -0800 +++ b/libguess/guess.c Sun Jan 07 21:17:40 2007 -0800 @@ -40,32 +40,35 @@ #include "libguess.h" #define NULL ((void *)0) -/* take precedence if scores are same. */ -#undef PREFER_UTF8 -#undef PREFER_SJIS -#undef PREFER_BIG5 -#undef PREFER_GB18030 -#undef PREFER_JOHAB +/* take precedence if scores are same. you can customize the order as: */ +/* ORDER_** &highest, &second, ... &lowest */ +#define ORDER_JP &utf8, &sjis, &eucj +#define ORDER_TW &utf8, &big5 +#define ORDER_CN &utf8, &gb2312, &gb18030 +#define ORDER_KR &utf8, &euck, &johab -/* workaround for that glib's g_convert can't convert properly from UCS-2BE/LE trailing after BOM. */ +/* workaround for that glib's g_convert can't convert + properly from UCS-2BE/LE trailing after BOM. */ #define WITH_G_CONVERT 1 /* #undef WITH_G_CONVERT */ #ifdef WITH_G_CONVERT -const char UCS_2BE[]="UTF-16"; -const char UCS_2LE[]="UTF-16"; +const char UCS_2BE[] = "UTF-16"; +const char UCS_2LE[] = "UTF-16"; #else -const char UCS_2BE[]="UCS-2BE"; -const char UCS_2LE[]="UCS-2LE"; +const char UCS_2BE[] = "UCS-2BE"; +const char UCS_2LE[] = "UCS-2LE"; #endif /* data types */ -typedef struct guess_arc_rec { +typedef struct guess_arc_rec +{ unsigned int next; /* next state */ double score; /* score */ } guess_arc; -typedef struct guess_dfa_rec { +typedef struct guess_dfa_rec +{ signed char (*states)[256]; guess_arc *arcs; int state; @@ -103,41 +106,49 @@ guess_dfa utf8 = DFA_INIT(guess_utf8_st, guess_utf8_ar); guess_dfa *top = NULL; - for (i=0; i<buflen; i++) { - int c = (unsigned char)buf[i]; + guess_dfa *order[] = { ORDER_JP, NULL }; + + for (i = 0; i < buflen; i++) { + int c = (unsigned char) buf[i]; /* special treatment of iso-2022 escape sequence */ if (c == 0x1b) { - if (i < buflen-1) { - c = (unsigned char)buf[++i]; - if (c == '$' || c == '(') return "ISO-2022-JP"; + if (i < buflen - 1) { + c = (unsigned char) buf[++i]; + if (c == '$' || c == '(') + return "ISO-2022-JP"; } } - + /* special treatment of BOM */ - if (i==0 && c == 0xff) { - if (i < buflen-1) { - c = (unsigned char)buf[i+1]; - if (c == 0xfe) return UCS_2LE; + if (i == 0 && c == 0xff) { + if (i < buflen - 1) { + c = (unsigned char) buf[i + 1]; + if (c == 0xfe) + return UCS_2LE; } } - if (i==0 && c == 0xfe) { - if (i < buflen-1) { - c = (unsigned char)buf[i+1]; - if (c == 0xff) return UCS_2BE; + if (i == 0 && c == 0xfe) { + if (i < buflen - 1) { + c = (unsigned char) buf[i + 1]; + if (c == 0xff) + return UCS_2BE; } } if (DFA_ALIVE(eucj)) { - if (!DFA_ALIVE(sjis) && !DFA_ALIVE(utf8)) return "EUC-JP"; + if (!DFA_ALIVE(sjis) && !DFA_ALIVE(utf8)) + return "EUC-JP"; DFA_NEXT(eucj, c); } if (DFA_ALIVE(sjis)) { - if (!DFA_ALIVE(eucj) && !DFA_ALIVE(utf8)) return "SJIS"; + if (!DFA_ALIVE(eucj) && !DFA_ALIVE(utf8)) + return "SJIS"; DFA_NEXT(sjis, c); } if (DFA_ALIVE(utf8)) { - if (!DFA_ALIVE(sjis) && !DFA_ALIVE(eucj)) return "UTF-8"; + if (!DFA_ALIVE(sjis) && !DFA_ALIVE(eucj)) + return "UTF-8"; DFA_NEXT(utf8, c); } @@ -149,33 +160,19 @@ /* Now, we have ambigous code. Pick the highest score. If more than one candidate tie, pick the default encoding. */ - if (DFA_ALIVE(eucj)) top = &eucj; - if (DFA_ALIVE(utf8)) { - if (top) { -#if defined PREFER_UTF8 - if (top->score <= utf8.score) top = &utf8; -#else - if (top->score < utf8.score) top = &utf8; -#endif - } else { - top = &utf8; - } - } - if (DFA_ALIVE(sjis)) { - if (top) { -#if defined PREFER_SJIS - if (top->score <= sjis.score) top = &sjis; -#else - if (top->score < sjis.score) top = &sjis; -#endif - } else { - top = &sjis; + for (i = 0; order[i] != NULL; i++) { + if (order[i]->state >= 0) { //DFA_ALIVE() + if (top == NULL || order[i]->score > top->score) + top = order[i]; } } - if (top == &eucj) return "EUC-JP"; - if (top == &utf8) return "UTF-8"; - if (top == &sjis) return "SJIS"; + if (top == &eucj) + return "EUC-JP"; + if (top == &utf8) + return "UTF-8"; + if (top == &sjis) + return "SJIS"; return NULL; } @@ -186,37 +183,44 @@ guess_dfa utf8 = DFA_INIT(guess_utf8_st, guess_utf8_ar); guess_dfa *top = NULL; - for (i=0; i<buflen; i++) { - int c = (unsigned char)buf[i]; + guess_dfa *order[] = { ORDER_TW, NULL }; + + for (i = 0; i < buflen; i++) { + int c = (unsigned char) buf[i]; /* special treatment of iso-2022 escape sequence */ if (c == 0x1b) { - if (i < buflen-1) { - c = (unsigned char)buf[++i]; - if (c == '$' || c == '(') return "ISO-2022-TW"; + if (i < buflen - 1) { + c = (unsigned char) buf[++i]; + if (c == '$' || c == '(') + return "ISO-2022-TW"; } } - + /* special treatment of BOM */ - if (i==0 && c == 0xff) { - if (i < buflen-1) { - c = (unsigned char)buf[i+1]; - if (c == 0xfe) return UCS_2LE; + if (i == 0 && c == 0xff) { + if (i < buflen - 1) { + c = (unsigned char) buf[i + 1]; + if (c == 0xfe) + return UCS_2LE; } } - if (i==0 && c == 0xfe) { - if (i < buflen-1) { - c = (unsigned char)buf[i+1]; - if (c == 0xff) return UCS_2BE; + if (i == 0 && c == 0xfe) { + if (i < buflen - 1) { + c = (unsigned char) buf[i + 1]; + if (c == 0xff) + return UCS_2BE; } } if (DFA_ALIVE(big5)) { - if (!DFA_ALIVE(utf8)) return "BIG5"; + if (!DFA_ALIVE(utf8)) + return "BIG5"; DFA_NEXT(big5, c); } if (DFA_ALIVE(utf8)) { - if (!DFA_ALIVE(big5)) return "UTF-8"; + if (!DFA_ALIVE(big5)) + return "UTF-8"; DFA_NEXT(utf8, c); } @@ -228,21 +232,17 @@ /* Now, we have ambigous code. Pick the highest score. If more than one candidate tie, pick the default encoding. */ - if (DFA_ALIVE(big5)) top = &big5; - if (DFA_ALIVE(utf8)) { - if (top) { -#if defined PREFER_UTF8 - if (top->score <= utf8.score) top = &utf8; -#else - if (top->score < utf8.score) top = &utf8; -#endif - } else { - top = &utf8; + for (i = 0; order[i] != NULL; i++) { + if (order[i]->state >= 0) { //DFA_ALIVE() + if (top == NULL || order[i]->score > top->score) + top = order[i]; } } - if (top == &big5) return "BIG5"; - if (top == &utf8) return "UTF-8"; + if (top == &big5) + return "BIG5"; + if (top == &utf8) + return "UTF-8"; return NULL; } @@ -254,43 +254,51 @@ guess_dfa gb18030 = DFA_INIT(guess_gb18030_st, guess_gb18030_ar); guess_dfa *top = NULL; - for (i=0; i<buflen; i++) { - int c = (unsigned char)buf[i]; + guess_dfa *order[] = { ORDER_CN, NULL }; + + for (i = 0; i < buflen; i++) { + int c = (unsigned char) buf[i]; int c2; /* special treatment of iso-2022 escape sequence */ if (c == 0x1b) { - if (i < buflen-1) { - c = (unsigned char)buf[i+1]; - c2 = (unsigned char)buf[i+2]; - if (c == '$' && (c2 == ')' || c2 == '+')) return "ISO-2022-CN"; + if (i < buflen - 1) { + c = (unsigned char) buf[i + 1]; + c2 = (unsigned char) buf[i + 2]; + if (c == '$' && (c2 == ')' || c2 == '+')) + return "ISO-2022-CN"; } } - + /* special treatment of BOM */ - if (i==0 && c == 0xff) { - if (i < buflen-1) { - c = (unsigned char)buf[i+1]; - if (c == 0xfe) return UCS_2LE; + if (i == 0 && c == 0xff) { + if (i < buflen - 1) { + c = (unsigned char) buf[i + 1]; + if (c == 0xfe) + return UCS_2LE; } } - if (i==0 && c == 0xfe) { - if (i < buflen-1) { - c = (unsigned char)buf[i+1]; - if (c == 0xff) return UCS_2BE; + if (i == 0 && c == 0xfe) { + if (i < buflen - 1) { + c = (unsigned char) buf[i + 1]; + if (c == 0xff) + return UCS_2BE; } } if (DFA_ALIVE(gb2312)) { - if (!DFA_ALIVE(utf8) && !DFA_ALIVE(gb18030)) return "GB2312"; + if (!DFA_ALIVE(utf8) && !DFA_ALIVE(gb18030)) + return "GB2312"; DFA_NEXT(gb2312, c); } if (DFA_ALIVE(utf8)) { - if (!DFA_ALIVE(gb2312) && !DFA_ALIVE(gb18030)) return "UTF-8"; + if (!DFA_ALIVE(gb2312) && !DFA_ALIVE(gb18030)) + return "UTF-8"; DFA_NEXT(utf8, c); } if (DFA_ALIVE(gb18030)) { - if (!DFA_ALIVE(utf8) && !DFA_ALIVE(gb2312)) return "GB18030"; + if (!DFA_ALIVE(utf8) && !DFA_ALIVE(gb2312)) + return "GB18030"; DFA_NEXT(gb18030, c); } @@ -302,33 +310,19 @@ /* Now, we have ambigous code. Pick the highest score. If more than one candidate tie, pick the default encoding. */ - if (DFA_ALIVE(gb2312)) top = &gb2312; - if (DFA_ALIVE(utf8)) { - if (top) { -#if defined PREFER_UTF8 - if (top->score <= utf8.score) top = &utf8; -#else - if (top->score < utf8.score) top = &utf8; -#endif - } else { - top = &utf8; - } - } - if (DFA_ALIVE(gb18030)) { - if (top) { -#if defined PREFER_GB18030 - if (top->score <= gb18030.score) top = &gb18030; -#else - if (top->score < gb18030.score) top = &gb18030; -#endif - } else { - top = &gb18030; + for (i = 0; order[i] != NULL; i++) { + if (order[i]->state >= 0) { //DFA_ALIVE() + if (top == NULL || order[i]->score > top->score) + top = order[i]; } } - if (top == &gb2312) return "GB2312"; - if (top == &utf8) return "UTF-8"; - if (top == &gb18030) return "GB18030"; + if (top == &gb2312) + return "GB2312"; + if (top == &utf8) + return "UTF-8"; + if (top == &gb18030) + return "GB18030"; return NULL; } @@ -340,43 +334,51 @@ guess_dfa johab = DFA_INIT(guess_johab_st, guess_johab_ar); guess_dfa *top = NULL; - for (i=0; i<buflen; i++) { - int c = (unsigned char)buf[i]; - int c2; + guess_dfa *order[] = { ORDER_KR, NULL }; + + for (i = 0; i < buflen; i++) { + int c = (unsigned char) buf[i]; + int c2; /* special treatment of iso-2022 escape sequence */ if (c == 0x1b) { - if (i < buflen-1) { - c = (unsigned char)buf[i+1]; - c2 = (unsigned char)buf[i+2]; - if (c == '$' && c2 == ')') return "ISO-2022-KR"; + if (i < buflen - 1) { + c = (unsigned char) buf[i + 1]; + c2 = (unsigned char) buf[i + 2]; + if (c == '$' && c2 == ')') + return "ISO-2022-KR"; } } - + /* special treatment of BOM */ - if (i==0 && c == 0xff) { - if (i < buflen-1) { - c = (unsigned char)buf[i+1]; - if (c == 0xfe) return UCS_2LE; + if (i == 0 && c == 0xff) { + if (i < buflen - 1) { + c = (unsigned char) buf[i + 1]; + if (c == 0xfe) + return UCS_2LE; } } - if (i==0 && c == 0xfe) { - if (i < buflen-1) { - c = (unsigned char)buf[i+1]; - if (c == 0xff) return UCS_2BE; + if (i == 0 && c == 0xfe) { + if (i < buflen - 1) { + c = (unsigned char) buf[i + 1]; + if (c == 0xff) + return UCS_2BE; } } if (DFA_ALIVE(euck)) { - if (!DFA_ALIVE(johab) && !DFA_ALIVE(utf8)) return "EUC-KR"; + if (!DFA_ALIVE(johab) && !DFA_ALIVE(utf8)) + return "EUC-KR"; DFA_NEXT(euck, c); } if (DFA_ALIVE(johab)) { - if (!DFA_ALIVE(euck) && !DFA_ALIVE(utf8)) return "JOHAB"; + if (!DFA_ALIVE(euck) && !DFA_ALIVE(utf8)) + return "JOHAB"; DFA_NEXT(johab, c); } if (DFA_ALIVE(utf8)) { - if (!DFA_ALIVE(euck) && !DFA_ALIVE(johab)) return "UTF-8"; + if (!DFA_ALIVE(euck) && !DFA_ALIVE(johab)) + return "UTF-8"; DFA_NEXT(utf8, c); } @@ -388,33 +390,18 @@ /* Now, we have ambigous code. Pick the highest score. If more than one candidate tie, pick the default encoding. */ - if (DFA_ALIVE(euck)) top = &euck; - if (DFA_ALIVE(utf8)) { - if (top) { -#if defined PREFER_UTF8 - if (top->score <= utf8.score) top = &utf8; -#else - if (top->score < utf8.score) top = &utf8; -#endif - } else { - top = &utf8; - } - } - if (DFA_ALIVE(johab)) { - if (top) { -#if defined PREFER_JOAHB - if (top->score <= johab.score) top = &johab; -#else - if (top->score < johab.score) top = &johab; -#endif - } else { - top = &johab; + for (i = 0; order[i] != NULL; i++) { + if (order[i]->state >= 0) { //DFA_ALIVE() + if (top == NULL || order[i]->score > top->score) + top = order[i]; } } - if (top == &euck) return "EUC-KR"; - if (top == &utf8) return "UTF-8"; - if (top == &johab) return "JOHAB"; + if (top == &euck) + return "EUC-KR"; + if (top == &utf8) + return "UTF-8"; + if (top == &johab) + return "JOHAB"; return NULL; } -