# HG changeset patch # User yaz # Date 1148752928 25200 # Node ID 4be4d74db1238d5393a979da4bf7c4b300eaa3be # Parent c2fc86e40fba99f7f1e766e7dd7d5000cca2be0d [svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature. diff -r c2fc86e40fba -r 4be4d74db123 Makefile.in --- a/Makefile.in Wed May 24 16:31:15 2006 -0700 +++ b/Makefile.in Sat May 27 11:02:08 2006 -0700 @@ -1,7 +1,7 @@ include mk/rules.mk include mk/objective.mk -SUBDIRS = Plugins libaudacious intl audacious po icons skin +SUBDIRS = Plugins libaudacious intl $(SUBDIR_GUESS) audacious po icons skin install-posthook: @if test `whoami` = 'root' && test -z "$(DESTDIR)"; then \ diff -r c2fc86e40fba -r 4be4d74db123 Plugins/General/scrobbler/tags/unicode.c --- a/Plugins/General/scrobbler/tags/unicode.c Wed May 24 16:31:15 2006 -0700 +++ b/Plugins/General/scrobbler/tags/unicode.c Sat May 27 11:02:08 2006 -0700 @@ -23,6 +23,7 @@ #include #include "include/endian.h" #include "include/unicode.h" +#include "audacious/util.h" wchar_t *utf8_to_wchar(unsigned char *utf, size_t memsize) { @@ -142,6 +143,13 @@ void iso88591_to_utf8(unsigned char *iso, size_t memsize, unsigned char **utf) { + *utf = str_to_utf8(iso); +} + +#if 0 +void iso88591_to_utf8(unsigned char *iso, size_t memsize, + unsigned char **utf) +{ size_t i; wchar_t *wchar; @@ -150,6 +158,7 @@ *utf = wchar_to_utf8(wchar, memsize); free(wchar); } +#endif void utf16bom_to_utf8(unsigned char *utf16, size_t memsize, unsigned char **utf) diff -r c2fc86e40fba -r 4be4d74db123 Plugins/Input/mpg123/fileinfo.c --- a/Plugins/Input/mpg123/fileinfo.c Wed May 24 16:31:15 2006 -0700 +++ b/Plugins/Input/mpg123/fileinfo.c Sat May 27 11:02:08 2006 -0700 @@ -683,12 +683,15 @@ fill_entries(GtkWidget * w, gpointer data) { VFSFile *fh; - gchar *ptr; + gchar *ptr, *ptr2; guint32 i; if (str_has_prefix_nocase(current_filename, "http://")) return; - + +#ifdef USE_CHARDET + taglib_set_strings_unicode(FALSE); +#endif taglib_file = 
taglib_file_new(current_filename); if(taglib_file) { taglib_tag = taglib_file_tag(taglib_file); @@ -701,23 +704,35 @@ /* be sane here, taglib_tag results may be NULL --nenolod */ ptr = taglib_tag_title(taglib_tag); - if (ptr != NULL) - gtk_entry_set_text(GTK_ENTRY(title_entry), ptr); + if (ptr != NULL) { + ptr2 = str_to_utf8(ptr); + gtk_entry_set_text(GTK_ENTRY(title_entry), ptr2); + g_free(ptr2); + } ptr = taglib_tag_artist(taglib_tag); - if (ptr != NULL) - gtk_entry_set_text(GTK_ENTRY(artist_entry), ptr); + if (ptr != NULL) { + ptr2 = str_to_utf8(ptr); + gtk_entry_set_text(GTK_ENTRY(artist_entry), ptr2); + g_free(ptr2); + } ptr = taglib_tag_album(taglib_tag); - if (ptr != NULL) - gtk_entry_set_text(GTK_ENTRY(album_entry), ptr); - + if (ptr != NULL) { + ptr2 = str_to_utf8(ptr); + gtk_entry_set_text(GTK_ENTRY(album_entry), ptr2); + g_free(ptr2); + } + ptr = taglib_tag_comment(taglib_tag); - if (ptr != NULL) - gtk_entry_set_text(GTK_ENTRY(comment_entry), ptr); + if (ptr != NULL) { + ptr2 = str_to_utf8(ptr); + gtk_entry_set_text(GTK_ENTRY(comment_entry), ptr2); + g_free(ptr2); + } i = taglib_tag_year(taglib_tag); diff -r c2fc86e40fba -r 4be4d74db123 Plugins/Input/mpg123/mpg123.c --- a/Plugins/Input/mpg123/mpg123.c Wed May 24 16:31:15 2006 -0700 +++ b/Plugins/Input/mpg123/mpg123.c Sat May 27 11:02:08 2006 -0700 @@ -494,7 +494,19 @@ REMOVE_NONEXISTANT_TAG(input->genre); REMOVE_NONEXISTANT_TAG(input->comment); } + if(input->performer) + input->performer = str_to_utf8(input->performer); + if(input->album_name) + input->album_name = str_to_utf8(input->album_name); + + if(input->track_name) + input->track_name = str_to_utf8(input->track_name); + + if(input->comment) + input->comment = str_to_utf8(input->comment); + + input->file_name = g_path_get_basename(filename); input->file_path = g_path_get_dirname(filename); input->file_ext = extname(filename); @@ -510,6 +522,15 @@ *(extname(title) - 1) = '\0'; /* removes period */ } + if(input->performer) + g_free(input->performer); 
+ if(input->album_name) + g_free(input->album_name); + if(input->track_name) + g_free(input->track_name); + if(input->comment) + g_free(input->comment); + g_free(input->file_path); g_free(input->file_name); g_free(input); @@ -525,6 +546,9 @@ get_song_title(char *filename) { char *ret = NULL; +#ifdef USE_CHARDET + taglib_set_strings_unicode(FALSE); +#endif taglib_file = taglib_file_new(filename); taglib_tag = NULL; if(taglib_file) { diff -r c2fc86e40fba -r 4be4d74db123 audacious/glade/prefswin.glade --- a/audacious/glade/prefswin.glade Wed May 24 16:31:15 2006 -0700 +++ b/audacious/glade/prefswin.glade Sat May 27 11:02:08 2006 -0700 @@ -2260,6 +2260,119 @@ False + + + + True + 2 + 2 + False + 0 + 0 + + + + True + Fallback character encodings: + False + False + GTK_JUSTIFY_RIGHT + False + False + 1 + 0.5 + 0 + 0 + PANGO_ELLIPSIZE_NONE + -1 + False + 0 + + + 0 + 1 + 1 + 2 + fill + + + + + + + True + List of character encodings used for fall back conversion of metadata. If automatic character encoding detector failed or has been disabled, encodings in this list would be treated as candidates of the encoding of metadata, and fall back conversion from these encodings to UTF-8 would be attempted. 
+ True + True + True + 0 + + True + * + False + + + + + 1 + 2 + 1 + 2 + + + + + + + True + + False + True + + + + 1 + 2 + 0 + 1 + fill + + + + + + True + Auto character encoding detector for: + False + False + GTK_JUSTIFY_RIGHT + False + False + 1 + 0.5 + 0 + 0 + PANGO_ELLIPSIZE_NONE + -1 + False + 0 + + + 0 + 1 + 0 + 1 + fill + + + + + + 0 + True + True + + diff -r c2fc86e40fba -r 4be4d74db123 audacious/main.c --- a/audacious/main.c Wed May 24 16:31:15 2006 -0700 +++ b/audacious/main.c Sat May 27 11:02:08 2006 -0700 @@ -226,6 +226,19 @@ const guint n_titlestring_presets = G_N_ELEMENTS(bmp_titlestring_presets); +const gchar *chardet_detector_presets[] = { + "None", + "Japanese", + "Taiwanese (alpha test)", + "Chinese (alpha test)", + "Korean (alpha test)", +#ifdef HAVE_UDET + "Universal" +#endif +}; + +const guint n_chardet_detector_presets = G_N_ELEMENTS(chardet_detector_presets); + static bmp_cfg_boolent bmp_boolents[] = { {"allow_multiple_instances", &cfg.allow_multiple_instances, TRUE}, {"use_realtime", &cfg.use_realtime, TRUE}, @@ -319,6 +332,8 @@ {"filesel_path", &cfg.filesel_path, FALSE}, {"playlist_path", &cfg.playlist_path, FALSE}, {"generic_title_format", &cfg.gentitle_format, TRUE}, + {"chardet_detector", &cfg.chardet_detector, TRUE}, + {"chardet_fallback", &cfg.chardet_fallback, TRUE}, }; static gint ncfgsent = G_N_ELEMENTS(bmp_strents); diff -r c2fc86e40fba -r 4be4d74db123 audacious/main.h --- a/audacious/main.h Wed May 24 16:31:15 2006 -0700 +++ b/audacious/main.h Sat May 27 11:02:08 2006 -0700 @@ -107,6 +107,8 @@ gboolean resume_playback_on_startup; gint resume_playback_on_startup_time; gboolean show_separator_in_pl; + gchar *chardet_detector; + gchar *chardet_fallback; }; typedef struct _BmpConfig BmpConfig; @@ -138,6 +140,9 @@ extern const gchar *bmp_titlestring_presets[]; extern const guint n_titlestring_presets; +extern const gchar *chardet_detector_presets[]; +extern const guint n_chardet_detector_presets; + extern GList *dock_window_list; 
extern gboolean pposition_broken; diff -r c2fc86e40fba -r 4be4d74db123 audacious/prefswin.c --- a/audacious/prefswin.c Wed May 24 16:31:15 2006 -0700 +++ b/audacious/prefswin.c Sat May 27 11:02:08 2006 -0700 @@ -1860,6 +1860,85 @@ } +static void +on_chardet_detector_cbox_changed(GtkComboBox * combobox, gpointer data) +{ + ConfigDb *db; + gint position; + + position = gtk_combo_box_get_active(GTK_COMBO_BOX(combobox)); + cfg.chardet_detector = (char *)chardet_detector_presets[position]; + + db = bmp_cfg_db_open(); + bmp_cfg_db_set_string(db, NULL, "chardet_detector", cfg.chardet_detector); + bmp_cfg_db_close(db); + gtk_widget_set_sensitive(GTK_WIDGET(data), 1); +} + +static void +on_chardet_detector_cbox_realize(GtkComboBox *combobox, gpointer data) +{ + ConfigDb *db; + gchar *ret=NULL; + guint i=0,index=0; + + db = bmp_cfg_db_open(); + if(bmp_cfg_db_get_string(db, NULL, "chardet_detector", &ret) != FALSE) { + for(i=0; i + #include +#endif static GQuark quark_popup_data; @@ -1244,6 +1248,10 @@ if (!str) return NULL; + /* chardet encoding detector */ + if ((out_str = chardet_to_utf8(str, strlen(str), NULL, NULL, NULL))) + return out_str; + /* already UTF-8? */ if (g_utf8_validate(str, -1, NULL)) return g_strdup(str); @@ -1386,8 +1394,80 @@ } +gchar *chardet_to_utf8(const gchar *str, gssize len, + gsize *arg_bytes_read, gsize *arg_bytes_write, GError **arg_error) +{ +#ifdef USE_CHARDET + char *det = NULL, *encoding = NULL; +#endif + gchar *ret = NULL; + gsize *bytes_read, *bytes_write; + GError **error; + gsize my_bytes_read, my_bytes_write; + bytes_read = arg_bytes_read ? arg_bytes_read : &my_bytes_read; + bytes_write = arg_bytes_write ? arg_bytes_write : &my_bytes_write; + error = arg_error ? 
arg_error : NULL; +#ifdef USE_CHARDET + if(cfg.chardet_detector) + det = cfg.chardet_detector; + if(det){ + if(!strncasecmp("japanese", det, sizeof("japanese"))) { + encoding = (char *)guess_jp(str, strlen(str)); + if (!encoding) + goto fallback; + } else if(!strncasecmp("taiwanese", det, sizeof("taiwanese"))) { + encoding = (char *)guess_tw(str, strlen(str)); + if (!encoding) + goto fallback; + } else if(!strncasecmp("chinese", det, sizeof("chinese"))) { + encoding = (char *)guess_cn(str, strlen(str)); + if (!encoding) + goto fallback; + } else if(!strncasecmp("korean", det, sizeof("korean"))) { + encoding = (char *)guess_kr(str, strlen(str)); + if (!encoding) + goto fallback; +#ifdef HAVE_UDET + } else if (!strncasecmp("universal", det, sizeof("universal"))) { + encoding = (char *)detectCharset((char *)str, strlen(str)); + if (!encoding) + goto fallback; +#endif + } else /* none, invalid */ + goto fallback; + ret = g_convert(str, len, "UTF-8", encoding, bytes_read, bytes_write, error); + } +#endif +fallback: + if(!ret && cfg.chardet_fallback){ + gchar **encs=NULL, **enc=NULL; + encs = g_strsplit_set(cfg.chardet_fallback, " ,:;|/", 0); + + if(encs){ + enc = encs; + for(enc=encs; *enc ; enc++){ + ret = g_convert(str, len, "UTF-8", *enc, bytes_read, bytes_write, error); + if(len == *bytes_read){ + break; + } + } + g_strfreev(encs); + } + } + + if(ret){ + if(g_utf8_validate(ret, -1, NULL)) + return ret; + else { + g_free(ret); + ret = NULL; + } + } + + return NULL; // if I have no idea, return NULL. 
+} diff -r c2fc86e40fba -r 4be4d74db123 audacious/util.h --- a/audacious/util.h Wed May 24 16:31:15 2006 -0700 +++ b/audacious/util.h Sat May 27 11:02:08 2006 -0700 @@ -125,5 +125,7 @@ gint y; } MenuPos; +gchar *chardet_to_utf8(const gchar *str, gssize len, + gsize *arg_bytes_read, gsize *arg_bytes_write, GError **arg_error); #endif diff -r c2fc86e40fba -r 4be4d74db123 configure.ac --- a/configure.ac Wed May 24 16:31:15 2006 -0700 +++ b/configure.ac Sat May 27 11:02:08 2006 -0700 @@ -186,6 +186,21 @@ AM_CONDITIONAL(USE_IPV6,test "x$enable_ipv6" = xyes) AC_SUBST(USE_IPV6) +dnl chardet support +dnl ======================== +AC_ARG_ENABLE(chardet, + [ --enable-chardet enable character set detection support (default=no)], + enable_chardet=$enableval, enable_chardet=no) +if test "x$enable_chardet" = xyes; then + AC_DEFINE(USE_CHARDET, 1, [Define if character set detection enabled] ) + AC_CHECK_LIB(guess, guess_jp, [SUBDIR_GUESS=['']], [SUBDIR_GUESS=['libguess']]) + CHARDET_LIBS=['-lguess'] + AC_CHECK_LIB(udet_c, detectCharset, [AC_DEFINE(HAVE_UDET, 1,[Define if the system has Mozilla universal character detector library]) CHARDET_LIBS=["$CHARDET_LIBS -ludet -ludet_c"]]) +fi +AM_CONDITIONAL(USE_CHARDET,test "x$enable_chardet" = xyes) +AC_SUBST(USE_CHARDET) +AC_SUBST(CHARDET_LIBS) +AC_SUBST(SUBDIR_GUESS) dnl GConf support @@ -831,6 +846,7 @@ audacious/glade/Makefile audacious/images/Makefile libaudacious/Makefile + libguess/Makefile Plugins/Makefile Plugins/Output/Makefile Plugins/Output/OSS/Makefile diff -r c2fc86e40fba -r 4be4d74db123 libguess/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libguess/Makefile Sat May 27 11:02:08 2006 -0700 @@ -0,0 +1,24 @@ +include ../mk/rules.mk +include ../mk/objective.mk + +OBJECTIVE_LIBS = libguess.so +OBJECTIVE_SONAME_SUFFIX = 0.2.0 + +LDFLAGS += -Wl,-export-dynamic + +CFLAGS += -fPIC -DPIC + +SOURCES = \ + guess.c + +OBJECTS = ${SOURCES:.c=.o} + +HEADERS = \ + libguess.h + +install-posthook: + @mv 
${DESTDIR}/${LIBDIR}/libguess.so ${DESTDIR}/${LIBDIR}/libguess.so.0.2.0 + @ln -sf ${LIBDIR}/libguess.so.0.2.0 \ + ${DESTDIR}/${LIBDIR}/libguess.so.0 + @ln -sf ${LIBDIR}/libguess.so.0 \ + ${DESTDIR}/${LIBDIR}/libguess.so diff -r c2fc86e40fba -r 4be4d74db123 libguess/Makefile.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libguess/Makefile.in Sat May 27 11:02:08 2006 -0700 @@ -0,0 +1,24 @@ +include ../mk/rules.mk +include ../mk/objective.mk + +OBJECTIVE_LIBS = libguess.so +OBJECTIVE_SONAME_SUFFIX = 0.2.0 + +LDFLAGS += -Wl,-export-dynamic + +CFLAGS += -fPIC -DPIC + +SOURCES = \ + guess.c + +OBJECTS = ${SOURCES:.c=.o} + +HEADERS = \ + libguess.h + +install-posthook: + @mv ${DESTDIR}/${LIBDIR}/libguess.so ${DESTDIR}/${LIBDIR}/libguess.so.0.2.0 + @ln -sf ${LIBDIR}/libguess.so.0.2.0 \ + ${DESTDIR}/${LIBDIR}/libguess.so.0 + @ln -sf ${LIBDIR}/libguess.so.0 \ + ${DESTDIR}/${LIBDIR}/libguess.so diff -r c2fc86e40fba -r 4be4d74db123 libguess/guess.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libguess/guess.c Sat May 27 11:02:08 2006 -0700 @@ -0,0 +1,394 @@ +/* + * This code is derivative of guess.c of Gauche-0.8.3. + * The following is the original copyright notice. + */ + +/* + * guess.c - guessing character encoding + * + * Copyright (c) 2000-2003 Shiro Kawai, All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
Neither the name of the authors nor the names of its contributors + * may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "libguess.h" +#define NULL ((void *)0) + +/* take precedence if scores are same. 
*/ +#undef PREFER_UTF8 +#undef PREFER_SJIS +#undef PREFER_BIG5 +#undef PREFER_GB18030 +#undef PREFER_JOHAB + +/* data types */ +typedef struct guess_arc_rec { + unsigned int next; /* next state */ + double score; /* score */ +} guess_arc; + +typedef struct guess_dfa_rec { + signed char (*states)[256]; + guess_arc *arcs; + int state; + double score; +} guess_dfa; + +/* macros */ +#define DFA_INIT(st, ar) \ + { st, ar, 0, 1.0 } + +#define DFA_NEXT(dfa, ch) \ + do { \ + int arc__; \ + if (dfa.state >= 0) { \ + arc__ = dfa.states[dfa.state][ch]; \ + if (arc__ < 0) { \ + dfa.state = -1; \ + } else { \ + dfa.state = dfa.arcs[arc__].next; \ + dfa.score *= dfa.arcs[arc__].score; \ + } \ + } \ + } while (0) + +#define DFA_ALIVE(dfa) (dfa.state >= 0) + +/* include DFA table generated by guess.scm */ +#include "guess_tab.c" + +const char *guess_jp(const char *buf, int buflen) +{ + int i; + guess_dfa eucj = DFA_INIT(guess_eucj_st, guess_eucj_ar); + guess_dfa sjis = DFA_INIT(guess_sjis_st, guess_sjis_ar); + guess_dfa utf8 = DFA_INIT(guess_utf8_st, guess_utf8_ar); + guess_dfa *top = NULL; + + for (i=0; iscore <= utf8.score) top = &utf8; +#else + if (top->score < utf8.score) top = &utf8; +#endif + } else { + top = &utf8; + } + } + if (DFA_ALIVE(sjis)) { + if (top) { +#if defined PREFER_SJIS + if (top->score <= sjis.score) top = &sjis; +#else + if (top->score < sjis.score) top = &sjis; +#endif + } else { + top = &sjis; + } + } + + if (top == &eucj) return "EUC-JP"; + if (top == &utf8) return "UTF-8"; + if (top == &sjis) return "SJIS"; + return NULL; +} + +const char *guess_tw(const char *buf, int buflen) +{ + int i; + guess_dfa big5 = DFA_INIT(guess_big5_st, guess_big5_ar); + guess_dfa utf8 = DFA_INIT(guess_utf8_st, guess_utf8_ar); + guess_dfa *top = NULL; + + for (i=0; iscore <= utf8.score) top = &utf8; +#else + if (top->score < utf8.score) top = &utf8; +#endif + } else { + top = &utf8; + } + } + + if (top == &big5) return "BIG5"; + if (top == &utf8) return "UTF-8"; + return 
NULL; +} + +const char *guess_cn(const char *buf, int buflen) +{ + int i; + guess_dfa gb2312 = DFA_INIT(guess_gb2312_st, guess_gb2312_ar); + guess_dfa utf8 = DFA_INIT(guess_utf8_st, guess_utf8_ar); + guess_dfa gb18030 = DFA_INIT(guess_gb18030_st, guess_gb18030_ar); + guess_dfa *top = NULL; + + for (i=0; iscore <= utf8.score) top = &utf8; +#else + if (top->score < utf8.score) top = &utf8; +#endif + } else { + top = &utf8; + } + } + if (DFA_ALIVE(gb18030)) { + if (top) { +#if defined PREFER_GB18030 + if (top->score <= gb18030.score) top = &gb18030; +#else + if (top->score < gb18030.score) top = &gb18030; +#endif + } else { + top = &gb18030; + } + } + + if (top == &gb2312) return "GB2312"; + if (top == &utf8) return "UTF-8"; + if (top == &gb18030) return "GB18030"; + return NULL; +} + +const char *guess_kr(const char *buf, int buflen) +{ + int i; + guess_dfa euck = DFA_INIT(guess_euck_st, guess_euck_ar); + guess_dfa utf8 = DFA_INIT(guess_utf8_st, guess_utf8_ar); + guess_dfa johab = DFA_INIT(guess_johab_st, guess_johab_ar); + guess_dfa *top = NULL; + + for (i=0; iscore <= utf8.score) top = &utf8; +#else + if (top->score < utf8.score) top = &utf8; +#endif + } else { + top = &utf8; + } + } + if (DFA_ALIVE(johab)) { + if (top) { +#if defined PREFER_JOAHB + if (top->score <= johab.score) top = &johab; +#else + if (top->score < johab.score) top = &johab; +#endif + } else { + top = &johab; + } + } + + if (top == &euck) return "EUC-KR"; + if (top == &utf8) return "UTF-8"; + if (top == &johab) return "JOHAB"; + return NULL; +} + diff -r c2fc86e40fba -r 4be4d74db123 libguess/guess.scm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libguess/guess.scm Sat May 27 11:02:08 2006 -0700 @@ -0,0 +1,384 @@ +;;; +;;; This code is derivative of guess.c of Gauche-0.8.7. +;;; The following is the original copyright notice. +;;; + +;;; +;;; Auxiliary script to generate japanese code guessing table +;;; +;;; Copyright (c) 2000-2003 Shiro Kawai, All rights reserved. 
+;;; +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: +;;; +;;; 1. Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. +;;; +;;; 2. Redistributions in binary form must reproduce the above copyright +;;; notice, this list of conditions and the following disclaimer in the +;;; documentation and/or other materials provided with the distribution. +;;; +;;; 3. Neither the name of the authors nor the names of its contributors +;;; may be used to endorse or promote products derived from this +;;; software without specific prior written permission. +;;; +;;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +;;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +;;; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +;;; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +;;; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +;;; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +;;; TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +;;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +;;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;; +;;; $Id: guess.scm,v 1.3 2003/07/05 03:29:10 shirok Exp $ +;;; + +(use srfi-1) +(use gauche.sequence) + +;; This is a simple state machine compiler. +;; +;; <dfa> : (define-dfa <name> <state> ...) +;; <state> : (<name> (<input-set> <next-state> <score>) ...) +;; <name> : symbol +;; <next-state> : symbol +;; <score> : real +;; <input-set> : (<byte-or-range> ...) +;; <byte-or-range> : <byte> | (<byte> <byte>) +;; <byte> : integer between 0 and #xff | ASCII char +;; +;; When evaluated, the DFA generates a state transition table in +;; C source format. 
+ +(define-class () + ((name :init-keyword :name :accessor name-of) + (states :init-keyword :states :accessor states-of) + (instances :allocation :class :init-value '()))) + +(define-class () + ((name :init-keyword :name :accessor name-of) + (index :init-keyword :index :accessor index-of) + (arcs :init-keyword :arcs :accessor arcs-of :init-value '()))) + +(define-class () + ((from-state :init-keyword :from-state :accessor from-state-of) + (to-state :init-keyword :to-state :accessor to-state-of) + (ranges :init-keyword :ranges :accessor ranges-of) + (index :init-keyword :index :accessor index-of) + (score :init-keyword :score :accessor score-of))) + +;; Create DFA + +(define-syntax define-dfa + (syntax-rules () + ((_ name . states) + (define name (make + :name 'name + :states (resolve-states 'states)))))) + +(define-method initialize ((self ) initargs) + (next-method) + (slot-push! self 'instances self)) + +(define (all-dfas) (reverse (class-slot-ref 'instances))) + +(define (resolve-states state-defs) + (let ((states (map (lambda (d i) (make :name (car d) :index i)) + state-defs + (iota (length state-defs))))) + (fold (lambda (s d i) + (let1 num-arcs (length (cdr d)) + (set! (arcs-of s) + (map (lambda (arc aindex) + (make + :from-state s + :to-state (or (find (lambda (e) + (eq? (name-of e) (cadr arc))) + states) + (error "no such state" (cadr arc))) + :ranges (car arc) + :index aindex + :score (caddr arc))) + (cdr d) + (iota num-arcs i))) + (+ i num-arcs))) + 0 + states state-defs) + states)) + +;; Emit state table +(define (emit-dfa-table dfa) + (format #t "static signed char guess_~a_st[][256] = {\n" (name-of dfa)) + (for-each emit-state-table (states-of dfa)) + (print "};\n") + (format #t "static guess_arc guess_~a_ar[] = {\n" (name-of dfa)) + (for-each emit-arc-table + (append-map arcs-of (states-of dfa))) + (print "};\n") + ) + +(define (emit-state-table state) + (define (b2i byte) ;byte->integer + (if (char? 
byte) (char->integer byte) byte)) + (let1 arc-vec (make-vector 256 -1) + (dolist (br (arcs-of state)) + (dolist (range (ranges-of br)) + (if (pair? range) + (vector-fill! arc-vec (index-of br) + (b2i (car range)) (+ (b2i (cadr range)) 1)) + (set! (ref arc-vec (b2i range)) (index-of br))))) + (format #t " { /* state ~a */" (name-of state)) + (dotimes (i 256) + (when (zero? (modulo i 16)) (newline)) + (format #t " ~2d," (ref arc-vec i))) + (print "\n },") + )) + +(define (emit-arc-table arc) + (format #t " { ~2d, ~5s }, /* ~a -> ~a */\n" + (index-of (to-state-of arc)) + (score-of arc) + (name-of (from-state-of arc)) + (name-of (to-state-of arc)))) +;; +;; main +;; + +(define (main args) + (unless (= (length args) 2) + (error "usage: ~a " (car args))) + (with-output-to-file (cadr args) + (lambda () + (print "/* State transition table for character code guessing */") + (print "/* This file is automatically generated by guess.scm */") + (newline) + (for-each emit-dfa-table (all-dfas)))) + 0) + +;;;============================================================ +;;; DFA definitions +;;; + +;;; +;;; EUC-JP +;;; + +(define-dfa eucj + ;; first byte + (init + (((#x00 #x7f)) init 1.0) ; ASCII range + ((#x8e) jis0201_kana 0.8) ; JISX 0201 kana + ((#x8f) jis0213_2 0.95) ; JISX 0213 plane 2 + (((#xa1 #xfe)) jis0213_1 1.0) ; JISX 0213 plane 1 + ) + ;; jis x 0201 kana + (jis0201_kana + (((#xa1 #xdf)) init 1.0) + ) + ;; jis x 0208 and jis x 0213 plane 1 + (jis0213_1 + (((#xa1 #xfe)) init 1.0)) + ;; jis x 0213 plane 2 + (jis0213_2 + (((#xa1 #xfe)) init 1.0)) + ) + +;;; +;;; Shift_JIS +;;; + +(define-dfa sjis + ;; first byte + (init + (((#x00 #x7f)) init 1.0) ;ascii + (((#x81 #x9f) (#xe1 #xef)) jis0213 1.0) ;jisx0213 plane 1 + (((#xa1 #xdf)) init 0.8) ;jisx0201 kana + (((#xf0 #xfc)) jis0213 0.95) ;jisx0213 plane 2 + (((#xfd #xff)) init 0.8)) ;vendor extension + (jis0213 + (((#x40 #x7e) (#x80 #xfc)) init 1.0)) + ) + +;;; +;;; UTF-8 +;;; + +(define-dfa utf8 + (init + (((#x00 #x7f)) init 
1.0) + (((#xc2 #xdf)) 1byte_more 1.0) + (((#xe0 #xef)) 2byte_more 1.0) + (((#xf0 #xf7)) 3byte_more 1.0) + (((#xf8 #xfb)) 4byte_more 1.0) + (((#xfc #xfd)) 5byte_more 1.0)) + (1byte_more + (((#x80 #xbf)) init 1.0)) + (2byte_more + (((#x80 #xbf)) 1byte_more 1.0)) + (3byte_more + (((#x80 #xbf)) 2byte_more 1.0)) + (4byte_more + (((#x80 #xbf)) 3byte_more 1.0)) + (5byte_more + (((#x80 #xbf)) 4byte_more 1.0)) + ) + +;;; +;;; UCS-2LE +;;; + +(define-dfa ucs2le + (init + ((#xff) le 1.0) + (((#x00 #x7f)) ascii 1.0) + (((#x00 #xff)) multi 1.0)) + (le + ((#xfe) init 1.0)) + (ascii + ((#x00) init 1.0)) + (multi + (((#x00 #xff)) init 1.0))) + +;;; +;;; UCS-2BE +;;; +(define-dfa ucs2be + (init + ((#xfe) be 1.0) + ((#x00) ascii 1.0) + (((#x00 #xff)) multi 1.0)) + (be + ((#xff) init 1.0)) + (ascii + (((#x00 #x7f)) init 1.0)) + (multi + (((#x00 #xff)) init 1.0))) + + +;;; +;;; JIS (ISO2022JP) +;;; + +;; NB: for now, we just check the sequence of $ or '('. +'(define-dfa jis + (init + ((#x1b) esc 1.0) + (((#x00 #x1a) (#x1c #x1f)) init 1.0) ;C0 + (((#x20 #x7f)) init 1.0) ;ASCII + (((#xa1 #xdf)) init 0.7) ;JIS8bit kana + ) + (esc + ((#x0d #x0a) init 0.9) ;cancel + ((#\( ) esc-paren 1.0) + ((#\$ ) esc-$ 1.0) + ((#\& ) esc-& 1.0) + ) + (esc-paren + ((#\B #\J #\H) init 1.0) + ((#\I) jis0201kana 0.8) + ) + (esc-$ + ((#\@ #\B) kanji 1.0) + ((#\( ) esc-$-paren 1.0) + ) + (esc-$-paren + ((#\D #\O #\P) kanji 1.0)) + (esc-& + ((#\@ ) init 1.0)) + (jis0201kana + ((#x1b) esc 1.0) + (((#x20 #x5f)) jis0201kana 1.0)) + (kanji + ((#x1b) esc 1.0) + (((#x21 #x7e)) kanji-2 1.0)) + (kanji-2 + (((#x21 #x7e)) kanji 1.0)) + ) + +;;; +;;; Big5 +;;; + +(define-dfa big5 + ;; first byte + (init + (((#x00 #x7f)) init 1.0) ;ascii + (((#xa1 #xfe)) 2byte 1.0) ;big5-2byte + ) + (2byte + (((#x40 #x7e) (#xa1 #xfe)) init 1.0)) + ) + +;;; +;;; GB2312 (EUC-CN?) 
+;;; + +(define-dfa gb2312 + ;; first byte + (init + (((#x00 #x7f)) init 1.0) ;ascii + (((#xa1 #xfe)) 2byte 1.0) ;gb2312 2byte + ) + (2byte + (((#xa1 #xfe)) init 1.0)) + ) + +;;; +;;; GB18030 +;;; + +(define-dfa gb18030 + ;; first byte + (init + (((#x00 #x80)) init 1.0) ;ascii + (((#x81 #xfe)) 2byte 1.0) ;gb18030 2byte + (((#x81 #xfe)) 4byte2 1.0) ;gb18030 2byte + ) + (2byte + (((#x40 #x7e) (#x80 #xfe)) init 1.0)) + (4byte2 + (((#x30 #x39)) 4byte3 1.0)) + (4byte3 + (((#x81 #xfe)) 4byte4 1.0)) + (4byte4 + (((#x30 #x39)) init 1.0)) + ) + +;;; +;;; EUC-KR +;;; + +(define-dfa euck + ;; first byte + (init + (((#x00 #x7f)) init 1.0) ; ASCII range + (((#xa1 #xfe)) ks1001 1.0) ; KSX 1001 + ) + ;; ks x 1001 + (ks1001 + (((#xa1 #xfe)) init 1.0)) + ) + +;;; +;;; Johab +;;; + +(define-dfa johab + ;; first byte + (init + (((#x00 #x7f)) init 1.0) ; ASCII range + (((#x84 #xd3)) jamo51 1.0) ; jamo51 + (((#xd8 #xde) (#x30 #xf9)) jamo42 0.95) ; jamo42 + ) + ;; second byte + (jamo51 + (((#x41 #x7e) (#x81 #xfe)) init 1.0)) + (jamo42 + (((#x31 #x7e) (#x91 #xfe)) init 1.0)) + ) + diff -r c2fc86e40fba -r 4be4d74db123 libguess/guess_tab.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libguess/guess_tab.c Sat May 27 11:02:08 2006 -0700 @@ -0,0 +1,732 @@ +/* State transition table for character code guessing */ +/* This file is automatically generated by guess.scm */ + +static signed char guess_eucj_st[][256] = { + { /* state init */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 2, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 
3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -1, + }, + { /* state jis0201_kana */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + }, + { /* state jis0213_1 */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 5, 5, 
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, -1, + }, + { /* state jis0213_2 */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, -1, + }, +}; + +static guess_arc guess_eucj_ar[] = { + { 0, 1.0 }, /* init -> init */ + { 1, 0.8 }, /* init -> jis0201_kana */ + { 3, 0.95 }, /* init -> jis0213_2 */ + { 2, 1.0 }, /* init -> jis0213_1 */ + { 0, 1.0 }, /* jis0201_kana -> init */ + { 0, 1.0 }, /* jis0213_1 -> init */ + { 0, 1.0 }, /* jis0213_2 -> init */ +}; + +static signed char guess_sjis_st[][256] = { + { /* state init */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + -1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, + }, + { /* state jis0213 */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, -1, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, -1, -1, -1, + }, +}; + +static guess_arc guess_sjis_ar[] = { + { 0, 1.0 }, /* init -> init */ + { 1, 1.0 }, /* init -> jis0213 */ + { 0, 0.8 }, /* init -> init */ + { 1, 0.95 }, /* init -> jis0213 */ + { 0, 0.8 }, /* init -> init */ + { 0, 1.0 }, /* jis0213 -> init */ +}; + +static signed char guess_utf8_st[][256] = { + { /* state init */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, -1, -1, + }, + { /* state 1byte_more */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + }, + { /* state 2byte_more */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + }, + { /* state 3byte_more */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + }, + { /* state 4byte_more */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + }, + { /* state 5byte_more */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + }, +}; + +static guess_arc guess_utf8_ar[] = { + { 0, 1.0 }, /* init -> init */ + { 1, 1.0 }, /* init -> 1byte_more */ + { 2, 1.0 }, /* init -> 2byte_more */ + { 3, 1.0 }, /* init -> 3byte_more */ + { 4, 1.0 }, /* init -> 4byte_more */ + { 
5, 1.0 }, /* init -> 5byte_more */ + { 0, 1.0 }, /* 1byte_more -> init */ + { 1, 1.0 }, /* 2byte_more -> 1byte_more */ + { 2, 1.0 }, /* 3byte_more -> 2byte_more */ + { 3, 1.0 }, /* 4byte_more -> 3byte_more */ + { 4, 1.0 }, /* 5byte_more -> 4byte_more */ +}; + +static signed char guess_ucs2le_st[][256] = { + { /* state init */ + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + }, + { /* state le */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, -1, + }, + { /* state ascii */ + 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + }, + { /* state multi */ + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + }, +}; + +static guess_arc guess_ucs2le_ar[] = { + { 1, 1.0 }, /* init -> le */ + { 2, 1.0 }, /* init -> ascii */ + { 3, 1.0 }, /* init -> multi */ + { 0, 1.0 }, /* le -> init */ + { 0, 1.0 }, /* ascii -> init */ + { 0, 1.0 }, /* multi -> init */ +}; + +static signed char guess_ucs2be_st[][256] = { + { /* state init */ + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + }, + { /* state be */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, + }, + { /* state ascii */ + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + }, + { /* state multi */ + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + }, +}; + +static guess_arc guess_ucs2be_ar[] = { + { 1, 1.0 }, /* init -> be */ + { 2, 1.0 }, /* init -> ascii */ + { 3, 1.0 }, /* init -> multi */ + { 0, 1.0 }, /* be -> init */ + { 0, 1.0 }, /* ascii -> init */ + { 0, 1.0 }, /* multi -> init */ +}; + +static signed char guess_big5_st[][256] = { + { /* state init */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, + }, + { /* state 2byte */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -1, + }, +}; + +static guess_arc guess_big5_ar[] = { + { 0, 1.0 }, /* init -> init */ + { 1, 1.0 }, /* init -> 2byte */ + { 0, 1.0 }, /* 2byte -> init */ +}; + +static signed char guess_gb2312_st[][256] = { + { /* state init */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, + }, + { /* state 2byte */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -1, + }, +}; + +static guess_arc guess_gb2312_ar[] = { + { 0, 1.0 }, /* init -> init */ + { 1, 1.0 }, /* init -> 2byte */ + { 0, 1.0 }, /* 2byte -> init */ +}; + +static signed char guess_gb18030_st[][256] = { + { /* state init */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -1, + }, + { /* state 2byte */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -1, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 
3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -1, + }, + { /* state 4byte2 */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + }, + { /* state 4byte3 */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, -1, + }, + { /* state 4byte4 */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + }, +}; + +static guess_arc guess_gb18030_ar[] = { + { 0, 1.0 }, /* init -> init */ + { 1, 1.0 }, /* init -> 2byte */ + { 2, 1.0 }, /* init -> 4byte2 */ + { 0, 1.0 }, /* 2byte -> init */ + { 3, 1.0 }, /* 4byte2 -> 4byte3 */ + { 4, 1.0 }, /* 4byte3 -> 4byte4 */ + { 0, 1.0 }, /* 4byte4 -> init */ +}; + +static signed char guess_euck_st[][256] = { + { /* state init */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, + }, + { /* state ks1001 */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -1, + }, +}; + +static guess_arc guess_euck_ar[] = { + { 0, 1.0 }, /* init -> init */ + { 1, 1.0 }, /* init -> ks1001 */ + { 0, 1.0 }, /* ks1001 -> init */ +}; + +static signed char guess_johab_st[][256] = { + { /* state init */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -1, -1, -1, -1, -1, -1, + }, + { /* state jamo51 */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -1, + -1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -1, + }, + { /* state jamo42 */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, -1, + }, +}; + +static guess_arc guess_johab_ar[] = { + { 0, 1.0 }, /* init -> init */ + { 1, 1.0 }, /* init -> jamo51 */ + { 2, 0.95 }, /* init -> jamo42 */ + { 0, 1.0 }, /* jamo51 -> init */ + { 0, 1.0 }, /* jamo42 -> init */ +}; + diff -r c2fc86e40fba -r 4be4d74db123 libguess/libguess.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libguess/libguess.h Sat May 27 11:02:08 2006 -0700 @@ -0,0 +1,47 @@ +/* + * This code is derivative of guess.c of Gauche-0.8.3. + * The following is the original copyright notice. + */ + +/* + * Copyright (c) 2000-2003 Shiro Kawai, All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the authors nor the names of its contributors + * may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef _LIBGUESS_H +#define _LIBGUESS_H 1 + +/* prototypes */ +const char *guess_jp(const char *buf, int buflen); +const char *guess_tw(const char *buf, int buflen); +const char *guess_cn(const char *buf, int buflen); +const char *guess_kr(const char *buf, int buflen); + +#endif diff -r c2fc86e40fba -r 4be4d74db123 mk/rules.mk.in --- a/mk/rules.mk.in Wed May 24 16:31:15 2006 -0700 +++ b/mk/rules.mk.in Sat May 27 11:02:08 2006 -0700 @@ -306,3 +306,5 @@ CURL_CFLAGS = @CURL_CFLAGS@ CURL_LIBS = @CURL_LIBS@ MUSICBRAINZ_LIBS = @MUSICBRAINZ_LIBS@ +CHARDET_LIBS = @CHARDET_LIBS@ +SUBDIR_GUESS = @SUBDIR_GUESS@